1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "art_method.h"
20 #include "code_generator_utils.h"
21 #include "compiled_method.h"
22 #include "entrypoints/quick/quick_entrypoints.h"
23 #include "gc/accounting/card_table.h"
24 #include "intrinsics.h"
25 #include "intrinsics_x86_64.h"
26 #include "mirror/array-inl.h"
27 #include "mirror/class-inl.h"
28 #include "mirror/object_reference.h"
29 #include "thread.h"
30 #include "utils/assembler.h"
31 #include "utils/stack_checks.h"
32 #include "utils/x86_64/assembler_x86_64.h"
33 #include "utils/x86_64/managed_register_x86_64.h"
34
35 namespace art {
36
37 template<class MirrorType>
38 class GcRoot;
39
40 namespace x86_64 {
41
42 static constexpr int kCurrentMethodStackOffset = 0;
43 static constexpr Register kMethodRegisterArgument = RDI;
44 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
45 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
46 // generates less code/data when num_entries is small.
47 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
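// For illustration only, the two lowerings look roughly like this (a sketch, not the exact
// code emitted by VisitPackedSwitch):
//
//   compare/jump sequence:            jump table:
//     cmpl $case_0, value_reg           leaq table(%rip), tmp
//     je   block_0                      movsxd (tmp, index, 4), offset
//     cmpl $case_1, value_reg           addq offset, tmp
//     je   block_1                      jmp  *tmp
//     ...                               plus num_entries 32-bit offsets in the table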
48
49 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
50 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
51
52 static constexpr int kC2ConditionMask = 0x400;
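// kC2ConditionMask selects the C2 flag (bit 10) of the x87 FPU status word. It is tested
// after fprem when computing floating-point remainders; C2 stays set while the partial
// remainder reduction is incomplete. A sketch of the typical retry loop:
//
//   retry: fprem
//          fstsw %ax
//          testl $0x400, %eax
//          jnz   retry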
53
54 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
55 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
56 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
57
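// Slow path throwing a NullPointerException when an explicit or implicit null check fails.
// The runtime call never returns, hence IsFatal() returning true below.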
58 class NullCheckSlowPathX86_64 : public SlowPathCode {
59 public:
60 explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
61
62 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
63 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
64 __ Bind(GetEntryLabel());
65 if (instruction_->CanThrowIntoCatchBlock()) {
66 // Live registers will be restored in the catch block if caught.
67 SaveLiveRegisters(codegen, instruction_->GetLocations());
68 }
69 x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
70 instruction_,
71 instruction_->GetDexPc(),
72 this);
73 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
74 }
75
76 bool IsFatal() const OVERRIDE { return true; }
77
78 const char* GetDescription() const OVERRIDE { return "NullCheckSlowPathX86_64"; }
79
80 private:
81 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
82 };
83
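// Slow path throwing an ArithmeticException when the divisor of an integer division or
// remainder is zero.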
84 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
85 public:
86 explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
87
88 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
89 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
90 __ Bind(GetEntryLabel());
91 x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
92 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
93 }
94
95 bool IsFatal() const OVERRIDE { return true; }
96
97 const char* GetDescription() const OVERRIDE { return "DivZeroCheckSlowPathX86_64"; }
98
99 private:
100 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
101 };
102
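// Slow path for the overflow case of integer division/remainder by -1: dividing the minimum
// int/long value by -1 would trap in idiv, so the quotient is produced by negating the
// dividend and the remainder is simply 0.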
103 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
104 public:
105 DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, Primitive::Type type, bool is_div)
106 : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
107
108 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
109 __ Bind(GetEntryLabel());
110 if (type_ == Primitive::kPrimInt) {
111 if (is_div_) {
112 __ negl(cpu_reg_);
113 } else {
114 __ xorl(cpu_reg_, cpu_reg_);
115 }
116
117 } else {
118 DCHECK_EQ(Primitive::kPrimLong, type_);
119 if (is_div_) {
120 __ negq(cpu_reg_);
121 } else {
122 __ xorl(cpu_reg_, cpu_reg_);
123 }
124 }
125 __ jmp(GetExitLabel());
126 }
127
128 const char* GetDescription() const OVERRIDE { return "DivRemMinusOneSlowPathX86_64"; }
129
130 private:
131 const CpuRegister cpu_reg_;
132 const Primitive::Type type_;
133 const bool is_div_;
134 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
135 };
136
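// Slow path calling the TestSuspend runtime entrypoint for an HSuspendCheck. Afterwards it
// either returns to the code following the check (when `successor_` is null) or jumps
// directly to the successor block.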
137 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
138 public:
139 SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
140 : SlowPathCode(instruction), successor_(successor) {}
141
142 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
143 LocationSummary* locations = instruction_->GetLocations();
144 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
145 __ Bind(GetEntryLabel());
146 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
147 x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
148 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
149 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
150 if (successor_ == nullptr) {
151 __ jmp(GetReturnLabel());
152 } else {
153 __ jmp(x86_64_codegen->GetLabelOf(successor_));
154 }
155 }
156
157 Label* GetReturnLabel() {
158 DCHECK(successor_ == nullptr);
159 return &return_label_;
160 }
161
162 HBasicBlock* GetSuccessor() const {
163 return successor_;
164 }
165
166 const char* GetDescription() const OVERRIDE { return "SuspendCheckSlowPathX86_64"; }
167
168 private:
169 HBasicBlock* const successor_;
170 Label return_label_;
171
172 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
173 };
174
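// Slow path throwing an out-of-bounds exception when an HBoundsCheck fails; the offending
// index and the array (or string) length are passed to the runtime in the first two
// calling convention registers.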
175 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
176 public:
177 explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
178 : SlowPathCode(instruction) {}
179
180 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
181 LocationSummary* locations = instruction_->GetLocations();
182 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
183 __ Bind(GetEntryLabel());
184 if (instruction_->CanThrowIntoCatchBlock()) {
185 // Live registers will be restored in the catch block if caught.
186 SaveLiveRegisters(codegen, instruction_->GetLocations());
187 }
188 // Are we using an array length from memory?
189 HInstruction* array_length = instruction_->InputAt(1);
190 Location length_loc = locations->InAt(1);
191 InvokeRuntimeCallingConvention calling_convention;
192 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
193 // Load the array length into our temporary.
194 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
195 Location array_loc = array_length->GetLocations()->InAt(0);
196 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
197 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
198 // Check for conflicts with index.
199 if (length_loc.Equals(locations->InAt(0))) {
200 // We know we aren't using parameter 2.
201 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
202 }
203 __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
204 if (mirror::kUseStringCompression) {
205 __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
206 }
207 }
208
209 // We're moving two locations to locations that could overlap, so we need a parallel
210 // move resolver.
211 codegen->EmitParallelMoves(
212 locations->InAt(0),
213 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
214 Primitive::kPrimInt,
215 length_loc,
216 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
217 Primitive::kPrimInt);
218 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
219 ? kQuickThrowStringBounds
220 : kQuickThrowArrayBounds;
221 x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
222 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
223 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
224 }
225
226 bool IsFatal() const OVERRIDE { return true; }
227
228 const char* GetDescription() const OVERRIDE { return "BoundsCheckSlowPathX86_64"; }
229
230 private:
231 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
232 };
233
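// Slow path resolving a class through the runtime and, for HClinitCheck, also initializing
// it; the result is moved from RAX to the expected output location.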
234 class LoadClassSlowPathX86_64 : public SlowPathCode {
235 public:
236 LoadClassSlowPathX86_64(HLoadClass* cls,
237 HInstruction* at,
238 uint32_t dex_pc,
239 bool do_clinit)
240 : SlowPathCode(at), cls_(cls), dex_pc_(dex_pc), do_clinit_(do_clinit) {
241 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
242 }
243
244 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
245 LocationSummary* locations = instruction_->GetLocations();
246 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
247 __ Bind(GetEntryLabel());
248
249 SaveLiveRegisters(codegen, locations);
250
251 // Custom calling convention: RAX serves as both input and output.
252 __ movl(CpuRegister(RAX), Immediate(cls_->GetTypeIndex().index_));
253 x86_64_codegen->InvokeRuntime(do_clinit_ ? kQuickInitializeStaticStorage : kQuickInitializeType,
254 instruction_,
255 dex_pc_,
256 this);
257 if (do_clinit_) {
258 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, uint32_t>();
259 } else {
260 CheckEntrypointTypes<kQuickInitializeType, void*, uint32_t>();
261 }
262
263 Location out = locations->Out();
264 // Move the class to the desired location.
265 if (out.IsValid()) {
266 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
267 x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
268 }
269
270 RestoreLiveRegisters(codegen, locations);
271 // For HLoadClass/kBssEntry, store the resolved Class to the BSS entry.
272 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
273 if (cls_ == instruction_ && cls_->GetLoadKind() == HLoadClass::LoadKind::kBssEntry) {
274 DCHECK(out.IsValid());
275 __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
276 locations->Out().AsRegister<CpuRegister>());
277 Label* fixup_label = x86_64_codegen->NewTypeBssEntryPatch(cls_);
278 __ Bind(fixup_label);
279 }
280 __ jmp(GetExitLabel());
281 }
282
283 const char* GetDescription() const OVERRIDE { return "LoadClassSlowPathX86_64"; }
284
285 private:
286 // The class this slow path will load.
287 HLoadClass* const cls_;
288
289 // The dex PC of the instruction (`at`) requiring the class load.
290 const uint32_t dex_pc_;
291
292 // Whether to initialize the class.
293 const bool do_clinit_;
294
295 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
296 };
297
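// Slow path resolving a String through the runtime and storing it to the .bss entry read by
// the fast path.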
298 class LoadStringSlowPathX86_64 : public SlowPathCode {
299 public:
300 explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
301
302 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
303 LocationSummary* locations = instruction_->GetLocations();
304 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
305
306 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
307 __ Bind(GetEntryLabel());
308 SaveLiveRegisters(codegen, locations);
309
310 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
311 // Custom calling convention: RAX serves as both input and output.
312 __ movl(CpuRegister(RAX), Immediate(string_index.index_));
313 x86_64_codegen->InvokeRuntime(kQuickResolveString,
314 instruction_,
315 instruction_->GetDexPc(),
316 this);
317 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
318 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
319 RestoreLiveRegisters(codegen, locations);
320
321 // Store the resolved String to the BSS entry.
322 __ movl(Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false),
323 locations->Out().AsRegister<CpuRegister>());
324 Label* fixup_label = x86_64_codegen->NewStringBssEntryPatch(instruction_->AsLoadString());
325 __ Bind(fixup_label);
326
327 __ jmp(GetExitLabel());
328 }
329
330 const char* GetDescription() const OVERRIDE { return "LoadStringSlowPathX86_64"; }
331
332 private:
333 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
334 };
335
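// Slow path shared by HInstanceOf and HCheckCast: it calls into the runtime for the
// non-trivial type check and, for instanceof, moves the result back from RAX.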
336 class TypeCheckSlowPathX86_64 : public SlowPathCode {
337 public:
338 TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
339 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
340
341 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
342 LocationSummary* locations = instruction_->GetLocations();
343 uint32_t dex_pc = instruction_->GetDexPc();
344 DCHECK(instruction_->IsCheckCast()
345 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
346
347 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
348 __ Bind(GetEntryLabel());
349
350 if (!is_fatal_) {
351 SaveLiveRegisters(codegen, locations);
352 }
353
354 // We're moving two locations to locations that could overlap, so we need a parallel
355 // move resolver.
356 InvokeRuntimeCallingConvention calling_convention;
357 codegen->EmitParallelMoves(locations->InAt(0),
358 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
359 Primitive::kPrimNot,
360 locations->InAt(1),
361 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
362 Primitive::kPrimNot);
363 if (instruction_->IsInstanceOf()) {
364 x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
365 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
366 } else {
367 DCHECK(instruction_->IsCheckCast());
368 x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
369 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
370 }
371
372 if (!is_fatal_) {
373 if (instruction_->IsInstanceOf()) {
374 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
375 }
376
377 RestoreLiveRegisters(codegen, locations);
378 __ jmp(GetExitLabel());
379 }
380 }
381
382 const char* GetDescription() const OVERRIDE { return "TypeCheckSlowPathX86_64"; }
383
384 bool IsFatal() const OVERRIDE { return is_fatal_; }
385
386 private:
387 const bool is_fatal_;
388
389 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
390 };
391
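// Slow path transferring control to the interpreter through the deoptimization entrypoint,
// passing the deoptimization kind as the single argument.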
392 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
393 public:
394 explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
395 : SlowPathCode(instruction) {}
396
397 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
398 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
399 __ Bind(GetEntryLabel());
400 LocationSummary* locations = instruction_->GetLocations();
401 SaveLiveRegisters(codegen, locations);
402 InvokeRuntimeCallingConvention calling_convention;
403 x86_64_codegen->Load32BitValue(
404 CpuRegister(calling_convention.GetRegisterAt(0)),
405 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
406 x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
407 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
408 }
409
410 const char* GetDescription() const OVERRIDE { return "DeoptimizationSlowPathX86_64"; }
411
412 private:
413 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
414 };
415
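// Slow path for an ArraySet whose type check cannot be elided; it calls the aput-object
// runtime entrypoint with (array, index, value).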
416 class ArraySetSlowPathX86_64 : public SlowPathCode {
417 public:
418 explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
419
420 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
421 LocationSummary* locations = instruction_->GetLocations();
422 __ Bind(GetEntryLabel());
423 SaveLiveRegisters(codegen, locations);
424
425 InvokeRuntimeCallingConvention calling_convention;
426 HParallelMove parallel_move(codegen->GetGraph()->GetArena());
427 parallel_move.AddMove(
428 locations->InAt(0),
429 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
430 Primitive::kPrimNot,
431 nullptr);
432 parallel_move.AddMove(
433 locations->InAt(1),
434 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
435 Primitive::kPrimInt,
436 nullptr);
437 parallel_move.AddMove(
438 locations->InAt(2),
439 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
440 Primitive::kPrimNot,
441 nullptr);
442 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
443
444 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
445 x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
446 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
447 RestoreLiveRegisters(codegen, locations);
448 __ jmp(GetExitLabel());
449 }
450
451 const char* GetDescription() const OVERRIDE { return "ArraySetSlowPathX86_64"; }
452
453 private:
454 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
455 };
456
457 // Slow path marking an object reference `ref` during a read
458 // barrier. The field `obj.field` in the object `obj` holding this
459 // reference does not get updated by this slow path after marking (see
460 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
461 //
462 // This means that after the execution of this slow path, `ref` will
463 // always be up-to-date, but `obj.field` may not; i.e., after the
464 // flip, `ref` will be a to-space reference, but `obj.field` will
465 // probably still be a from-space reference (unless it gets updated by
466 // another thread, or if another thread installed another object
467 // reference (different from `ref`) in `obj.field`).
468 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
469 public:
470 ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
471 Location ref,
472 bool unpoison_ref_before_marking)
473 : SlowPathCode(instruction),
474 ref_(ref),
475 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
476 DCHECK(kEmitCompilerReadBarrier);
477 }
478
479 const char* GetDescription() const OVERRIDE { return "ReadBarrierMarkSlowPathX86_64"; }
480
481 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
482 LocationSummary* locations = instruction_->GetLocations();
483 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
484 Register ref_reg = ref_cpu_reg.AsRegister();
485 DCHECK(locations->CanCall());
486 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
487 DCHECK(instruction_->IsInstanceFieldGet() ||
488 instruction_->IsStaticFieldGet() ||
489 instruction_->IsArrayGet() ||
490 instruction_->IsArraySet() ||
491 instruction_->IsLoadClass() ||
492 instruction_->IsLoadString() ||
493 instruction_->IsInstanceOf() ||
494 instruction_->IsCheckCast() ||
495 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
496 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
497 << "Unexpected instruction in read barrier marking slow path: "
498 << instruction_->DebugName();
499
500 __ Bind(GetEntryLabel());
501 if (unpoison_ref_before_marking_) {
502 // Object* ref = ref_addr->AsMirrorPtr()
503 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
504 }
505 // No need to save live registers; it's taken care of by the
506 // entrypoint. Also, there is no need to update the stack mask,
507 // as this runtime call will not trigger a garbage collection.
508 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
509 DCHECK_NE(ref_reg, RSP);
510 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
511 // "Compact" slow path, saving two moves.
512 //
513 // Instead of using the standard runtime calling convention (input
514 // in RDI and output in RAX):
515 //
516 // RDI <- ref
517 // RAX <- ReadBarrierMark(RDI)
518 // ref <- RAX
519 //
520 // we just use rX (the register containing `ref`) as input and output
521 // of a dedicated entrypoint:
522 //
523 // rX <- ReadBarrierMarkRegX(rX)
524 //
525 int32_t entry_point_offset =
526 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
527 // This runtime call does not require a stack map.
528 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
529 __ jmp(GetExitLabel());
530 }
531
532 private:
533 // The location (register) of the marked object reference.
534 const Location ref_;
535 // Should the reference in `ref_` be unpoisoned prior to marking it?
536 const bool unpoison_ref_before_marking_;
537
538 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
539 };
540
541 // Slow path marking an object reference `ref` during a read barrier,
542 // and if needed, atomically updating the field `obj.field` in the
543 // object `obj` holding this reference after marking (contrary to
544 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
545 // `obj.field`).
546 //
547 // This means that after the execution of this slow path, both `ref`
548 // and `obj.field` will be up-to-date; i.e., after the flip, both will
549 // hold the same to-space reference (unless another thread installed
550 // another object reference (different from `ref`) in `obj.field`).
551 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
552 public:
553 ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
554 Location ref,
555 CpuRegister obj,
556 const Address& field_addr,
557 bool unpoison_ref_before_marking,
558 CpuRegister temp1,
559 CpuRegister temp2)
560 : SlowPathCode(instruction),
561 ref_(ref),
562 obj_(obj),
563 field_addr_(field_addr),
564 unpoison_ref_before_marking_(unpoison_ref_before_marking),
565 temp1_(temp1),
566 temp2_(temp2) {
567 DCHECK(kEmitCompilerReadBarrier);
568 }
569
570 const char* GetDescription() const OVERRIDE {
571 return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
572 }
573
574 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
575 LocationSummary* locations = instruction_->GetLocations();
576 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
577 Register ref_reg = ref_cpu_reg.AsRegister();
578 DCHECK(locations->CanCall());
579 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
580 // This slow path is only used by the UnsafeCASObject intrinsic.
581 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
582 << "Unexpected instruction in read barrier marking and field updating slow path: "
583 << instruction_->DebugName();
584 DCHECK(instruction_->GetLocations()->Intrinsified());
585 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
586
587 __ Bind(GetEntryLabel());
588 if (unpoison_ref_before_marking_) {
589 // Object* ref = ref_addr->AsMirrorPtr()
590 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
591 }
592
593 // Save the old (unpoisoned) reference.
594 __ movl(temp1_, ref_cpu_reg);
595
596 // No need to save live registers; it's taken care of by the
597 // entrypoint. Also, there is no need to update the stack mask,
598 // as this runtime call will not trigger a garbage collection.
599 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
600 DCHECK_NE(ref_reg, RSP);
601 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
602 // "Compact" slow path, saving two moves.
603 //
604 // Instead of using the standard runtime calling convention (input
605 // in RDI and output in RAX):
606 //
607 // RDI <- ref
608 // RAX <- ReadBarrierMark(RDI)
609 // ref <- RAX
610 //
611 // we just use rX (the register containing `ref`) as input and output
612 // of a dedicated entrypoint:
613 //
614 // rX <- ReadBarrierMarkRegX(rX)
615 //
616 int32_t entry_point_offset =
617 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
618 // This runtime call does not require a stack map.
619 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
620
621 // If the new reference is different from the old reference,
622 // update the field in the holder (`*field_addr`).
623 //
624 // Note that this field could also hold a different object, if
625 // another thread had concurrently changed it. In that case, the
626 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
627 // operation below would abort the CAS, leaving the field as-is.
628 NearLabel done;
629 __ cmpl(temp1_, ref_cpu_reg);
630 __ j(kEqual, &done);
631
632 // Update the holder's field atomically. This may fail if the
633 // mutator updates the field before us, but that is OK. This is
634 // achieved using a strong compare-and-set (CAS) operation with relaxed
635 // memory synchronization ordering, where the expected value is
636 // the old reference and the desired value is the new reference.
637 // This operation is implemented with a 32-bit LOCK CMPXCHG
638 // instruction, which requires the expected value (the old
639 // reference) to be in EAX. Save RAX beforehand, and move the
640 // expected value (stored in `temp1_`) into EAX.
641 __ movq(temp2_, CpuRegister(RAX));
642 __ movl(CpuRegister(RAX), temp1_);
643
644 // Convenience aliases.
645 CpuRegister base = obj_;
646 CpuRegister expected = CpuRegister(RAX);
647 CpuRegister value = ref_cpu_reg;
648
649 bool base_equals_value = (base.AsRegister() == value.AsRegister());
650 Register value_reg = ref_reg;
651 if (kPoisonHeapReferences) {
652 if (base_equals_value) {
653 // If `base` and `value` are the same register location, move
654 // `value_reg` to a temporary register. This way, poisoning
655 // `value_reg` won't invalidate `base`.
656 value_reg = temp1_.AsRegister();
657 __ movl(CpuRegister(value_reg), base);
658 }
659
660 // Check that the register allocator did not assign the location
661 // of `expected` (RAX) to `value` nor to `base`, so that heap
662 // poisoning (when enabled) works as intended below.
663 // - If `value` were equal to `expected`, both references would
664 // be poisoned twice, meaning they would not be poisoned at
665 // all, as heap poisoning uses address negation.
666 // - If `base` were equal to `expected`, poisoning `expected`
667 // would invalidate `base`.
668 DCHECK_NE(value_reg, expected.AsRegister());
669 DCHECK_NE(base.AsRegister(), expected.AsRegister());
670
671 __ PoisonHeapReference(expected);
672 __ PoisonHeapReference(CpuRegister(value_reg));
673 }
674
675 __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
676
677 // If heap poisoning is enabled, we need to unpoison the values
678 // that were poisoned earlier.
679 if (kPoisonHeapReferences) {
680 if (base_equals_value) {
681 // `value_reg` has been moved to a temporary register, no need
682 // to unpoison it.
683 } else {
684 __ UnpoisonHeapReference(CpuRegister(value_reg));
685 }
686 // No need to unpoison `expected` (RAX), as it is overwritten below when RAX is restored.
687 }
688
689 // Restore RAX.
690 __ movq(CpuRegister(RAX), temp2_);
691
692 __ Bind(&done);
693 __ jmp(GetExitLabel());
694 }
695
696 private:
697 // The location (register) of the marked object reference.
698 const Location ref_;
699 // The register containing the object holding the marked object reference field.
700 const CpuRegister obj_;
701 // The address of the marked reference field. The base of this address must be `obj_`.
702 const Address field_addr_;
703
704 // Should the reference in `ref_` be unpoisoned prior to marking it?
705 const bool unpoison_ref_before_marking_;
706
707 const CpuRegister temp1_;
708 const CpuRegister temp2_;
709
710 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
711 };
712
713 // Slow path generating a read barrier for a heap reference.
714 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
715 public:
716 ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
717 Location out,
718 Location ref,
719 Location obj,
720 uint32_t offset,
721 Location index)
722 : SlowPathCode(instruction),
723 out_(out),
724 ref_(ref),
725 obj_(obj),
726 offset_(offset),
727 index_(index) {
728 DCHECK(kEmitCompilerReadBarrier);
729 // If `obj` is equal to `out` or `ref`, it means the initial
730 // object has been overwritten by (or after) the heap object
731 // reference load to be instrumented, e.g.:
732 //
733 // __ movl(out, Address(out, offset));
734 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
735 //
736 // In that case, we have lost the information about the original
737 // object, and the emitted read barrier cannot work properly.
738 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
739 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
740 }
741
742 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
743 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
744 LocationSummary* locations = instruction_->GetLocations();
745 CpuRegister reg_out = out_.AsRegister<CpuRegister>();
746 DCHECK(locations->CanCall());
747 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
748 DCHECK(instruction_->IsInstanceFieldGet() ||
749 instruction_->IsStaticFieldGet() ||
750 instruction_->IsArrayGet() ||
751 instruction_->IsInstanceOf() ||
752 instruction_->IsCheckCast() ||
753 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
754 << "Unexpected instruction in read barrier for heap reference slow path: "
755 << instruction_->DebugName();
756
757 __ Bind(GetEntryLabel());
758 SaveLiveRegisters(codegen, locations);
759
760 // We may have to change the index's value, but as `index_` is a
761 // constant member (like other "inputs" of this slow path),
762 // introduce a copy of it, `index`.
763 Location index = index_;
764 if (index_.IsValid()) {
765 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
766 if (instruction_->IsArrayGet()) {
767 // Compute real offset and store it in index_.
768 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
769 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
770 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
771 // We are about to change the value of `index_reg` (see the
772 // calls to art::x86_64::X86_64Assembler::shll and
773 // art::x86_64::X86_64Assembler::AddImmediate below), but it
774 // has not been saved by the previous call to
775 // art::SlowPathCode::SaveLiveRegisters, as it is a
776 // callee-save register --
777 // art::SlowPathCode::SaveLiveRegisters does not consider
778 // callee-save registers, as it has been designed with the
779 // assumption that callee-save registers are supposed to be
780 // handled by the called function. So, as a callee-save
781 // register, `index_reg` _would_ eventually be saved onto
782 // the stack, but it would be too late: we would have
783 // changed its value earlier. Therefore, we manually save
784 // it here into another freely available register,
785 // `free_reg`, chosen of course among the caller-save
786 // registers (as a callee-save `free_reg` register would
787 // exhibit the same problem).
788 //
789 // Note we could have requested a temporary register from
790 // the register allocator instead; but we prefer not to, as
791 // this is a slow path, and we know we can find a
792 // caller-save register that is available.
793 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
794 __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
795 index_reg = free_reg;
796 index = Location::RegisterLocation(index_reg);
797 } else {
798 // The initial register stored in `index_` has already been
799 // saved in the call to art::SlowPathCode::SaveLiveRegisters
800 // (as it is not a callee-save register), so we can freely
801 // use it.
802 }
803 // Shifting the index value contained in `index_reg` by the
804 // scale factor (2) cannot overflow in practice, as the
805 // runtime is unable to allocate object arrays with a size
806 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
807 __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
808 static_assert(
809 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
810 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
811 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
812 } else {
813 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
814 // intrinsics, `index_` is not shifted by a scale factor of 2
815 // (as in the case of ArrayGet), as it is actually an offset
816 // to an object field within an object.
817 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
818 DCHECK(instruction_->GetLocations()->Intrinsified());
819 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
820 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
821 << instruction_->AsInvoke()->GetIntrinsic();
822 DCHECK_EQ(offset_, 0U);
823 DCHECK(index_.IsRegister());
824 }
825 }
826
827 // We're moving two or three locations to locations that could
828 // overlap, so we need a parallel move resolver.
829 InvokeRuntimeCallingConvention calling_convention;
830 HParallelMove parallel_move(codegen->GetGraph()->GetArena());
831 parallel_move.AddMove(ref_,
832 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
833 Primitive::kPrimNot,
834 nullptr);
835 parallel_move.AddMove(obj_,
836 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
837 Primitive::kPrimNot,
838 nullptr);
839 if (index.IsValid()) {
840 parallel_move.AddMove(index,
841 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
842 Primitive::kPrimInt,
843 nullptr);
844 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
845 } else {
846 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
847 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
848 }
849 x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
850 instruction_,
851 instruction_->GetDexPc(),
852 this);
853 CheckEntrypointTypes<
854 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
855 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
856
857 RestoreLiveRegisters(codegen, locations);
858 __ jmp(GetExitLabel());
859 }
860
861 const char* GetDescription() const OVERRIDE {
862 return "ReadBarrierForHeapReferenceSlowPathX86_64";
863 }
864
865 private:
866 CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
867 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
868 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
869 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
870 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
871 return static_cast<CpuRegister>(i);
872 }
873 }
874 // We shall never fail to find a free caller-save register, as
875 // there are more than two core caller-save registers on x86-64
876 // (meaning it is possible to find one which is different from
877 // `ref` and `obj`).
878 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
879 LOG(FATAL) << "Could not find a free caller-save register";
880 UNREACHABLE();
881 }
882
883 const Location out_;
884 const Location ref_;
885 const Location obj_;
886 const uint32_t offset_;
887 // An additional location containing an index to an array.
888 // Only used for HArrayGet and the UnsafeGetObject &
889 // UnsafeGetObjectVolatile intrinsics.
890 const Location index_;
891
892 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
893 };
894
895 // Slow path generating a read barrier for a GC root.
896 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
897 public:
898 ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
899 : SlowPathCode(instruction), out_(out), root_(root) {
900 DCHECK(kEmitCompilerReadBarrier);
901 }
902
903 void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
904 LocationSummary* locations = instruction_->GetLocations();
905 DCHECK(locations->CanCall());
906 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
907 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
908 << "Unexpected instruction in read barrier for GC root slow path: "
909 << instruction_->DebugName();
910
911 __ Bind(GetEntryLabel());
912 SaveLiveRegisters(codegen, locations);
913
914 InvokeRuntimeCallingConvention calling_convention;
915 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
916 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
917 x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
918 instruction_,
919 instruction_->GetDexPc(),
920 this);
921 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
922 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
923
924 RestoreLiveRegisters(codegen, locations);
925 __ jmp(GetExitLabel());
926 }
927
928 const char* GetDescription() const OVERRIDE { return "ReadBarrierForRootSlowPathX86_64"; }
929
930 private:
931 const Location out_;
932 const Location root_;
933
934 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
935 };
936
937 #undef __
938 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
939 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
940
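// Maps integer condition to x86_64 name.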
941 inline Condition X86_64IntegerCondition(IfCondition cond) {
942 switch (cond) {
943 case kCondEQ: return kEqual;
944 case kCondNE: return kNotEqual;
945 case kCondLT: return kLess;
946 case kCondLE: return kLessEqual;
947 case kCondGT: return kGreater;
948 case kCondGE: return kGreaterEqual;
949 case kCondB: return kBelow;
950 case kCondBE: return kBelowEqual;
951 case kCondA: return kAbove;
952 case kCondAE: return kAboveEqual;
953 }
954 LOG(FATAL) << "Unreachable";
955 UNREACHABLE();
956 }
957
958 // Maps FP condition to x86_64 name.
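// (The unsigned-flavored conditions are used because ucomiss/ucomisd report the comparison
// result through CF/ZF, the way an unsigned integer compare does.)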
959 inline Condition X86_64FPCondition(IfCondition cond) {
960 switch (cond) {
961 case kCondEQ: return kEqual;
962 case kCondNE: return kNotEqual;
963 case kCondLT: return kBelow;
964 case kCondLE: return kBelowEqual;
965 case kCondGT: return kAbove;
966 case kCondGE: return kAboveEqual;
967 default: break; // should not happen
968 };
969 LOG(FATAL) << "Unreachable";
970 UNREACHABLE();
971 }
972
973 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
974 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
975 HInvokeStaticOrDirect* invoke ATTRIBUTE_UNUSED) {
976 return desired_dispatch_info;
977 }
978
979 Location CodeGeneratorX86_64::GenerateCalleeMethodStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
980 Location temp) {
981 // All registers are assumed to be correctly set up.
982 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
983 switch (invoke->GetMethodLoadKind()) {
984 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
985 // temp = thread->string_init_entrypoint
986 uint32_t offset =
987 GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
988 __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip */ true));
989 break;
990 }
991 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
992 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
993 break;
994 case HInvokeStaticOrDirect::MethodLoadKind::kDirectAddress:
995 Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
996 break;
997 case HInvokeStaticOrDirect::MethodLoadKind::kDexCachePcRelative: {
998 __ movq(temp.AsRegister<CpuRegister>(),
999 Address::Absolute(kDummy32BitOffset, /* no_rip */ false));
1000 // Bind a new fixup label at the end of the "movq" insn.
1001 uint32_t offset = invoke->GetDexCacheArrayOffset();
1002 __ Bind(NewPcRelativeDexCacheArrayPatch(invoke->GetDexFileForPcRelativeDexCache(), offset));
1003 break;
1004 }
1005 case HInvokeStaticOrDirect::MethodLoadKind::kDexCacheViaMethod: {
1006 Location current_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
1007 Register method_reg;
1008 CpuRegister reg = temp.AsRegister<CpuRegister>();
1009 if (current_method.IsRegister()) {
1010 method_reg = current_method.AsRegister<Register>();
1011 } else {
1012 DCHECK(invoke->GetLocations()->Intrinsified());
1013 DCHECK(!current_method.IsValid());
1014 method_reg = reg.AsRegister();
1015 __ movq(reg, Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1016 }
1017 // /* ArtMethod*[] */ temp = temp.ptr_sized_fields_->dex_cache_resolved_methods_;
1018 __ movq(reg,
1019 Address(CpuRegister(method_reg),
1020 ArtMethod::DexCacheResolvedMethodsOffset(kX86_64PointerSize).SizeValue()));
1021 // temp = temp[index_in_cache];
1022 // Note: Don't use invoke->GetTargetMethod() as it may point to a different dex file.
1023 uint32_t index_in_cache = invoke->GetDexMethodIndex();
1024 __ movq(reg, Address(reg, CodeGenerator::GetCachePointerOffset(index_in_cache)));
1025 break;
1026 }
1027 }
1028 return callee_method;
1029 }
1030
1031 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(HInvokeStaticOrDirect* invoke,
1032 Location temp) {
1033 // All registers are assumed to be correctly set up.
1034 Location callee_method = GenerateCalleeMethodStaticOrDirectCall(invoke, temp);
1035
1036 switch (invoke->GetCodePtrLocation()) {
1037 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
1038 __ call(&frame_entry_label_);
1039 break;
1040 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
1041 // (callee_method + offset_of_quick_compiled_code)()
1042 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1043 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1044 kX86_64PointerSize).SizeValue()));
1045 break;
1046 }
1047
1048 DCHECK(!IsLeafMethod());
1049 }
1050
1051 void CodeGeneratorX86_64::GenerateVirtualCall(HInvokeVirtual* invoke, Location temp_in) {
1052 CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1053 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1054 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1055
1056 // Use the calling convention instead of the location of the receiver, as
1057 // intrinsics may have put the receiver in a different register. In the intrinsics
1058 // slow path, the arguments have been moved to the right place, so here we are
1059 // guaranteed that the receiver is the first register of the calling convention.
1060 InvokeDexCallingConvention calling_convention;
1061 Register receiver = calling_convention.GetRegisterAt(0);
1062
1063 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1064 // /* HeapReference<Class> */ temp = receiver->klass_
1065 __ movl(temp, Address(CpuRegister(receiver), class_offset));
1066 MaybeRecordImplicitNullCheck(invoke);
1067 // Instead of simply (possibly) unpoisoning `temp` here, we should
1068 // emit a read barrier for the previous class reference load.
1069 // However this is not required in practice, as this is an
1070 // intermediate/temporary reference and because the current
1071 // concurrent copying collector keeps the from-space memory
1072 // intact/accessible until the end of the marking phase (the
1073 // concurrent copying collector may not keep it so in the future).
1074 __ MaybeUnpoisonHeapReference(temp);
1075 // temp = temp->GetMethodAt(method_offset);
1076 __ movq(temp, Address(temp, method_offset));
1077 // call temp->GetEntryPoint();
1078 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1079 kX86_64PointerSize).SizeValue()));
1080 }
1081
1082 void CodeGeneratorX86_64::RecordBootStringPatch(HLoadString* load_string) {
1083 DCHECK(GetCompilerOptions().IsBootImage());
1084 string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
1085 __ Bind(&string_patches_.back().label);
1086 }
1087
1088 void CodeGeneratorX86_64::RecordBootTypePatch(HLoadClass* load_class) {
1089 boot_image_type_patches_.emplace_back(load_class->GetDexFile(),
1090 load_class->GetTypeIndex().index_);
1091 __ Bind(&boot_image_type_patches_.back().label);
1092 }
1093
1094 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1095 type_bss_entry_patches_.emplace_back(load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1096 return &type_bss_entry_patches_.back().label;
1097 }
1098
1099 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1100 DCHECK(!GetCompilerOptions().IsBootImage());
1101 string_patches_.emplace_back(load_string->GetDexFile(), load_string->GetStringIndex().index_);
1102 return &string_patches_.back().label;
1103 }
1104
1105 Label* CodeGeneratorX86_64::NewPcRelativeDexCacheArrayPatch(const DexFile& dex_file,
1106 uint32_t element_offset) {
1107 // Add a patch entry and return the label.
1108 pc_relative_dex_cache_patches_.emplace_back(dex_file, element_offset);
1109 return &pc_relative_dex_cache_patches_.back().label;
1110 }
1111
1112 // The label points to the end of the "movl" (or similar) instruction, but the literal offset
1113 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
1114 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
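// For example, for a RIP-relative "movl reg, [rip + disp32]" the label is bound right after
// the instruction, so label.Position() - 4 is the offset of the 4-byte disp32/immediate that
// the linker patches (a sketch; the same holds for the other patched instructions above).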
1115
1116 template <LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1117 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1118 const ArenaDeque<PatchInfo<Label>>& infos,
1119 ArenaVector<LinkerPatch>* linker_patches) {
1120 for (const PatchInfo<Label>& info : infos) {
1121 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1122 linker_patches->push_back(
1123 Factory(literal_offset, &info.dex_file, info.label.Position(), info.index));
1124 }
1125 }
1126
1127 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<LinkerPatch>* linker_patches) {
1128 DCHECK(linker_patches->empty());
1129 size_t size =
1130 pc_relative_dex_cache_patches_.size() +
1131 string_patches_.size() +
1132 boot_image_type_patches_.size() +
1133 type_bss_entry_patches_.size();
1134 linker_patches->reserve(size);
1135 EmitPcRelativeLinkerPatches<LinkerPatch::DexCacheArrayPatch>(pc_relative_dex_cache_patches_,
1136 linker_patches);
1137 if (!GetCompilerOptions().IsBootImage()) {
1138 DCHECK(boot_image_type_patches_.empty());
1139 EmitPcRelativeLinkerPatches<LinkerPatch::StringBssEntryPatch>(string_patches_, linker_patches);
1140 } else {
1141 // These are always PC-relative, see GetSupportedLoadClassKind()/GetSupportedLoadStringKind().
1142 EmitPcRelativeLinkerPatches<LinkerPatch::RelativeTypePatch>(boot_image_type_patches_,
1143 linker_patches);
1144 EmitPcRelativeLinkerPatches<LinkerPatch::RelativeStringPatch>(string_patches_, linker_patches);
1145 }
1146 EmitPcRelativeLinkerPatches<LinkerPatch::TypeBssEntryPatch>(type_bss_entry_patches_,
1147 linker_patches);
1148 DCHECK_EQ(size, linker_patches->size());
1149 }
1150
1151 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1152 stream << Register(reg);
1153 }
1154
1155 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1156 stream << FloatRegister(reg);
1157 }
1158
1159 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1160 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1161 return kX86_64WordSize;
1162 }
1163
1164 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1165 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1166 return kX86_64WordSize;
1167 }
1168
1169 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1170 if (GetGraph()->HasSIMD()) {
1171 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1172 } else {
1173 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1174 }
1175 return GetFloatingPointSpillSlotSize();
1176 }
1177
1178 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1179 if (GetGraph()->HasSIMD()) {
1180 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1181 } else {
1182 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1183 }
1184 return GetFloatingPointSpillSlotSize();
1185 }
1186
1187 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1188 HInstruction* instruction,
1189 uint32_t dex_pc,
1190 SlowPathCode* slow_path) {
1191 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1192 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1193 if (EntrypointRequiresStackMap(entrypoint)) {
1194 RecordPcInfo(instruction, dex_pc, slow_path);
1195 }
1196 }
1197
1198 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1199 HInstruction* instruction,
1200 SlowPathCode* slow_path) {
1201 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1202 GenerateInvokeRuntime(entry_point_offset);
1203 }
1204
1205 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
1206 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip */ true));
1207 }
1208
1209 static constexpr int kNumberOfCpuRegisterPairs = 0;
1210 // Use a fake return address register to mimic Quick.
1211 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1212 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1213 const X86_64InstructionSetFeatures& isa_features,
1214 const CompilerOptions& compiler_options,
1215 OptimizingCompilerStats* stats)
1216 : CodeGenerator(graph,
1217 kNumberOfCpuRegisters,
1218 kNumberOfFloatRegisters,
1219 kNumberOfCpuRegisterPairs,
1220 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1221 arraysize(kCoreCalleeSaves))
1222 | (1 << kFakeReturnRegister),
1223 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1224 arraysize(kFpuCalleeSaves)),
1225 compiler_options,
1226 stats),
1227 block_labels_(nullptr),
1228 location_builder_(graph, this),
1229 instruction_visitor_(graph, this),
1230 move_resolver_(graph->GetArena(), this),
1231 assembler_(graph->GetArena()),
1232 isa_features_(isa_features),
1233 constant_area_start_(0),
1234 pc_relative_dex_cache_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1235 string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1236 boot_image_type_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1237 type_bss_entry_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1238 fixups_to_jump_tables_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1239 jit_string_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)),
1240 jit_class_patches_(graph->GetArena()->Adapter(kArenaAllocCodeGenerator)) {
1241 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1242 }
1243
1244 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1245 CodeGeneratorX86_64* codegen)
1246 : InstructionCodeGenerator(graph, codegen),
1247 assembler_(codegen->GetAssembler()),
1248 codegen_(codegen) {}
1249
1250 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1251 // Stack register is always reserved.
1252 blocked_core_registers_[RSP] = true;
1253
1254 // Block the register used as TMP.
1255 blocked_core_registers_[TMP] = true;
1256 }
1257
1258 static dwarf::Reg DWARFReg(Register reg) {
1259 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1260 }
1261
1262 static dwarf::Reg DWARFReg(FloatRegister reg) {
1263 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1264 }
1265
1266 void CodeGeneratorX86_64::GenerateFrameEntry() {
1267 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1268 __ Bind(&frame_entry_label_);
1269 bool skip_overflow_check = IsLeafMethod()
1270 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1271 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1272
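// When the check is not skipped, probe the far end of the region the frame may use: the
// testq below reads at RSP minus the reserved bytes, so if the stack cannot grow that far
// the access faults and the fault handler turns it into a StackOverflowError.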
1273 if (!skip_overflow_check) {
1274 __ testq(CpuRegister(RAX), Address(
1275 CpuRegister(RSP), -static_cast<int32_t>(GetStackOverflowReservedBytes(kX86_64))));
1276 RecordPcInfo(nullptr, 0);
1277 }
1278
1279 if (HasEmptyFrame()) {
1280 return;
1281 }
1282
1283 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1284 Register reg = kCoreCalleeSaves[i];
1285 if (allocated_registers_.ContainsCoreRegister(reg)) {
1286 __ pushq(CpuRegister(reg));
1287 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1288 __ cfi().RelOffset(DWARFReg(reg), 0);
1289 }
1290 }
1291
1292 int adjust = GetFrameSize() - GetCoreSpillSize();
1293 __ subq(CpuRegister(RSP), Immediate(adjust));
1294 __ cfi().AdjustCFAOffset(adjust);
1295 uint32_t xmm_spill_location = GetFpuSpillStart();
1296 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1297
1298 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1299 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1300 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1301 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1302 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1303 }
1304 }
1305
1306 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1307 // Initialize should_deoptimize flag to 0.
1308 __ movl(Address(CpuRegister(RSP), xmm_spill_location - kShouldDeoptimizeFlagSize),
1309 Immediate(0));
1310 }
1311
1312 // Save the current method if we need it. Note that we do not
1313 // do this in HCurrentMethod, as the instruction might have been removed
1314 // in the SSA graph.
1315 if (RequiresCurrentMethod()) {
1316 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1317 CpuRegister(kMethodRegisterArgument));
1318 }
1319 }
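// Minimal sketch of the prologue laid down above for a non-leaf method that
// spills one core and one FP callee-save (register names and offsets are
// illustrative; the actual set depends on allocated_registers_ and
// GetFrameSize()):
//
//   testq %rax, -<reserved_bytes>(%rsp)       // implicit stack-overflow probe
//   pushq %rbx                                // core callee-save spill (+CFI)
//   subq  $<frame_adjust>, %rsp               // rest of the frame
//   movsd %xmm12, <xmm_offset>(%rsp)          // FP callee-save spill
//   movq  <method_reg>, <method_offset>(%rsp) // store the current ArtMethod*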
1320
1321 void CodeGeneratorX86_64::GenerateFrameExit() {
1322 __ cfi().RememberState();
1323 if (!HasEmptyFrame()) {
1324 uint32_t xmm_spill_location = GetFpuSpillStart();
1325 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1326 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1327 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1328 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1329 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1330 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1331 }
1332 }
1333
1334 int adjust = GetFrameSize() - GetCoreSpillSize();
1335 __ addq(CpuRegister(RSP), Immediate(adjust));
1336 __ cfi().AdjustCFAOffset(-adjust);
1337
1338 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1339 Register reg = kCoreCalleeSaves[i];
1340 if (allocated_registers_.ContainsCoreRegister(reg)) {
1341 __ popq(CpuRegister(reg));
1342 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1343 __ cfi().Restore(DWARFReg(reg));
1344 }
1345 }
1346 }
1347 __ ret();
1348 __ cfi().RestoreState();
1349 __ cfi().DefCFAOffset(GetFrameSize());
1350 }
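// The epilogue above mirrors the prologue in reverse (illustrative): reload
// the spilled XMM callee-saves with movsd, addq the frame adjustment back to
// RSP, popq the core callee-saves, then ret. RememberState/RestoreState keep
// the CFI consistent for any code emitted after this return within the same
// method (e.g. other return blocks or slow paths).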
1351
1352 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1353 __ Bind(GetLabelOf(block));
1354 }
1355
1356 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1357 if (source.Equals(destination)) {
1358 return;
1359 }
1360 if (destination.IsRegister()) {
1361 CpuRegister dest = destination.AsRegister<CpuRegister>();
1362 if (source.IsRegister()) {
1363 __ movq(dest, source.AsRegister<CpuRegister>());
1364 } else if (source.IsFpuRegister()) {
1365 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1366 } else if (source.IsStackSlot()) {
1367 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1368 } else if (source.IsConstant()) {
1369 HConstant* constant = source.GetConstant();
1370 if (constant->IsLongConstant()) {
1371 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1372 } else {
1373 Load32BitValue(dest, GetInt32ValueOf(constant));
1374 }
1375 } else {
1376 DCHECK(source.IsDoubleStackSlot());
1377 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1378 }
1379 } else if (destination.IsFpuRegister()) {
1380 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1381 if (source.IsRegister()) {
1382 __ movd(dest, source.AsRegister<CpuRegister>());
1383 } else if (source.IsFpuRegister()) {
1384 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1385 } else if (source.IsConstant()) {
1386 HConstant* constant = source.GetConstant();
1387 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1388 if (constant->IsFloatConstant()) {
1389 Load32BitValue(dest, static_cast<int32_t>(value));
1390 } else {
1391 Load64BitValue(dest, value);
1392 }
1393 } else if (source.IsStackSlot()) {
1394 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1395 } else {
1396 DCHECK(source.IsDoubleStackSlot());
1397 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1398 }
1399 } else if (destination.IsStackSlot()) {
1400 if (source.IsRegister()) {
1401 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1402 source.AsRegister<CpuRegister>());
1403 } else if (source.IsFpuRegister()) {
1404 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1405 source.AsFpuRegister<XmmRegister>());
1406 } else if (source.IsConstant()) {
1407 HConstant* constant = source.GetConstant();
1408 int32_t value = GetInt32ValueOf(constant);
1409 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1410 } else {
1411 DCHECK(source.IsStackSlot()) << source;
1412 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1413 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1414 }
1415 } else {
1416 DCHECK(destination.IsDoubleStackSlot());
1417 if (source.IsRegister()) {
1418 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1419 source.AsRegister<CpuRegister>());
1420 } else if (source.IsFpuRegister()) {
1421 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1422 source.AsFpuRegister<XmmRegister>());
1423 } else if (source.IsConstant()) {
1424 HConstant* constant = source.GetConstant();
1425 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1426 int64_t value = GetInt64ValueOf(constant);
1427 Store64BitValueToStack(destination, value);
1428 } else {
1429 DCHECK(source.IsDoubleStackSlot());
1430 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1431 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1432 }
1433 }
1434 }
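// Example (illustrative): a stack-slot-to-stack-slot move has no
// memory-to-memory encoding on x86-64, so the code above bounces the value
// through the reserved TMP scratch register (blocked in
// SetupBlockedRegisters), roughly:
//
//   movq <src_offset>(%rsp), <TMP>
//   movq <TMP>, <dst_offset>(%rsp)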
1435
1436 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1437 DCHECK(location.IsRegister());
1438 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1439 }
1440
1441 void CodeGeneratorX86_64::MoveLocation(
1442 Location dst, Location src, Primitive::Type dst_type ATTRIBUTE_UNUSED) {
1443 Move(dst, src);
1444 }
1445
1446 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1447 if (location.IsRegister()) {
1448 locations->AddTemp(location);
1449 } else {
1450 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1451 }
1452 }
1453
1454 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1455 DCHECK(!successor->IsExitBlock());
1456
1457 HBasicBlock* block = got->GetBlock();
1458 HInstruction* previous = got->GetPrevious();
1459
1460 HLoopInformation* info = block->GetLoopInformation();
1461 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1462 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1463 return;
1464 }
1465
1466 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1467 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1468 }
1469 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1470 __ jmp(codegen_->GetLabelOf(successor));
1471 }
1472 }
1473
1474 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1475 got->SetLocations(nullptr);
1476 }
1477
1478 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1479 HandleGoto(got, got->GetSuccessor());
1480 }
1481
1482 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1483 try_boundary->SetLocations(nullptr);
1484 }
1485
1486 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1487 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1488 if (!successor->IsExitBlock()) {
1489 HandleGoto(try_boundary, successor);
1490 }
1491 }
1492
1493 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1494 exit->SetLocations(nullptr);
1495 }
1496
1497 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1498 }
1499
1500 template<class LabelType>
1501 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1502 LabelType* true_label,
1503 LabelType* false_label) {
1504 if (cond->IsFPConditionTrueIfNaN()) {
1505 __ j(kUnordered, true_label);
1506 } else if (cond->IsFPConditionFalseIfNaN()) {
1507 __ j(kUnordered, false_label);
1508 }
1509 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1510 }
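// Worked example (illustrative): for a single-precision `a > b` where NaN
// must make the condition false, the jumps above come out as roughly
//
//   jp <false_label>    // unordered: at least one operand is NaN
//   ja <true_label>     // the X86_64FPCondition for kCondGT
//
// i.e. the unordered case is filtered out first, and only then is the
// ordinary flag-based jump taken. The ucomiss/ucomisd itself is emitted by
// the caller (GenerateCompareTest or HandleCondition).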
1511
1512 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1513 LocationSummary* locations = condition->GetLocations();
1514
1515 Location left = locations->InAt(0);
1516 Location right = locations->InAt(1);
1517 Primitive::Type type = condition->InputAt(0)->GetType();
1518 switch (type) {
1519 case Primitive::kPrimBoolean:
1520 case Primitive::kPrimByte:
1521 case Primitive::kPrimChar:
1522 case Primitive::kPrimShort:
1523 case Primitive::kPrimInt:
1524 case Primitive::kPrimNot: {
1525 codegen_->GenerateIntCompare(left, right);
1526 break;
1527 }
1528 case Primitive::kPrimLong: {
1529 codegen_->GenerateLongCompare(left, right);
1530 break;
1531 }
1532 case Primitive::kPrimFloat: {
1533 if (right.IsFpuRegister()) {
1534 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1535 } else if (right.IsConstant()) {
1536 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1537 codegen_->LiteralFloatAddress(
1538 right.GetConstant()->AsFloatConstant()->GetValue()));
1539 } else {
1540 DCHECK(right.IsStackSlot());
1541 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1542 Address(CpuRegister(RSP), right.GetStackIndex()));
1543 }
1544 break;
1545 }
1546 case Primitive::kPrimDouble: {
1547 if (right.IsFpuRegister()) {
1548 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1549 } else if (right.IsConstant()) {
1550 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1551 codegen_->LiteralDoubleAddress(
1552 right.GetConstant()->AsDoubleConstant()->GetValue()));
1553 } else {
1554 DCHECK(right.IsDoubleStackSlot());
1555 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1556 Address(CpuRegister(RSP), right.GetStackIndex()));
1557 }
1558 break;
1559 }
1560 default:
1561 LOG(FATAL) << "Unexpected condition type " << type;
1562 }
1563 }
1564
1565 template<class LabelType>
1566 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1567 LabelType* true_target_in,
1568 LabelType* false_target_in) {
1569 // Generated branching requires both targets to be explicit. If either of the
1570 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1571 LabelType fallthrough_target;
1572 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1573 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1574
1575 // Generate the comparison to set the CC.
1576 GenerateCompareTest(condition);
1577
1578 // Now generate the correct jump(s).
1579 Primitive::Type type = condition->InputAt(0)->GetType();
1580 switch (type) {
1581 case Primitive::kPrimLong: {
1582 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1583 break;
1584 }
1585 case Primitive::kPrimFloat: {
1586 GenerateFPJumps(condition, true_target, false_target);
1587 break;
1588 }
1589 case Primitive::kPrimDouble: {
1590 GenerateFPJumps(condition, true_target, false_target);
1591 break;
1592 }
1593 default:
1594 LOG(FATAL) << "Unexpected condition type " << type;
1595 }
1596
1597 if (false_target != &fallthrough_target) {
1598 __ jmp(false_target);
1599 }
1600
1601 if (fallthrough_target.IsLinked()) {
1602 __ Bind(&fallthrough_target);
1603 }
1604 }
1605
1606 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1607   // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1608   // are set only strictly before `branch`. We also can't reuse the eflags for
1609   // materialized FP conditions, which require the more complex branching of
1610   // GenerateFPJumps.
1610 return cond->IsCondition() &&
1611 cond->GetNext() == branch &&
1612 !Primitive::IsFloatingPointType(cond->InputAt(0)->GetType());
1613 }
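// Example (illustrative): for the fragment
//   cond: HLessThan(a, b)   // materialized, integer inputs
//   if:   HIf(cond)
// the compare that materializes `cond` is the instruction immediately before
// the branch, so the branch can consume the still-live eflags. If anything
// is emitted in between (e.g. a move that loads zero with xorl), the flags
// are clobbered and the materialized boolean has to be re-tested instead.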
1614
1615 template<class LabelType>
1616 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1617 size_t condition_input_index,
1618 LabelType* true_target,
1619 LabelType* false_target) {
1620 HInstruction* cond = instruction->InputAt(condition_input_index);
1621
1622 if (true_target == nullptr && false_target == nullptr) {
1623 // Nothing to do. The code always falls through.
1624 return;
1625 } else if (cond->IsIntConstant()) {
1626 // Constant condition, statically compared against "true" (integer value 1).
1627 if (cond->AsIntConstant()->IsTrue()) {
1628 if (true_target != nullptr) {
1629 __ jmp(true_target);
1630 }
1631 } else {
1632 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1633 if (false_target != nullptr) {
1634 __ jmp(false_target);
1635 }
1636 }
1637 return;
1638 }
1639
1640 // The following code generates these patterns:
1641 // (1) true_target == nullptr && false_target != nullptr
1642 // - opposite condition true => branch to false_target
1643 // (2) true_target != nullptr && false_target == nullptr
1644 // - condition true => branch to true_target
1645 // (3) true_target != nullptr && false_target != nullptr
1646 // - condition true => branch to true_target
1647 // - branch to false_target
1648 if (IsBooleanValueOrMaterializedCondition(cond)) {
1649 if (AreEflagsSetFrom(cond, instruction)) {
1650 if (true_target == nullptr) {
1651 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1652 } else {
1653 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1654 }
1655 } else {
1656 // Materialized condition, compare against 0.
1657 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1658 if (lhs.IsRegister()) {
1659 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1660 } else {
1661 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1662 }
1663 if (true_target == nullptr) {
1664 __ j(kEqual, false_target);
1665 } else {
1666 __ j(kNotEqual, true_target);
1667 }
1668 }
1669 } else {
1670 // Condition has not been materialized, use its inputs as the
1671 // comparison and its condition as the branch condition.
1672 HCondition* condition = cond->AsCondition();
1673
1674 // If this is a long or FP comparison that has been folded into
1675 // the HCondition, generate the comparison directly.
1676 Primitive::Type type = condition->InputAt(0)->GetType();
1677 if (type == Primitive::kPrimLong || Primitive::IsFloatingPointType(type)) {
1678 GenerateCompareTestAndBranch(condition, true_target, false_target);
1679 return;
1680 }
1681
1682 Location lhs = condition->GetLocations()->InAt(0);
1683 Location rhs = condition->GetLocations()->InAt(1);
1684 codegen_->GenerateIntCompare(lhs, rhs);
1685 if (true_target == nullptr) {
1686 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1687 } else {
1688 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1689 }
1690 }
1691
1692 // If neither branch falls through (case 3), the conditional branch to `true_target`
1693 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1694 if (true_target != nullptr && false_target != nullptr) {
1695 __ jmp(false_target);
1696 }
1697 }
1698
1699 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1700 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(if_instr);
1701 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1702 locations->SetInAt(0, Location::Any());
1703 }
1704 }
1705
1706 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1707 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1708 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1709 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1710 nullptr : codegen_->GetLabelOf(true_successor);
1711 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1712 nullptr : codegen_->GetLabelOf(false_successor);
1713 GenerateTestAndBranch(if_instr, /* condition_input_index */ 0, true_target, false_target);
1714 }
1715
1716 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1717 LocationSummary* locations = new (GetGraph()->GetArena())
1718 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1719 InvokeRuntimeCallingConvention calling_convention;
1720 RegisterSet caller_saves = RegisterSet::Empty();
1721 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1722 locations->SetCustomSlowPathCallerSaves(caller_saves);
1723 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1724 locations->SetInAt(0, Location::Any());
1725 }
1726 }
1727
1728 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1729 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1730 GenerateTestAndBranch<Label>(deoptimize,
1731 /* condition_input_index */ 0,
1732 slow_path->GetEntryLabel(),
1733 /* false_target */ nullptr);
1734 }
1735
1736 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1737 LocationSummary* locations = new (GetGraph()->GetArena())
1738 LocationSummary(flag, LocationSummary::kNoCall);
1739 locations->SetOut(Location::RequiresRegister());
1740 }
1741
1742 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1743 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
1744 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1745 }
1746
1747 static bool SelectCanUseCMOV(HSelect* select) {
1748 // There are no conditional move instructions for XMMs.
1749 if (Primitive::IsFloatingPointType(select->GetType())) {
1750 return false;
1751 }
1752
1753 // A FP condition doesn't generate the single CC that we need.
1754 HInstruction* condition = select->GetCondition();
1755 if (condition->IsCondition() &&
1756 Primitive::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1757 return false;
1758 }
1759
1760 // We can generate a CMOV for this Select.
1761 return true;
1762 }
1763
1764 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1765 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(select);
1766 if (Primitive::IsFloatingPointType(select->GetType())) {
1767 locations->SetInAt(0, Location::RequiresFpuRegister());
1768 locations->SetInAt(1, Location::Any());
1769 } else {
1770 locations->SetInAt(0, Location::RequiresRegister());
1771 if (SelectCanUseCMOV(select)) {
1772 if (select->InputAt(1)->IsConstant()) {
1773 locations->SetInAt(1, Location::RequiresRegister());
1774 } else {
1775 locations->SetInAt(1, Location::Any());
1776 }
1777 } else {
1778 locations->SetInAt(1, Location::Any());
1779 }
1780 }
1781 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1782 locations->SetInAt(2, Location::RequiresRegister());
1783 }
1784 locations->SetOut(Location::SameAsFirstInput());
1785 }
1786
1787 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1788 LocationSummary* locations = select->GetLocations();
1789 if (SelectCanUseCMOV(select)) {
1790 // If both the condition and the source types are integer, we can generate
1791 // a CMOV to implement Select.
1792 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1793 Location value_true_loc = locations->InAt(1);
1794 DCHECK(locations->InAt(0).Equals(locations->Out()));
1795
1796 HInstruction* select_condition = select->GetCondition();
1797 Condition cond = kNotEqual;
1798
1799 // Figure out how to test the 'condition'.
1800 if (select_condition->IsCondition()) {
1801 HCondition* condition = select_condition->AsCondition();
1802 if (!condition->IsEmittedAtUseSite()) {
1803 // This was a previously materialized condition.
1804 // Can we use the existing condition code?
1805 if (AreEflagsSetFrom(condition, select)) {
1806 // Materialization was the previous instruction. Condition codes are right.
1807 cond = X86_64IntegerCondition(condition->GetCondition());
1808 } else {
1809 // No, we have to recreate the condition code.
1810 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1811 __ testl(cond_reg, cond_reg);
1812 }
1813 } else {
1814 GenerateCompareTest(condition);
1815 cond = X86_64IntegerCondition(condition->GetCondition());
1816 }
1817 } else {
1818 // Must be a Boolean condition, which needs to be compared to 0.
1819 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1820 __ testl(cond_reg, cond_reg);
1821 }
1822
1823 // If the condition is true, overwrite the output, which already contains false.
1824 // Generate the correct sized CMOV.
1825 bool is_64_bit = Primitive::Is64BitType(select->GetType());
1826 if (value_true_loc.IsRegister()) {
1827 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1828 } else {
1829 __ cmov(cond,
1830 value_false,
1831 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1832 }
1833 } else {
1834 NearLabel false_target;
1835 GenerateTestAndBranch<NearLabel>(select,
1836 /* condition_input_index */ 2,
1837 /* true_target */ nullptr,
1838 &false_target);
1839 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1840 __ Bind(&false_target);
1841 }
1842 }
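// Sketch of the CMOV path above for an int Select whose condition was
// materialized earlier into a register (register names illustrative):
//
//   testl %ecx, %ecx       // re-create the condition code
//   cmovnel %edx, %eax     // out (already = false value) := true value
//
// The output holds the false value up front because the locations builder
// pins the output to the first input (SameAsFirstInput), so only the "true"
// case needs a conditional write.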
1843
1844 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1845 new (GetGraph()->GetArena()) LocationSummary(info);
1846 }
1847
1848 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1849 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1850 }
1851
1852 void CodeGeneratorX86_64::GenerateNop() {
1853 __ nop();
1854 }
1855
1856 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1857 LocationSummary* locations =
1858 new (GetGraph()->GetArena()) LocationSummary(cond, LocationSummary::kNoCall);
1859 // Handle the long/FP comparisons made in instruction simplification.
1860 switch (cond->InputAt(0)->GetType()) {
1861 case Primitive::kPrimLong:
1862 locations->SetInAt(0, Location::RequiresRegister());
1863 locations->SetInAt(1, Location::Any());
1864 break;
1865 case Primitive::kPrimFloat:
1866 case Primitive::kPrimDouble:
1867 locations->SetInAt(0, Location::RequiresFpuRegister());
1868 locations->SetInAt(1, Location::Any());
1869 break;
1870 default:
1871 locations->SetInAt(0, Location::RequiresRegister());
1872 locations->SetInAt(1, Location::Any());
1873 break;
1874 }
1875 if (!cond->IsEmittedAtUseSite()) {
1876 locations->SetOut(Location::RequiresRegister());
1877 }
1878 }
1879
1880 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1881 if (cond->IsEmittedAtUseSite()) {
1882 return;
1883 }
1884
1885 LocationSummary* locations = cond->GetLocations();
1886 Location lhs = locations->InAt(0);
1887 Location rhs = locations->InAt(1);
1888 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1889 NearLabel true_label, false_label;
1890
1891 switch (cond->InputAt(0)->GetType()) {
1892 default:
1893 // Integer case.
1894
1895 // Clear output register: setcc only sets the low byte.
1896 __ xorl(reg, reg);
1897
1898 codegen_->GenerateIntCompare(lhs, rhs);
1899 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1900 return;
1901 case Primitive::kPrimLong:
1902 // Clear output register: setcc only sets the low byte.
1903 __ xorl(reg, reg);
1904
1905 codegen_->GenerateLongCompare(lhs, rhs);
1906 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1907 return;
1908 case Primitive::kPrimFloat: {
1909 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1910 if (rhs.IsConstant()) {
1911 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
1912 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
1913 } else if (rhs.IsStackSlot()) {
1914 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1915 } else {
1916 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1917 }
1918 GenerateFPJumps(cond, &true_label, &false_label);
1919 break;
1920 }
1921 case Primitive::kPrimDouble: {
1922 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1923 if (rhs.IsConstant()) {
1924 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
1925 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
1926 } else if (rhs.IsDoubleStackSlot()) {
1927 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
1928 } else {
1929 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
1930 }
1931 GenerateFPJumps(cond, &true_label, &false_label);
1932 break;
1933 }
1934 }
1935
1936 // Convert the jumps into the result.
1937 NearLabel done_label;
1938
1939 // False case: result = 0.
1940 __ Bind(&false_label);
1941 __ xorl(reg, reg);
1942 __ jmp(&done_label);
1943
1944 // True case: result = 1.
1945 __ Bind(&true_label);
1946 __ movl(reg, Immediate(1));
1947 __ Bind(&done_label);
1948 }
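// Sketch of the integer path above, materializing `x < y` into a register
// (register names illustrative):
//
//   xorl %eax, %eax    // clear first: setcc only writes the low byte
//   cmpl %esi, %edi    // GenerateIntCompare(lhs, rhs)
//   setl %al           // X86_64IntegerCondition(kCondLT)
//
// The clearing xorl has to come before the cmpl because xorl itself
// overwrites the eflags.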
1949
1950 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
1951 HandleCondition(comp);
1952 }
1953
1954 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
1955 HandleCondition(comp);
1956 }
1957
1958 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
1959 HandleCondition(comp);
1960 }
1961
1962 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
1963 HandleCondition(comp);
1964 }
1965
1966 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
1967 HandleCondition(comp);
1968 }
1969
1970 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
1971 HandleCondition(comp);
1972 }
1973
1974 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1975 HandleCondition(comp);
1976 }
1977
1978 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
1979 HandleCondition(comp);
1980 }
1981
1982 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
1983 HandleCondition(comp);
1984 }
1985
1986 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
1987 HandleCondition(comp);
1988 }
1989
1990 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1991 HandleCondition(comp);
1992 }
1993
1994 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
1995 HandleCondition(comp);
1996 }
1997
1998 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
1999 HandleCondition(comp);
2000 }
2001
2002 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2003 HandleCondition(comp);
2004 }
2005
2006 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2007 HandleCondition(comp);
2008 }
2009
2010 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2011 HandleCondition(comp);
2012 }
2013
2014 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2015 HandleCondition(comp);
2016 }
2017
2018 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2019 HandleCondition(comp);
2020 }
2021
2022 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2023 HandleCondition(comp);
2024 }
2025
2026 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2027 HandleCondition(comp);
2028 }
2029
2030 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2031 LocationSummary* locations =
2032 new (GetGraph()->GetArena()) LocationSummary(compare, LocationSummary::kNoCall);
2033 switch (compare->InputAt(0)->GetType()) {
2034 case Primitive::kPrimBoolean:
2035 case Primitive::kPrimByte:
2036 case Primitive::kPrimShort:
2037 case Primitive::kPrimChar:
2038 case Primitive::kPrimInt:
2039 case Primitive::kPrimLong: {
2040 locations->SetInAt(0, Location::RequiresRegister());
2041 locations->SetInAt(1, Location::Any());
2042 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2043 break;
2044 }
2045 case Primitive::kPrimFloat:
2046 case Primitive::kPrimDouble: {
2047 locations->SetInAt(0, Location::RequiresFpuRegister());
2048 locations->SetInAt(1, Location::Any());
2049 locations->SetOut(Location::RequiresRegister());
2050 break;
2051 }
2052 default:
2053 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2054 }
2055 }
2056
2057 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2058 LocationSummary* locations = compare->GetLocations();
2059 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2060 Location left = locations->InAt(0);
2061 Location right = locations->InAt(1);
2062
2063 NearLabel less, greater, done;
2064 Primitive::Type type = compare->InputAt(0)->GetType();
2065 Condition less_cond = kLess;
2066
2067 switch (type) {
2068 case Primitive::kPrimBoolean:
2069 case Primitive::kPrimByte:
2070 case Primitive::kPrimShort:
2071 case Primitive::kPrimChar:
2072 case Primitive::kPrimInt: {
2073 codegen_->GenerateIntCompare(left, right);
2074 break;
2075 }
2076 case Primitive::kPrimLong: {
2077 codegen_->GenerateLongCompare(left, right);
2078 break;
2079 }
2080 case Primitive::kPrimFloat: {
2081 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2082 if (right.IsConstant()) {
2083 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2084 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2085 } else if (right.IsStackSlot()) {
2086 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2087 } else {
2088 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2089 }
2090 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2091 less_cond = kBelow; // ucomis{s,d} sets CF
2092 break;
2093 }
2094 case Primitive::kPrimDouble: {
2095 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2096 if (right.IsConstant()) {
2097 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2098 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2099 } else if (right.IsDoubleStackSlot()) {
2100 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2101 } else {
2102 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2103 }
2104 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2105 less_cond = kBelow; // ucomis{s,d} sets CF
2106 break;
2107 }
2108 default:
2109 LOG(FATAL) << "Unexpected compare type " << type;
2110 }
2111
2112 __ movl(out, Immediate(0));
2113 __ j(kEqual, &done);
2114 __ j(less_cond, &less);
2115
2116 __ Bind(&greater);
2117 __ movl(out, Immediate(1));
2118 __ jmp(&done);
2119
2120 __ Bind(&less);
2121 __ movl(out, Immediate(-1));
2122
2123 __ Bind(&done);
2124 }
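// Worked example (illustrative) of the float path above with gt-bias:
//
//   ucomiss <right>, <left>
//   jp   <greater>       // unordered: NaN compares as "greater" (gt-bias)
//   movl $0, <out>
//   je   <done>
//   jb   <less>          // kBelow: ucomiss reports "less than" via CF
//   greater: movl $1, <out>  ; jmp <done>
//   less:    movl $-1, <out>
//   done:
//
// An lt-bias compare only differs in sending the unordered case to <less>.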
2125
2126 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2127 LocationSummary* locations =
2128 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2129 locations->SetOut(Location::ConstantLocation(constant));
2130 }
2131
2132 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2133 // Will be generated at use site.
2134 }
2135
2136 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2137 LocationSummary* locations =
2138 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2139 locations->SetOut(Location::ConstantLocation(constant));
2140 }
2141
2142 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2143 // Will be generated at use site.
2144 }
2145
2146 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2147 LocationSummary* locations =
2148 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2149 locations->SetOut(Location::ConstantLocation(constant));
2150 }
2151
2152 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2153 // Will be generated at use site.
2154 }
2155
2156 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2157 LocationSummary* locations =
2158 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2159 locations->SetOut(Location::ConstantLocation(constant));
2160 }
2161
2162 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2163 // Will be generated at use site.
2164 }
2165
2166 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2167 LocationSummary* locations =
2168 new (GetGraph()->GetArena()) LocationSummary(constant, LocationSummary::kNoCall);
2169 locations->SetOut(Location::ConstantLocation(constant));
2170 }
2171
2172 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2173 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2174 // Will be generated at use site.
2175 }
2176
2177 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2178 memory_barrier->SetLocations(nullptr);
2179 }
2180
2181 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2182 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2183 }
2184
2185 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2186 ret->SetLocations(nullptr);
2187 }
2188
2189 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2190 codegen_->GenerateFrameExit();
2191 }
2192
2193 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2194 LocationSummary* locations =
2195 new (GetGraph()->GetArena()) LocationSummary(ret, LocationSummary::kNoCall);
2196 switch (ret->InputAt(0)->GetType()) {
2197 case Primitive::kPrimBoolean:
2198 case Primitive::kPrimByte:
2199 case Primitive::kPrimChar:
2200 case Primitive::kPrimShort:
2201 case Primitive::kPrimInt:
2202 case Primitive::kPrimNot:
2203 case Primitive::kPrimLong:
2204 locations->SetInAt(0, Location::RegisterLocation(RAX));
2205 break;
2206
2207 case Primitive::kPrimFloat:
2208 case Primitive::kPrimDouble:
2209 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2210 break;
2211
2212 default:
2213 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2214 }
2215 }
2216
2217 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2218 if (kIsDebugBuild) {
2219 switch (ret->InputAt(0)->GetType()) {
2220 case Primitive::kPrimBoolean:
2221 case Primitive::kPrimByte:
2222 case Primitive::kPrimChar:
2223 case Primitive::kPrimShort:
2224 case Primitive::kPrimInt:
2225 case Primitive::kPrimNot:
2226 case Primitive::kPrimLong:
2227 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2228 break;
2229
2230 case Primitive::kPrimFloat:
2231 case Primitive::kPrimDouble:
2232 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2233 XMM0);
2234 break;
2235
2236 default:
2237 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2238 }
2239 }
2240 codegen_->GenerateFrameExit();
2241 }
2242
2243 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(Primitive::Type type) const {
2244 switch (type) {
2245 case Primitive::kPrimBoolean:
2246 case Primitive::kPrimByte:
2247 case Primitive::kPrimChar:
2248 case Primitive::kPrimShort:
2249 case Primitive::kPrimInt:
2250 case Primitive::kPrimNot:
2251 case Primitive::kPrimLong:
2252 return Location::RegisterLocation(RAX);
2253
2254 case Primitive::kPrimVoid:
2255 return Location::NoLocation();
2256
2257 case Primitive::kPrimDouble:
2258 case Primitive::kPrimFloat:
2259 return Location::FpuRegisterLocation(XMM0);
2260 }
2261
2262 UNREACHABLE();
2263 }
2264
2265 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2266 return Location::RegisterLocation(kMethodRegisterArgument);
2267 }
2268
2269 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(Primitive::Type type) {
2270 switch (type) {
2271 case Primitive::kPrimBoolean:
2272 case Primitive::kPrimByte:
2273 case Primitive::kPrimChar:
2274 case Primitive::kPrimShort:
2275 case Primitive::kPrimInt:
2276 case Primitive::kPrimNot: {
2277 uint32_t index = gp_index_++;
2278 stack_index_++;
2279 if (index < calling_convention.GetNumberOfRegisters()) {
2280 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2281 } else {
2282 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2283 }
2284 }
2285
2286 case Primitive::kPrimLong: {
2287 uint32_t index = gp_index_;
2288 stack_index_ += 2;
2289 if (index < calling_convention.GetNumberOfRegisters()) {
2290 gp_index_ += 1;
2291 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2292 } else {
2293 gp_index_ += 2;
2294 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2295 }
2296 }
2297
2298 case Primitive::kPrimFloat: {
2299 uint32_t index = float_index_++;
2300 stack_index_++;
2301 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2302 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2303 } else {
2304 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2305 }
2306 }
2307
2308 case Primitive::kPrimDouble: {
2309 uint32_t index = float_index_++;
2310 stack_index_ += 2;
2311 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2312 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2313 } else {
2314 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2315 }
2316 }
2317
2318 case Primitive::kPrimVoid:
2319 LOG(FATAL) << "Unexpected parameter type " << type;
2320 break;
2321 }
2322 return Location::NoLocation();
2323 }
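// Worked example (illustrative) for a signature (int, long, float, double):
// the int and long take gp_index_ 0 and 1, the float and double take
// float_index_ 0 and 1, and stack_index_ advances by 1, 2, 1 and 2. A
// register location is handed out while the index is below the calling
// convention's register count; after that the argument lands in a
// StackSlot/DoubleStackSlot whose offset is derived from the stack_index_
// value before the increment (hence the "- 1" / "- 2" above).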
2324
2325 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2326 // The trampoline uses the same calling convention as dex calling conventions,
2327 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2328 // the method_idx.
2329 HandleInvoke(invoke);
2330 }
2331
2332 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2333 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2334 }
2335
2336 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2337 // Explicit clinit checks triggered by static invokes must have been pruned by
2338 // art::PrepareForRegisterAllocation.
2339 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2340
2341 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2342 if (intrinsic.TryDispatch(invoke)) {
2343 return;
2344 }
2345
2346 HandleInvoke(invoke);
2347 }
2348
2349 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2350 if (invoke->GetLocations()->Intrinsified()) {
2351 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2352 intrinsic.Dispatch(invoke);
2353 return true;
2354 }
2355 return false;
2356 }
2357
2358 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2359 // Explicit clinit checks triggered by static invokes must have been pruned by
2360 // art::PrepareForRegisterAllocation.
2361 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2362
2363 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2364 return;
2365 }
2366
2367 LocationSummary* locations = invoke->GetLocations();
2368 codegen_->GenerateStaticOrDirectCall(
2369 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2370 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2371 }
2372
2373 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2374 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2375 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2376 }
2377
2378 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2379 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2380 if (intrinsic.TryDispatch(invoke)) {
2381 return;
2382 }
2383
2384 HandleInvoke(invoke);
2385 }
2386
2387 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2388 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2389 return;
2390 }
2391
2392 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2393 DCHECK(!codegen_->IsLeafMethod());
2394 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2395 }
2396
2397 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2398 HandleInvoke(invoke);
2399 // Add the hidden argument.
2400 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2401 }
2402
2403 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2404 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2405 LocationSummary* locations = invoke->GetLocations();
2406 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2407 CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2408 Location receiver = locations->InAt(0);
2409 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2410
2411   // Set the hidden argument. It is safe to do this here, as RAX
2412   // won't be modified thereafter, before the `call` instruction.
2413 DCHECK_EQ(RAX, hidden_reg.AsRegister());
2414 codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2415
2416 if (receiver.IsStackSlot()) {
2417 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2418 // /* HeapReference<Class> */ temp = temp->klass_
2419 __ movl(temp, Address(temp, class_offset));
2420 } else {
2421 // /* HeapReference<Class> */ temp = receiver->klass_
2422 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2423 }
2424 codegen_->MaybeRecordImplicitNullCheck(invoke);
2425 // Instead of simply (possibly) unpoisoning `temp` here, we should
2426 // emit a read barrier for the previous class reference load.
2427 // However this is not required in practice, as this is an
2428 // intermediate/temporary reference and because the current
2429 // concurrent copying collector keeps the from-space memory
2430   // intact/accessible until the end of the marking phase (future
2431   // collectors may not provide this guarantee).
2432 __ MaybeUnpoisonHeapReference(temp);
2433 // temp = temp->GetAddressOfIMT()
2434 __ movq(temp,
2435 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2436   // Compute the offset of this method's slot in the IMT.
2437 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2438 invoke->GetImtIndex(), kX86_64PointerSize));
2439 // temp = temp->GetImtEntryAt(method_offset);
2440 __ movq(temp, Address(temp, method_offset));
2441 // call temp->GetEntryPoint();
2442 __ call(Address(
2443 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2444
2445 DCHECK(!codegen_->IsLeafMethod());
2446 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2447 }
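// Rough shape of the interface dispatch emitted above (offsets symbolic,
// temp register illustrative; the hidden argument is pinned to RAX):
//
//   movq $<dex_method_index>, %rax          // hidden arg for conflict stubs
//   movl <class_offset>(<receiver>), <temp> // receiver->klass_ (+ unpoison)
//   movq <imt_ptr_offset>(<temp>), <temp>   // klass->imt_
//   movq <imt_entry_offset>(<temp>), <temp> // ArtMethod* from the IMT slot
//   call *<quick_entry_offset>(<temp>)      // jump to the compiled code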
2448
2449 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2450 HandleInvoke(invoke);
2451 }
2452
2453 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2454 codegen_->GenerateInvokePolymorphicCall(invoke);
2455 }
2456
2457 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2458 LocationSummary* locations =
2459 new (GetGraph()->GetArena()) LocationSummary(neg, LocationSummary::kNoCall);
2460 switch (neg->GetResultType()) {
2461 case Primitive::kPrimInt:
2462 case Primitive::kPrimLong:
2463 locations->SetInAt(0, Location::RequiresRegister());
2464 locations->SetOut(Location::SameAsFirstInput());
2465 break;
2466
2467 case Primitive::kPrimFloat:
2468 case Primitive::kPrimDouble:
2469 locations->SetInAt(0, Location::RequiresFpuRegister());
2470 locations->SetOut(Location::SameAsFirstInput());
2471 locations->AddTemp(Location::RequiresFpuRegister());
2472 break;
2473
2474 default:
2475 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2476 }
2477 }
2478
2479 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2480 LocationSummary* locations = neg->GetLocations();
2481 Location out = locations->Out();
2482 Location in = locations->InAt(0);
2483 switch (neg->GetResultType()) {
2484 case Primitive::kPrimInt:
2485 DCHECK(in.IsRegister());
2486 DCHECK(in.Equals(out));
2487 __ negl(out.AsRegister<CpuRegister>());
2488 break;
2489
2490 case Primitive::kPrimLong:
2491 DCHECK(in.IsRegister());
2492 DCHECK(in.Equals(out));
2493 __ negq(out.AsRegister<CpuRegister>());
2494 break;
2495
2496 case Primitive::kPrimFloat: {
2497 DCHECK(in.Equals(out));
2498 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2499 // Implement float negation with an exclusive or with value
2500 // 0x80000000 (mask for bit 31, representing the sign of a
2501 // single-precision floating-point number).
2502 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2503 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2504 break;
2505 }
2506
2507 case Primitive::kPrimDouble: {
2508 DCHECK(in.Equals(out));
2509 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2510 // Implement double negation with an exclusive or with value
2511 // 0x8000000000000000 (mask for bit 63, representing the sign of
2512 // a double-precision floating-point number).
2513 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2514 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2515 break;
2516 }
2517
2518 default:
2519 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2520 }
2521 }
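// Illustrative bit-level view of the FP negation above: there is no XMM
// negate instruction, so the sign bit is flipped by XORing with a mask
// loaded from the constant area, e.g. for a float
//
//   movss <literal 0x80000000>, %xmm1
//   xorps %xmm1, %xmm0        // 1.5f -> -1.5f, 0.0f -> -0.0f
//
// which is exact arithmetic negation for every value, including +/-0.0 and
// NaN, matching Java's unary minus.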
2522
2523 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2524 LocationSummary* locations =
2525 new (GetGraph()->GetArena()) LocationSummary(conversion, LocationSummary::kNoCall);
2526 Primitive::Type result_type = conversion->GetResultType();
2527 Primitive::Type input_type = conversion->GetInputType();
2528 DCHECK_NE(result_type, input_type);
2529
2530 // The Java language does not allow treating boolean as an integral type but
2531 // our bit representation makes it safe.
2532
2533 switch (result_type) {
2534 case Primitive::kPrimByte:
2535 switch (input_type) {
2536 case Primitive::kPrimLong:
2537 // Type conversion from long to byte is a result of code transformations.
2538 case Primitive::kPrimBoolean:
2539 // Boolean input is a result of code transformations.
2540 case Primitive::kPrimShort:
2541 case Primitive::kPrimInt:
2542 case Primitive::kPrimChar:
2543 // Processing a Dex `int-to-byte' instruction.
2544 locations->SetInAt(0, Location::Any());
2545 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2546 break;
2547
2548 default:
2549 LOG(FATAL) << "Unexpected type conversion from " << input_type
2550 << " to " << result_type;
2551 }
2552 break;
2553
2554 case Primitive::kPrimShort:
2555 switch (input_type) {
2556 case Primitive::kPrimLong:
2557 // Type conversion from long to short is a result of code transformations.
2558 case Primitive::kPrimBoolean:
2559 // Boolean input is a result of code transformations.
2560 case Primitive::kPrimByte:
2561 case Primitive::kPrimInt:
2562 case Primitive::kPrimChar:
2563 // Processing a Dex `int-to-short' instruction.
2564 locations->SetInAt(0, Location::Any());
2565 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2566 break;
2567
2568 default:
2569 LOG(FATAL) << "Unexpected type conversion from " << input_type
2570 << " to " << result_type;
2571 }
2572 break;
2573
2574 case Primitive::kPrimInt:
2575 switch (input_type) {
2576 case Primitive::kPrimLong:
2577 // Processing a Dex `long-to-int' instruction.
2578 locations->SetInAt(0, Location::Any());
2579 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2580 break;
2581
2582 case Primitive::kPrimFloat:
2583 // Processing a Dex `float-to-int' instruction.
2584 locations->SetInAt(0, Location::RequiresFpuRegister());
2585 locations->SetOut(Location::RequiresRegister());
2586 break;
2587
2588 case Primitive::kPrimDouble:
2589 // Processing a Dex `double-to-int' instruction.
2590 locations->SetInAt(0, Location::RequiresFpuRegister());
2591 locations->SetOut(Location::RequiresRegister());
2592 break;
2593
2594 default:
2595 LOG(FATAL) << "Unexpected type conversion from " << input_type
2596 << " to " << result_type;
2597 }
2598 break;
2599
2600 case Primitive::kPrimLong:
2601 switch (input_type) {
2602 case Primitive::kPrimBoolean:
2603 // Boolean input is a result of code transformations.
2604 case Primitive::kPrimByte:
2605 case Primitive::kPrimShort:
2606 case Primitive::kPrimInt:
2607 case Primitive::kPrimChar:
2608 // Processing a Dex `int-to-long' instruction.
2609 // TODO: We would benefit from a (to-be-implemented)
2610 // Location::RegisterOrStackSlot requirement for this input.
2611 locations->SetInAt(0, Location::RequiresRegister());
2612 locations->SetOut(Location::RequiresRegister());
2613 break;
2614
2615 case Primitive::kPrimFloat:
2616 // Processing a Dex `float-to-long' instruction.
2617 locations->SetInAt(0, Location::RequiresFpuRegister());
2618 locations->SetOut(Location::RequiresRegister());
2619 break;
2620
2621 case Primitive::kPrimDouble:
2622 // Processing a Dex `double-to-long' instruction.
2623 locations->SetInAt(0, Location::RequiresFpuRegister());
2624 locations->SetOut(Location::RequiresRegister());
2625 break;
2626
2627 default:
2628 LOG(FATAL) << "Unexpected type conversion from " << input_type
2629 << " to " << result_type;
2630 }
2631 break;
2632
2633 case Primitive::kPrimChar:
2634 switch (input_type) {
2635 case Primitive::kPrimLong:
2636 // Type conversion from long to char is a result of code transformations.
2637 case Primitive::kPrimBoolean:
2638 // Boolean input is a result of code transformations.
2639 case Primitive::kPrimByte:
2640 case Primitive::kPrimShort:
2641 case Primitive::kPrimInt:
2642 // Processing a Dex `int-to-char' instruction.
2643 locations->SetInAt(0, Location::Any());
2644 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2645 break;
2646
2647 default:
2648 LOG(FATAL) << "Unexpected type conversion from " << input_type
2649 << " to " << result_type;
2650 }
2651 break;
2652
2653 case Primitive::kPrimFloat:
2654 switch (input_type) {
2655 case Primitive::kPrimBoolean:
2656 // Boolean input is a result of code transformations.
2657 case Primitive::kPrimByte:
2658 case Primitive::kPrimShort:
2659 case Primitive::kPrimInt:
2660 case Primitive::kPrimChar:
2661 // Processing a Dex `int-to-float' instruction.
2662 locations->SetInAt(0, Location::Any());
2663 locations->SetOut(Location::RequiresFpuRegister());
2664 break;
2665
2666 case Primitive::kPrimLong:
2667 // Processing a Dex `long-to-float' instruction.
2668 locations->SetInAt(0, Location::Any());
2669 locations->SetOut(Location::RequiresFpuRegister());
2670 break;
2671
2672 case Primitive::kPrimDouble:
2673 // Processing a Dex `double-to-float' instruction.
2674 locations->SetInAt(0, Location::Any());
2675 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2676 break;
2677
2678 default:
2679 LOG(FATAL) << "Unexpected type conversion from " << input_type
2680 << " to " << result_type;
2681       }
2682 break;
2683
2684 case Primitive::kPrimDouble:
2685 switch (input_type) {
2686 case Primitive::kPrimBoolean:
2687 // Boolean input is a result of code transformations.
2688 case Primitive::kPrimByte:
2689 case Primitive::kPrimShort:
2690 case Primitive::kPrimInt:
2691 case Primitive::kPrimChar:
2692 // Processing a Dex `int-to-double' instruction.
2693 locations->SetInAt(0, Location::Any());
2694 locations->SetOut(Location::RequiresFpuRegister());
2695 break;
2696
2697 case Primitive::kPrimLong:
2698 // Processing a Dex `long-to-double' instruction.
2699 locations->SetInAt(0, Location::Any());
2700 locations->SetOut(Location::RequiresFpuRegister());
2701 break;
2702
2703 case Primitive::kPrimFloat:
2704 // Processing a Dex `float-to-double' instruction.
2705 locations->SetInAt(0, Location::Any());
2706 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2707 break;
2708
2709 default:
2710 LOG(FATAL) << "Unexpected type conversion from " << input_type
2711 << " to " << result_type;
2712 }
2713 break;
2714
2715 default:
2716 LOG(FATAL) << "Unexpected type conversion from " << input_type
2717 << " to " << result_type;
2718 }
2719 }
2720
2721 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2722 LocationSummary* locations = conversion->GetLocations();
2723 Location out = locations->Out();
2724 Location in = locations->InAt(0);
2725 Primitive::Type result_type = conversion->GetResultType();
2726 Primitive::Type input_type = conversion->GetInputType();
2727 DCHECK_NE(result_type, input_type);
2728 switch (result_type) {
2729 case Primitive::kPrimByte:
2730 switch (input_type) {
2731 case Primitive::kPrimLong:
2732 // Type conversion from long to byte is a result of code transformations.
2733 case Primitive::kPrimBoolean:
2734 // Boolean input is a result of code transformations.
2735 case Primitive::kPrimShort:
2736 case Primitive::kPrimInt:
2737 case Primitive::kPrimChar:
2738 // Processing a Dex `int-to-byte' instruction.
2739 if (in.IsRegister()) {
2740 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2741 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2742 __ movsxb(out.AsRegister<CpuRegister>(),
2743 Address(CpuRegister(RSP), in.GetStackIndex()));
2744 } else {
2745 __ movl(out.AsRegister<CpuRegister>(),
2746 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2747 }
2748 break;
2749
2750 default:
2751 LOG(FATAL) << "Unexpected type conversion from " << input_type
2752 << " to " << result_type;
2753 }
2754 break;
2755
2756 case Primitive::kPrimShort:
2757 switch (input_type) {
2758 case Primitive::kPrimLong:
2759 // Type conversion from long to short is a result of code transformations.
2760 case Primitive::kPrimBoolean:
2761 // Boolean input is a result of code transformations.
2762 case Primitive::kPrimByte:
2763 case Primitive::kPrimInt:
2764 case Primitive::kPrimChar:
2765 // Processing a Dex `int-to-short' instruction.
2766 if (in.IsRegister()) {
2767 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2768 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2769 __ movsxw(out.AsRegister<CpuRegister>(),
2770 Address(CpuRegister(RSP), in.GetStackIndex()));
2771 } else {
2772 __ movl(out.AsRegister<CpuRegister>(),
2773 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2774 }
2775 break;
2776
2777 default:
2778 LOG(FATAL) << "Unexpected type conversion from " << input_type
2779 << " to " << result_type;
2780 }
2781 break;
2782
2783 case Primitive::kPrimInt:
2784 switch (input_type) {
2785 case Primitive::kPrimLong:
2786 // Processing a Dex `long-to-int' instruction.
2787 if (in.IsRegister()) {
2788 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2789 } else if (in.IsDoubleStackSlot()) {
2790 __ movl(out.AsRegister<CpuRegister>(),
2791 Address(CpuRegister(RSP), in.GetStackIndex()));
2792 } else {
2793 DCHECK(in.IsConstant());
2794 DCHECK(in.GetConstant()->IsLongConstant());
2795 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2796 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2797 }
2798 break;
2799
2800 case Primitive::kPrimFloat: {
2801 // Processing a Dex `float-to-int' instruction.
2802 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2803 CpuRegister output = out.AsRegister<CpuRegister>();
2804 NearLabel done, nan;
2805
2806 __ movl(output, Immediate(kPrimIntMax));
2807 // if input >= (float)INT_MAX goto done
2808 __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2809 __ j(kAboveEqual, &done);
2810 // if input == NaN goto nan
2811 __ j(kUnordered, &nan);
2812 // output = float-to-int-truncate(input)
2813 __ cvttss2si(output, input, false);
2814 __ jmp(&done);
2815 __ Bind(&nan);
2816 // output = 0
2817 __ xorl(output, output);
2818 __ Bind(&done);
2819 break;
2820 }
2821
2822 case Primitive::kPrimDouble: {
2823 // Processing a Dex `double-to-int' instruction.
2824 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2825 CpuRegister output = out.AsRegister<CpuRegister>();
2826 NearLabel done, nan;
2827
2828 __ movl(output, Immediate(kPrimIntMax));
2829 // if input >= (double)INT_MAX goto done
2830 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2831 __ j(kAboveEqual, &done);
2832 // if input == NaN goto nan
2833 __ j(kUnordered, &nan);
2834 // output = double-to-int-truncate(input)
2835 __ cvttsd2si(output, input);
2836 __ jmp(&done);
2837 __ Bind(&nan);
2838 // output = 0
2839 __ xorl(output, output);
2840 __ Bind(&done);
2841 break;
2842 }
2843
2844 default:
2845 LOG(FATAL) << "Unexpected type conversion from " << input_type
2846 << " to " << result_type;
2847 }
2848 break;
2849
2850 case Primitive::kPrimLong:
2851 DCHECK(out.IsRegister());
2852 switch (input_type) {
2853 case Primitive::kPrimBoolean:
2854 // Boolean input is a result of code transformations.
2855 case Primitive::kPrimByte:
2856 case Primitive::kPrimShort:
2857 case Primitive::kPrimInt:
2858 case Primitive::kPrimChar:
2859 // Processing a Dex `int-to-long' instruction.
2860 DCHECK(in.IsRegister());
2861 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2862 break;
2863
2864 case Primitive::kPrimFloat: {
2865 // Processing a Dex `float-to-long' instruction.
2866 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2867 CpuRegister output = out.AsRegister<CpuRegister>();
2868 NearLabel done, nan;
2869
2870 codegen_->Load64BitValue(output, kPrimLongMax);
2871 // if input >= (float)LONG_MAX goto done
2872 __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2873 __ j(kAboveEqual, &done);
2874 // if input == NaN goto nan
2875 __ j(kUnordered, &nan);
2876 // output = float-to-long-truncate(input)
2877 __ cvttss2si(output, input, true);
2878 __ jmp(&done);
2879 __ Bind(&nan);
2880 // output = 0
2881 __ xorl(output, output);
2882 __ Bind(&done);
2883 break;
2884 }
2885
2886 case Primitive::kPrimDouble: {
2887 // Processing a Dex `double-to-long' instruction.
2888 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2889 CpuRegister output = out.AsRegister<CpuRegister>();
2890 NearLabel done, nan;
2891
2892 codegen_->Load64BitValue(output, kPrimLongMax);
2893 // if input >= (double)LONG_MAX goto done
2894 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2895 __ j(kAboveEqual, &done);
2896 // if input == NaN goto nan
2897 __ j(kUnordered, &nan);
2898 // output = double-to-long-truncate(input)
2899 __ cvttsd2si(output, input, true);
2900 __ jmp(&done);
2901 __ Bind(&nan);
2902 // output = 0
2903 __ xorl(output, output);
2904 __ Bind(&done);
2905 break;
2906 }
2907
2908 default:
2909 LOG(FATAL) << "Unexpected type conversion from " << input_type
2910 << " to " << result_type;
2911 }
2912 break;
2913
2914 case Primitive::kPrimChar:
2915 switch (input_type) {
2916 case Primitive::kPrimLong:
2917 // Type conversion from long to char is a result of code transformations.
2918 case Primitive::kPrimBoolean:
2919 // Boolean input is a result of code transformations.
2920 case Primitive::kPrimByte:
2921 case Primitive::kPrimShort:
2922 case Primitive::kPrimInt:
2923 // Processing a Dex `int-to-char' instruction.
2924 if (in.IsRegister()) {
2925 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2926 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2927 __ movzxw(out.AsRegister<CpuRegister>(),
2928 Address(CpuRegister(RSP), in.GetStackIndex()));
2929 } else {
2930 __ movl(out.AsRegister<CpuRegister>(),
2931 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2932 }
2933 break;
2934
2935 default:
2936 LOG(FATAL) << "Unexpected type conversion from " << input_type
2937 << " to " << result_type;
2938 }
2939 break;
2940
2941 case Primitive::kPrimFloat:
2942 switch (input_type) {
2943 case Primitive::kPrimBoolean:
2944 // Boolean input is a result of code transformations.
2945 case Primitive::kPrimByte:
2946 case Primitive::kPrimShort:
2947 case Primitive::kPrimInt:
2948 case Primitive::kPrimChar:
2949 // Processing a Dex `int-to-float' instruction.
2950 if (in.IsRegister()) {
2951 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
2952 } else if (in.IsConstant()) {
2953 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
2954 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2955 codegen_->Load32BitValue(dest, static_cast<float>(v));
2956 } else {
2957 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2958 Address(CpuRegister(RSP), in.GetStackIndex()), false);
2959 }
2960 break;
2961
2962 case Primitive::kPrimLong:
2963 // Processing a Dex `long-to-float' instruction.
2964 if (in.IsRegister()) {
2965 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
2966 } else if (in.IsConstant()) {
2967 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
2968 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2969 codegen_->Load32BitValue(dest, static_cast<float>(v));
2970 } else {
2971 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
2972 Address(CpuRegister(RSP), in.GetStackIndex()), true);
2973 }
2974 break;
2975
2976 case Primitive::kPrimDouble:
2977 // Processing a Dex `double-to-float' instruction.
2978 if (in.IsFpuRegister()) {
2979 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
2980 } else if (in.IsConstant()) {
2981 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
2982 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
2983 codegen_->Load32BitValue(dest, static_cast<float>(v));
2984 } else {
2985 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
2986 Address(CpuRegister(RSP), in.GetStackIndex()));
2987 }
2988 break;
2989
2990 default:
2991 LOG(FATAL) << "Unexpected type conversion from " << input_type
2992 << " to " << result_type;
2993 }
2994 break;
2995
2996 case Primitive::kPrimDouble:
2997 switch (input_type) {
2998 case Primitive::kPrimBoolean:
2999 // Boolean input is a result of code transformations.
3000 case Primitive::kPrimByte:
3001 case Primitive::kPrimShort:
3002 case Primitive::kPrimInt:
3003 case Primitive::kPrimChar:
3004 // Processing a Dex `int-to-double' instruction.
3005 if (in.IsRegister()) {
3006 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3007 } else if (in.IsConstant()) {
3008 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3009 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3010 codegen_->Load64BitValue(dest, static_cast<double>(v));
3011 } else {
3012 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3013 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3014 }
3015 break;
3016
3017 case Primitive::kPrimLong:
3018 // Processing a Dex `long-to-double' instruction.
3019 if (in.IsRegister()) {
3020 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3021 } else if (in.IsConstant()) {
3022 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3023 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3024 codegen_->Load64BitValue(dest, static_cast<double>(v));
3025 } else {
3026 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3027 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3028 }
3029 break;
3030
3031 case Primitive::kPrimFloat:
3032 // Processing a Dex `float-to-double' instruction.
3033 if (in.IsFpuRegister()) {
3034 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3035 } else if (in.IsConstant()) {
3036 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3037 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3038 codegen_->Load64BitValue(dest, static_cast<double>(v));
3039 } else {
3040 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3041 Address(CpuRegister(RSP), in.GetStackIndex()));
3042 }
3043 break;
3044
3045 default:
3046 LOG(FATAL) << "Unexpected type conversion from " << input_type
3047 << " to " << result_type;
3048 }
3049 break;
3050
3051 default:
3052 LOG(FATAL) << "Unexpected type conversion from " << input_type
3053 << " to " << result_type;
3054 }
3055 }
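// A rough summary of the float/double -> int/long sequences above: the Java narrowing rules
// require NaN to convert to 0 and out-of-range values to saturate. For example (illustrative):
//   (int) Float.NaN  == 0
//   (int) 1e10f      == Integer.MAX_VALUE
//   (int) -1e10f     == Integer.MIN_VALUE
// The explicit comparison against MAX and the unordered branch cover the first two cases;
// cvttss2si/cvttsd2si return the "integer indefinite" value (0x80000000 / 0x8000000000000000)
// for the remaining out-of-range inputs, which happens to equal MIN, so no extra check is needed.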
3056
3057 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3058 LocationSummary* locations =
3059 new (GetGraph()->GetArena()) LocationSummary(add, LocationSummary::kNoCall);
3060 switch (add->GetResultType()) {
3061 case Primitive::kPrimInt: {
3062 locations->SetInAt(0, Location::RequiresRegister());
3063 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3064 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3065 break;
3066 }
3067
3068 case Primitive::kPrimLong: {
3069 locations->SetInAt(0, Location::RequiresRegister());
3070 // We can use a leaq or addq if the constant can fit in an immediate.
3071 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3072 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3073 break;
3074 }
3075
3076 case Primitive::kPrimDouble:
3077 case Primitive::kPrimFloat: {
3078 locations->SetInAt(0, Location::RequiresFpuRegister());
3079 locations->SetInAt(1, Location::Any());
3080 locations->SetOut(Location::SameAsFirstInput());
3081 break;
3082 }
3083
3084 default:
3085 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3086 }
3087 }
3088
3089 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3090 LocationSummary* locations = add->GetLocations();
3091 Location first = locations->InAt(0);
3092 Location second = locations->InAt(1);
3093 Location out = locations->Out();
3094
3095 switch (add->GetResultType()) {
3096 case Primitive::kPrimInt: {
3097 if (second.IsRegister()) {
3098 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3099 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3100 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3101 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3102 } else {
3103 __ leal(out.AsRegister<CpuRegister>(), Address(
3104 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3105 }
3106 } else if (second.IsConstant()) {
3107 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3108 __ addl(out.AsRegister<CpuRegister>(),
3109 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3110 } else {
3111 __ leal(out.AsRegister<CpuRegister>(), Address(
3112 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3113 }
3114 } else {
3115 DCHECK(first.Equals(locations->Out()));
3116 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3117 }
3118 break;
3119 }
3120
3121 case Primitive::kPrimLong: {
3122 if (second.IsRegister()) {
3123 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3124 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3125 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3126 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3127 } else {
3128 __ leaq(out.AsRegister<CpuRegister>(), Address(
3129 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3130 }
3131 } else {
3132 DCHECK(second.IsConstant());
3133 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3134 int32_t int32_value = Low32Bits(value);
3135 DCHECK_EQ(int32_value, value);
3136 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3137 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3138 } else {
3139 __ leaq(out.AsRegister<CpuRegister>(), Address(
3140 first.AsRegister<CpuRegister>(), int32_value));
3141 }
3142 }
3143 break;
3144 }
3145
3146 case Primitive::kPrimFloat: {
3147 if (second.IsFpuRegister()) {
3148 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3149 } else if (second.IsConstant()) {
3150 __ addss(first.AsFpuRegister<XmmRegister>(),
3151 codegen_->LiteralFloatAddress(
3152 second.GetConstant()->AsFloatConstant()->GetValue()));
3153 } else {
3154 DCHECK(second.IsStackSlot());
3155 __ addss(first.AsFpuRegister<XmmRegister>(),
3156 Address(CpuRegister(RSP), second.GetStackIndex()));
3157 }
3158 break;
3159 }
3160
3161 case Primitive::kPrimDouble: {
3162 if (second.IsFpuRegister()) {
3163 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3164 } else if (second.IsConstant()) {
3165 __ addsd(first.AsFpuRegister<XmmRegister>(),
3166 codegen_->LiteralDoubleAddress(
3167 second.GetConstant()->AsDoubleConstant()->GetValue()));
3168 } else {
3169 DCHECK(second.IsDoubleStackSlot());
3170 __ addsd(first.AsFpuRegister<XmmRegister>(),
3171 Address(CpuRegister(RSP), second.GetStackIndex()));
3172 }
3173 break;
3174 }
3175
3176 default:
3177 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3178 }
3179 }
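// Note on the integer cases above: when the register allocator picks an output register that is
// neither input, leal/leaq serves as a non-destructive three-operand add, e.g. (sketch):
//   leal out, [first + second]   // out = first + second
//   leal out, [first + imm]      // out = first + constant
// HAdd never relies on the condition flags, so the fact that LEA does not update them is fine.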
3180
3181 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3182 LocationSummary* locations =
3183 new (GetGraph()->GetArena()) LocationSummary(sub, LocationSummary::kNoCall);
3184 switch (sub->GetResultType()) {
3185 case Primitive::kPrimInt: {
3186 locations->SetInAt(0, Location::RequiresRegister());
3187 locations->SetInAt(1, Location::Any());
3188 locations->SetOut(Location::SameAsFirstInput());
3189 break;
3190 }
3191 case Primitive::kPrimLong: {
3192 locations->SetInAt(0, Location::RequiresRegister());
3193 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3194 locations->SetOut(Location::SameAsFirstInput());
3195 break;
3196 }
3197 case Primitive::kPrimFloat:
3198 case Primitive::kPrimDouble: {
3199 locations->SetInAt(0, Location::RequiresFpuRegister());
3200 locations->SetInAt(1, Location::Any());
3201 locations->SetOut(Location::SameAsFirstInput());
3202 break;
3203 }
3204 default:
3205 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3206 }
3207 }
3208
3209 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3210 LocationSummary* locations = sub->GetLocations();
3211 Location first = locations->InAt(0);
3212 Location second = locations->InAt(1);
3213 DCHECK(first.Equals(locations->Out()));
3214 switch (sub->GetResultType()) {
3215 case Primitive::kPrimInt: {
3216 if (second.IsRegister()) {
3217 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3218 } else if (second.IsConstant()) {
3219 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3220 __ subl(first.AsRegister<CpuRegister>(), imm);
3221 } else {
3222 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3223 }
3224 break;
3225 }
3226 case Primitive::kPrimLong: {
3227 if (second.IsConstant()) {
3228 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3229 DCHECK(IsInt<32>(value));
3230 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3231 } else {
3232 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3233 }
3234 break;
3235 }
3236
3237 case Primitive::kPrimFloat: {
3238 if (second.IsFpuRegister()) {
3239 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3240 } else if (second.IsConstant()) {
3241 __ subss(first.AsFpuRegister<XmmRegister>(),
3242 codegen_->LiteralFloatAddress(
3243 second.GetConstant()->AsFloatConstant()->GetValue()));
3244 } else {
3245 DCHECK(second.IsStackSlot());
3246 __ subss(first.AsFpuRegister<XmmRegister>(),
3247 Address(CpuRegister(RSP), second.GetStackIndex()));
3248 }
3249 break;
3250 }
3251
3252 case Primitive::kPrimDouble: {
3253 if (second.IsFpuRegister()) {
3254 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3255 } else if (second.IsConstant()) {
3256 __ subsd(first.AsFpuRegister<XmmRegister>(),
3257 codegen_->LiteralDoubleAddress(
3258 second.GetConstant()->AsDoubleConstant()->GetValue()));
3259 } else {
3260 DCHECK(second.IsDoubleStackSlot());
3261 __ subsd(first.AsFpuRegister<XmmRegister>(),
3262 Address(CpuRegister(RSP), second.GetStackIndex()));
3263 }
3264 break;
3265 }
3266
3267 default:
3268 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3269 }
3270 }
3271
3272 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3273 LocationSummary* locations =
3274 new (GetGraph()->GetArena()) LocationSummary(mul, LocationSummary::kNoCall);
3275 switch (mul->GetResultType()) {
3276 case Primitive::kPrimInt: {
3277 locations->SetInAt(0, Location::RequiresRegister());
3278 locations->SetInAt(1, Location::Any());
3279 if (mul->InputAt(1)->IsIntConstant()) {
3280 // Can use 3 operand multiply.
3281 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3282 } else {
3283 locations->SetOut(Location::SameAsFirstInput());
3284 }
3285 break;
3286 }
3287 case Primitive::kPrimLong: {
3288 locations->SetInAt(0, Location::RequiresRegister());
3289 locations->SetInAt(1, Location::Any());
3290 if (mul->InputAt(1)->IsLongConstant() &&
3291 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3292 // Can use 3 operand multiply.
3293 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3294 } else {
3295 locations->SetOut(Location::SameAsFirstInput());
3296 }
3297 break;
3298 }
3299 case Primitive::kPrimFloat:
3300 case Primitive::kPrimDouble: {
3301 locations->SetInAt(0, Location::RequiresFpuRegister());
3302 locations->SetInAt(1, Location::Any());
3303 locations->SetOut(Location::SameAsFirstInput());
3304 break;
3305 }
3306
3307 default:
3308 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3309 }
3310 }
3311
3312 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3313 LocationSummary* locations = mul->GetLocations();
3314 Location first = locations->InAt(0);
3315 Location second = locations->InAt(1);
3316 Location out = locations->Out();
3317 switch (mul->GetResultType()) {
3318 case Primitive::kPrimInt:
3319 // The constant may have ended up in a register, so test explicitly to avoid
3320 // problems where the output may not be the same as the first operand.
3321 if (mul->InputAt(1)->IsIntConstant()) {
3322 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3323 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3324 } else if (second.IsRegister()) {
3325 DCHECK(first.Equals(out));
3326 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3327 } else {
3328 DCHECK(first.Equals(out));
3329 DCHECK(second.IsStackSlot());
3330 __ imull(first.AsRegister<CpuRegister>(),
3331 Address(CpuRegister(RSP), second.GetStackIndex()));
3332 }
3333 break;
3334 case Primitive::kPrimLong: {
3335 // The constant may have ended up in a register, so test explicitly to avoid
3336 // problems where the output may not be the same as the first operand.
3337 if (mul->InputAt(1)->IsLongConstant()) {
3338 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3339 if (IsInt<32>(value)) {
3340 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3341 Immediate(static_cast<int32_t>(value)));
3342 } else {
3343 // Have to use the constant area.
3344 DCHECK(first.Equals(out));
3345 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3346 }
3347 } else if (second.IsRegister()) {
3348 DCHECK(first.Equals(out));
3349 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3350 } else {
3351 DCHECK(second.IsDoubleStackSlot());
3352 DCHECK(first.Equals(out));
3353 __ imulq(first.AsRegister<CpuRegister>(),
3354 Address(CpuRegister(RSP), second.GetStackIndex()));
3355 }
3356 break;
3357 }
3358
3359 case Primitive::kPrimFloat: {
3360 DCHECK(first.Equals(out));
3361 if (second.IsFpuRegister()) {
3362 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3363 } else if (second.IsConstant()) {
3364 __ mulss(first.AsFpuRegister<XmmRegister>(),
3365 codegen_->LiteralFloatAddress(
3366 second.GetConstant()->AsFloatConstant()->GetValue()));
3367 } else {
3368 DCHECK(second.IsStackSlot());
3369 __ mulss(first.AsFpuRegister<XmmRegister>(),
3370 Address(CpuRegister(RSP), second.GetStackIndex()));
3371 }
3372 break;
3373 }
3374
3375 case Primitive::kPrimDouble: {
3376 DCHECK(first.Equals(out));
3377 if (second.IsFpuRegister()) {
3378 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3379 } else if (second.IsConstant()) {
3380 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3381 codegen_->LiteralDoubleAddress(
3382 second.GetConstant()->AsDoubleConstant()->GetValue()));
3383 } else {
3384 DCHECK(second.IsDoubleStackSlot());
3385 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3386 Address(CpuRegister(RSP), second.GetStackIndex()));
3387 }
3388 break;
3389 }
3390
3391 default:
3392 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3393 }
3394 }
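// Note on the multiply cases above: the two- and three-operand forms of imul only produce the
// low half of the product, which is all HMul needs. The three-operand form takes at most a
// 32-bit immediate, so a long constant that does not fit in 32 bits is loaded from the constant
// area instead, e.g. (sketch): imulq rax, [rip + literal_constant].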
3395
3396 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3397 uint32_t stack_adjustment, bool is_float) {
3398 if (source.IsStackSlot()) {
3399 DCHECK(is_float);
3400 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3401 } else if (source.IsDoubleStackSlot()) {
3402 DCHECK(!is_float);
3403 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3404 } else {
3405 // Write the value to the temporary location on the stack and load to FP stack.
3406 if (is_float) {
3407 Location stack_temp = Location::StackSlot(temp_offset);
3408 codegen_->Move(stack_temp, source);
3409 __ flds(Address(CpuRegister(RSP), temp_offset));
3410 } else {
3411 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3412 codegen_->Move(stack_temp, source);
3413 __ fldl(Address(CpuRegister(RSP), temp_offset));
3414 }
3415 }
3416 }
3417
GenerateRemFP(HRem * rem)3418 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3419 Primitive::Type type = rem->GetResultType();
3420 bool is_float = type == Primitive::kPrimFloat;
3421 size_t elem_size = Primitive::ComponentSize(type);
3422 LocationSummary* locations = rem->GetLocations();
3423 Location first = locations->InAt(0);
3424 Location second = locations->InAt(1);
3425 Location out = locations->Out();
3426
3427 // Create stack space for 2 elements.
3428 // TODO: enhance register allocator to ask for stack temporaries.
3429 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3430
3431 // Load the values to the FP stack in reverse order, using temporaries if needed.
3432 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3433 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3434
3435 // Loop doing FPREM until we stabilize.
3436 NearLabel retry;
3437 __ Bind(&retry);
3438 __ fprem();
3439
3440 // Move FP status to AX.
3441 __ fstsw();
3442
3443 // And see if the argument reduction is complete. This is signaled by the
3444 // C2 FPU flag bit set to 0.
3445 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3446 __ j(kNotEqual, &retry);
3447
3448 // We have settled on the final value. Retrieve it into an XMM register.
3449 // Store FP top of stack to real stack.
3450 if (is_float) {
3451 __ fsts(Address(CpuRegister(RSP), 0));
3452 } else {
3453 __ fstl(Address(CpuRegister(RSP), 0));
3454 }
3455
3456 // Pop the 2 items from the FP stack.
3457 __ fucompp();
3458
3459 // Load the value from the stack into an XMM register.
3460 DCHECK(out.IsFpuRegister()) << out;
3461 if (is_float) {
3462 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3463 } else {
3464 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3465 }
3466
3467 // And remove the temporary stack space we allocated.
3468 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3469 }
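// Note on GenerateRemFP: fprem produces only a partial remainder when the operand exponents are
// far apart; the C2 status flag stays set until the reduction is complete, hence the retry loop
// above. The final value follows C fmod semantics (same sign as the dividend), which is what
// Java's % requires for floats and doubles, e.g. -5.5 % 3.0 == -2.5.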
3470
3471 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3472 DCHECK(instruction->IsDiv() || instruction->IsRem());
3473
3474 LocationSummary* locations = instruction->GetLocations();
3475 Location second = locations->InAt(1);
3476 DCHECK(second.IsConstant());
3477
3478 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3479 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3480 int64_t imm = Int64FromConstant(second.GetConstant());
3481
3482 DCHECK(imm == 1 || imm == -1);
3483
3484 switch (instruction->GetResultType()) {
3485 case Primitive::kPrimInt: {
3486 if (instruction->IsRem()) {
3487 __ xorl(output_register, output_register);
3488 } else {
3489 __ movl(output_register, input_register);
3490 if (imm == -1) {
3491 __ negl(output_register);
3492 }
3493 }
3494 break;
3495 }
3496
3497 case Primitive::kPrimLong: {
3498 if (instruction->IsRem()) {
3499 __ xorl(output_register, output_register);
3500 } else {
3501 __ movq(output_register, input_register);
3502 if (imm == -1) {
3503 __ negq(output_register);
3504 }
3505 }
3506 break;
3507 }
3508
3509 default:
3510 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3511 }
3512 }
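// Note on DivRemOneOrMinusOne: x % 1 and x % -1 are 0 for every x, and x / -1 is plain negation.
// Negating the minimum value wraps around (e.g. Integer.MIN_VALUE / -1 == Integer.MIN_VALUE),
// which is exactly what neg produces, so no overflow check is required on this path.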
3513
3514 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3515 LocationSummary* locations = instruction->GetLocations();
3516 Location second = locations->InAt(1);
3517
3518 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3519 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3520
3521 int64_t imm = Int64FromConstant(second.GetConstant());
3522 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3523 uint64_t abs_imm = AbsOrMin(imm);
3524
3525 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3526
3527 if (instruction->GetResultType() == Primitive::kPrimInt) {
3528 __ leal(tmp, Address(numerator, abs_imm - 1));
3529 __ testl(numerator, numerator);
3530 __ cmov(kGreaterEqual, tmp, numerator);
3531 int shift = CTZ(imm);
3532 __ sarl(tmp, Immediate(shift));
3533
3534 if (imm < 0) {
3535 __ negl(tmp);
3536 }
3537
3538 __ movl(output_register, tmp);
3539 } else {
3540 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3541 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3542
3543 codegen_->Load64BitValue(rdx, abs_imm - 1);
3544 __ addq(rdx, numerator);
3545 __ testq(numerator, numerator);
3546 __ cmov(kGreaterEqual, rdx, numerator);
3547 int shift = CTZ(imm);
3548 __ sarq(rdx, Immediate(shift));
3549
3550 if (imm < 0) {
3551 __ negq(rdx);
3552 }
3553
3554 __ movq(output_register, rdx);
3555 }
3556 }
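// Note on DivByPowerOfTwo: an arithmetic right shift rounds towards negative infinity, while
// Java division rounds towards zero, so (2^k - 1) is added to negative numerators first.
// Worked example for a divisor of 4 (k == 2): -7 >> 2 == -2, but (-7 + 3) >> 2 == -1 == -7 / 4.
// The lea/cmov pair applies that bias only for negative numerators, without a branch.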
3557
3558 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3559 DCHECK(instruction->IsDiv() || instruction->IsRem());
3560
3561 LocationSummary* locations = instruction->GetLocations();
3562 Location second = locations->InAt(1);
3563
3564 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3565 : locations->GetTemp(0).AsRegister<CpuRegister>();
3566 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3567 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3568 : locations->Out().AsRegister<CpuRegister>();
3569 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3570
3571 DCHECK_EQ(RAX, eax.AsRegister());
3572 DCHECK_EQ(RDX, edx.AsRegister());
3573 if (instruction->IsDiv()) {
3574 DCHECK_EQ(RAX, out.AsRegister());
3575 } else {
3576 DCHECK_EQ(RDX, out.AsRegister());
3577 }
3578
3579 int64_t magic;
3580 int shift;
3581
3582 // TODO: can these branches be written as one?
3583 if (instruction->GetResultType() == Primitive::kPrimInt) {
3584 int imm = second.GetConstant()->AsIntConstant()->GetValue();
3585
3586 CalculateMagicAndShiftForDivRem(imm, false /* is_long */, &magic, &shift);
3587
3588 __ movl(numerator, eax);
3589
3590 __ movl(eax, Immediate(magic));
3591 __ imull(numerator);
3592
3593 if (imm > 0 && magic < 0) {
3594 __ addl(edx, numerator);
3595 } else if (imm < 0 && magic > 0) {
3596 __ subl(edx, numerator);
3597 }
3598
3599 if (shift != 0) {
3600 __ sarl(edx, Immediate(shift));
3601 }
3602
3603 __ movl(eax, edx);
3604 __ shrl(edx, Immediate(31));
3605 __ addl(edx, eax);
3606
3607 if (instruction->IsRem()) {
3608 __ movl(eax, numerator);
3609 __ imull(edx, Immediate(imm));
3610 __ subl(eax, edx);
3611 __ movl(edx, eax);
3612 } else {
3613 __ movl(eax, edx);
3614 }
3615 } else {
3616 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3617
3618 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
3619
3620 CpuRegister rax = eax;
3621 CpuRegister rdx = edx;
3622
3623 CalculateMagicAndShiftForDivRem(imm, true /* is_long */, &magic, &shift);
3624
3625 // Save the numerator.
3626 __ movq(numerator, rax);
3627
3628 // RAX = magic
3629 codegen_->Load64BitValue(rax, magic);
3630
3631 // RDX:RAX = magic * numerator
3632 __ imulq(numerator);
3633
3634 if (imm > 0 && magic < 0) {
3635 // RDX += numerator
3636 __ addq(rdx, numerator);
3637 } else if (imm < 0 && magic > 0) {
3638 // RDX -= numerator
3639 __ subq(rdx, numerator);
3640 }
3641
3642 // Shift if needed.
3643 if (shift != 0) {
3644 __ sarq(rdx, Immediate(shift));
3645 }
3646
3647 // RDX += 1 if RDX < 0
3648 __ movq(rax, rdx);
3649 __ shrq(rdx, Immediate(63));
3650 __ addq(rdx, rax);
3651
3652 if (instruction->IsRem()) {
3653 __ movq(rax, numerator);
3654
3655 if (IsInt<32>(imm)) {
3656 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3657 } else {
3658 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3659 }
3660
3661 __ subq(rax, rdx);
3662 __ movq(rdx, rax);
3663 } else {
3664 __ movq(rax, rdx);
3665 }
3666 }
3667 }
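// Note on GenerateDivRemWithAnyConstant: this is the standard multiply-by-magic-number scheme
// (Hacker's Delight / Granlund-Montgomery). Roughly, n / d is taken from the high half of
// n * magic, corrected by +/- n when the signs of magic and d differ, arithmetically shifted
// right, and finally incremented by its sign bit to round towards zero. For example, the usual
// 32-bit signed constants for d == 7 are magic == 0x92492493 with a shift of 2. The remainder
// is then recovered as n - (n / d) * d.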
3668
3669 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3670 DCHECK(instruction->IsDiv() || instruction->IsRem());
3671 Primitive::Type type = instruction->GetResultType();
3672 DCHECK(type == Primitive::kPrimInt || type == Primitive::kPrimLong);
3673
3674 bool is_div = instruction->IsDiv();
3675 LocationSummary* locations = instruction->GetLocations();
3676
3677 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3678 Location second = locations->InAt(1);
3679
3680 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3681 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3682
3683 if (second.IsConstant()) {
3684 int64_t imm = Int64FromConstant(second.GetConstant());
3685
3686 if (imm == 0) {
3687 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3688 } else if (imm == 1 || imm == -1) {
3689 DivRemOneOrMinusOne(instruction);
3690 } else if (instruction->IsDiv() && IsPowerOfTwo(AbsOrMin(imm))) {
3691 DivByPowerOfTwo(instruction->AsDiv());
3692 } else {
3693 DCHECK(imm <= -2 || imm >= 2);
3694 GenerateDivRemWithAnyConstant(instruction);
3695 }
3696 } else {
3697 SlowPathCode* slow_path =
3698 new (GetGraph()->GetArena()) DivRemMinusOneSlowPathX86_64(
3699 instruction, out.AsRegister(), type, is_div);
3700 codegen_->AddSlowPath(slow_path);
3701
3702 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3703 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3704 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3705 // so it's safe to just use negl instead of more complex comparisons.
3706 if (type == Primitive::kPrimInt) {
3707 __ cmpl(second_reg, Immediate(-1));
3708 __ j(kEqual, slow_path->GetEntryLabel());
3709 // edx:eax <- sign extension of eax
3710 __ cdq();
3711 // eax = quotient, edx = remainder
3712 __ idivl(second_reg);
3713 } else {
3714 __ cmpq(second_reg, Immediate(-1));
3715 __ j(kEqual, slow_path->GetEntryLabel());
3716 // rdx:rax <- sign extension of rax
3717 __ cqo();
3718 // rax = quotient, rdx = remainder
3719 __ idivq(second_reg);
3720 }
3721 __ Bind(slow_path->GetExitLabel());
3722 }
3723 }
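// Note on GenerateDivRemIntegral: idiv raises a hardware #DE fault not only for a zero divisor
// (handled separately by HDivZeroCheck) but also for MIN_VALUE / -1, whose true quotient does
// not fit. The explicit cmp against -1 above diverts that single case to the slow path, which
// can implement it as a simple negation per Java semantics.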
3724
3725 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3726 LocationSummary* locations =
3727 new (GetGraph()->GetArena()) LocationSummary(div, LocationSummary::kNoCall);
3728 switch (div->GetResultType()) {
3729 case Primitive::kPrimInt:
3730 case Primitive::kPrimLong: {
3731 locations->SetInAt(0, Location::RegisterLocation(RAX));
3732 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3733 locations->SetOut(Location::SameAsFirstInput());
3734 // Intel uses edx:eax as the dividend.
3735 locations->AddTemp(Location::RegisterLocation(RDX));
3736 // We need to save the numerator while we tweak RAX and RDX. As imul forces its results
3737 // into RAX and RDX, we request an extra temporary to hold the numerator when the divisor
3738 // is a constant.
3739 if (div->InputAt(1)->IsConstant()) {
3740 locations->AddTemp(Location::RequiresRegister());
3741 }
3742 break;
3743 }
3744
3745 case Primitive::kPrimFloat:
3746 case Primitive::kPrimDouble: {
3747 locations->SetInAt(0, Location::RequiresFpuRegister());
3748 locations->SetInAt(1, Location::Any());
3749 locations->SetOut(Location::SameAsFirstInput());
3750 break;
3751 }
3752
3753 default:
3754 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3755 }
3756 }
3757
3758 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3759 LocationSummary* locations = div->GetLocations();
3760 Location first = locations->InAt(0);
3761 Location second = locations->InAt(1);
3762 DCHECK(first.Equals(locations->Out()));
3763
3764 Primitive::Type type = div->GetResultType();
3765 switch (type) {
3766 case Primitive::kPrimInt:
3767 case Primitive::kPrimLong: {
3768 GenerateDivRemIntegral(div);
3769 break;
3770 }
3771
3772 case Primitive::kPrimFloat: {
3773 if (second.IsFpuRegister()) {
3774 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3775 } else if (second.IsConstant()) {
3776 __ divss(first.AsFpuRegister<XmmRegister>(),
3777 codegen_->LiteralFloatAddress(
3778 second.GetConstant()->AsFloatConstant()->GetValue()));
3779 } else {
3780 DCHECK(second.IsStackSlot());
3781 __ divss(first.AsFpuRegister<XmmRegister>(),
3782 Address(CpuRegister(RSP), second.GetStackIndex()));
3783 }
3784 break;
3785 }
3786
3787 case Primitive::kPrimDouble: {
3788 if (second.IsFpuRegister()) {
3789 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3790 } else if (second.IsConstant()) {
3791 __ divsd(first.AsFpuRegister<XmmRegister>(),
3792 codegen_->LiteralDoubleAddress(
3793 second.GetConstant()->AsDoubleConstant()->GetValue()));
3794 } else {
3795 DCHECK(second.IsDoubleStackSlot());
3796 __ divsd(first.AsFpuRegister<XmmRegister>(),
3797 Address(CpuRegister(RSP), second.GetStackIndex()));
3798 }
3799 break;
3800 }
3801
3802 default:
3803 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3804 }
3805 }
3806
3807 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3808 Primitive::Type type = rem->GetResultType();
3809 LocationSummary* locations =
3810 new (GetGraph()->GetArena()) LocationSummary(rem, LocationSummary::kNoCall);
3811
3812 switch (type) {
3813 case Primitive::kPrimInt:
3814 case Primitive::kPrimLong: {
3815 locations->SetInAt(0, Location::RegisterLocation(RAX));
3816 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3817 // Intel uses rdx:rax as the dividend and puts the remainder in rdx
3818 locations->SetOut(Location::RegisterLocation(RDX));
3819 // We need to save the numerator while we tweak RAX and RDX. As imul forces its results
3820 // into RAX and RDX, we request an extra temporary to hold the numerator when the divisor
3821 // is a constant.
3822 if (rem->InputAt(1)->IsConstant()) {
3823 locations->AddTemp(Location::RequiresRegister());
3824 }
3825 break;
3826 }
3827
3828 case Primitive::kPrimFloat:
3829 case Primitive::kPrimDouble: {
3830 locations->SetInAt(0, Location::Any());
3831 locations->SetInAt(1, Location::Any());
3832 locations->SetOut(Location::RequiresFpuRegister());
3833 locations->AddTemp(Location::RegisterLocation(RAX));
3834 break;
3835 }
3836
3837 default:
3838 LOG(FATAL) << "Unexpected rem type " << type;
3839 }
3840 }
3841
3842 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3843 Primitive::Type type = rem->GetResultType();
3844 switch (type) {
3845 case Primitive::kPrimInt:
3846 case Primitive::kPrimLong: {
3847 GenerateDivRemIntegral(rem);
3848 break;
3849 }
3850 case Primitive::kPrimFloat:
3851 case Primitive::kPrimDouble: {
3852 GenerateRemFP(rem);
3853 break;
3854 }
3855 default:
3856 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3857 }
3858 }
3859
3860 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3861 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
3862 locations->SetInAt(0, Location::Any());
3863 }
3864
3865 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
3866 SlowPathCode* slow_path =
3867 new (GetGraph()->GetArena()) DivZeroCheckSlowPathX86_64(instruction);
3868 codegen_->AddSlowPath(slow_path);
3869
3870 LocationSummary* locations = instruction->GetLocations();
3871 Location value = locations->InAt(0);
3872
3873 switch (instruction->GetType()) {
3874 case Primitive::kPrimBoolean:
3875 case Primitive::kPrimByte:
3876 case Primitive::kPrimChar:
3877 case Primitive::kPrimShort:
3878 case Primitive::kPrimInt: {
3879 if (value.IsRegister()) {
3880 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3881 __ j(kEqual, slow_path->GetEntryLabel());
3882 } else if (value.IsStackSlot()) {
3883 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3884 __ j(kEqual, slow_path->GetEntryLabel());
3885 } else {
3886 DCHECK(value.IsConstant()) << value;
3887 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
3888 __ jmp(slow_path->GetEntryLabel());
3889 }
3890 }
3891 break;
3892 }
3893 case Primitive::kPrimLong: {
3894 if (value.IsRegister()) {
3895 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
3896 __ j(kEqual, slow_path->GetEntryLabel());
3897 } else if (value.IsDoubleStackSlot()) {
3898 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
3899 __ j(kEqual, slow_path->GetEntryLabel());
3900 } else {
3901 DCHECK(value.IsConstant()) << value;
3902 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
3903 __ jmp(slow_path->GetEntryLabel());
3904 }
3905 }
3906 break;
3907 }
3908 default:
3909 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
3910 }
3911 }
3912
3913 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
3914 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3915
3916 LocationSummary* locations =
3917 new (GetGraph()->GetArena()) LocationSummary(op, LocationSummary::kNoCall);
3918
3919 switch (op->GetResultType()) {
3920 case Primitive::kPrimInt:
3921 case Primitive::kPrimLong: {
3922 locations->SetInAt(0, Location::RequiresRegister());
3923 // The shift count needs to be in CL.
3924 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
3925 locations->SetOut(Location::SameAsFirstInput());
3926 break;
3927 }
3928 default:
3929 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3930 }
3931 }
3932
3933 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
3934 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
3935
3936 LocationSummary* locations = op->GetLocations();
3937 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
3938 Location second = locations->InAt(1);
3939
3940 switch (op->GetResultType()) {
3941 case Primitive::kPrimInt: {
3942 if (second.IsRegister()) {
3943 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3944 if (op->IsShl()) {
3945 __ shll(first_reg, second_reg);
3946 } else if (op->IsShr()) {
3947 __ sarl(first_reg, second_reg);
3948 } else {
3949 __ shrl(first_reg, second_reg);
3950 }
3951 } else {
3952 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
3953 if (op->IsShl()) {
3954 __ shll(first_reg, imm);
3955 } else if (op->IsShr()) {
3956 __ sarl(first_reg, imm);
3957 } else {
3958 __ shrl(first_reg, imm);
3959 }
3960 }
3961 break;
3962 }
3963 case Primitive::kPrimLong: {
3964 if (second.IsRegister()) {
3965 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3966 if (op->IsShl()) {
3967 __ shlq(first_reg, second_reg);
3968 } else if (op->IsShr()) {
3969 __ sarq(first_reg, second_reg);
3970 } else {
3971 __ shrq(first_reg, second_reg);
3972 }
3973 } else {
3974 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
3975 if (op->IsShl()) {
3976 __ shlq(first_reg, imm);
3977 } else if (op->IsShr()) {
3978 __ sarq(first_reg, imm);
3979 } else {
3980 __ shrq(first_reg, imm);
3981 }
3982 }
3983 break;
3984 }
3985 default:
3986 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
3987 UNREACHABLE();
3988 }
3989 }
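// Note on HandleShift: shift distances are masked to the operand width (& 31 for int, & 63 for
// long). The hardware applies the same mask when the count is in CL, so only constant counts
// need the explicit kMaxIntShiftDistance / kMaxLongShiftDistance masking above. This matches
// the Java semantics, e.g. (1 << 32) == 1 for an int shift.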
3990
3991 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
3992 LocationSummary* locations =
3993 new (GetGraph()->GetArena()) LocationSummary(ror, LocationSummary::kNoCall);
3994
3995 switch (ror->GetResultType()) {
3996 case Primitive::kPrimInt:
3997 case Primitive::kPrimLong: {
3998 locations->SetInAt(0, Location::RequiresRegister());
3999 // The shift count needs to be in CL (unless it is a constant).
4000 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4001 locations->SetOut(Location::SameAsFirstInput());
4002 break;
4003 }
4004 default:
4005 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4006 UNREACHABLE();
4007 }
4008 }
4009
4010 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4011 LocationSummary* locations = ror->GetLocations();
4012 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4013 Location second = locations->InAt(1);
4014
4015 switch (ror->GetResultType()) {
4016 case Primitive::kPrimInt:
4017 if (second.IsRegister()) {
4018 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4019 __ rorl(first_reg, second_reg);
4020 } else {
4021 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4022 __ rorl(first_reg, imm);
4023 }
4024 break;
4025 case Primitive::kPrimLong:
4026 if (second.IsRegister()) {
4027 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4028 __ rorq(first_reg, second_reg);
4029 } else {
4030 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4031 __ rorq(first_reg, imm);
4032 }
4033 break;
4034 default:
4035 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4036 UNREACHABLE();
4037 }
4038 }
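// Note on VisitRor: rorl/rorq rotate the value right by the (masked) count, directly matching
// Integer.rotateRight / Long.rotateRight, e.g. Integer.rotateRight(1, 1) == 0x80000000.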
4039
4040 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4041 HandleShift(shl);
4042 }
4043
4044 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4045 HandleShift(shl);
4046 }
4047
4048 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4049 HandleShift(shr);
4050 }
4051
4052 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4053 HandleShift(shr);
4054 }
4055
4056 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4057 HandleShift(ushr);
4058 }
4059
4060 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4061 HandleShift(ushr);
4062 }
4063
4064 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4065 LocationSummary* locations =
4066 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
4067 InvokeRuntimeCallingConvention calling_convention;
4068 if (instruction->IsStringAlloc()) {
4069 locations->AddTemp(Location::RegisterLocation(kMethodRegisterArgument));
4070 } else {
4071 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4072 }
4073 locations->SetOut(Location::RegisterLocation(RAX));
4074 }
4075
4076 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4077 // Note: if heap poisoning is enabled, the entry point takes care
4078 // of poisoning the reference.
4079 if (instruction->IsStringAlloc()) {
4080 // String is allocated through StringFactory. Call NewEmptyString entry point.
4081 CpuRegister temp = instruction->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
4082 MemberOffset code_offset = ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize);
4083 __ gs()->movq(temp, Address::Absolute(QUICK_ENTRY_POINT(pNewEmptyString), /* no_rip */ true));
4084 __ call(Address(temp, code_offset.SizeValue()));
4085 codegen_->RecordPcInfo(instruction, instruction->GetDexPc());
4086 } else {
4087 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4088 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4089 DCHECK(!codegen_->IsLeafMethod());
4090 }
4091 }
4092
4093 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4094 LocationSummary* locations =
4095 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
4096 InvokeRuntimeCallingConvention calling_convention;
4097 locations->SetOut(Location::RegisterLocation(RAX));
4098 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4099 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4100 }
4101
4102 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4103 // Note: if heap poisoning is enabled, the entry point takes care
4104 // of poisoning the reference.
4105 QuickEntrypointEnum entrypoint =
4106 CodeGenerator::GetArrayAllocationEntrypoint(instruction->GetLoadClass()->GetClass());
4107 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4108 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4109 DCHECK(!codegen_->IsLeafMethod());
4110 }
4111
4112 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4113 LocationSummary* locations =
4114 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4115 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4116 if (location.IsStackSlot()) {
4117 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4118 } else if (location.IsDoubleStackSlot()) {
4119 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4120 }
4121 locations->SetOut(location);
4122 }
4123
4124 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4125 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4126 // Nothing to do, the parameter is already at its location.
4127 }
4128
4129 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4130 LocationSummary* locations =
4131 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4132 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4133 }
4134
4135 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4136 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4137 // Nothing to do, the method is already at its location.
4138 }
4139
4140 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4141 LocationSummary* locations =
4142 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4143 locations->SetInAt(0, Location::RequiresRegister());
4144 locations->SetOut(Location::RequiresRegister());
4145 }
4146
4147 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4148 LocationSummary* locations = instruction->GetLocations();
4149 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4150 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4151 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4152 __ movq(locations->Out().AsRegister<CpuRegister>(),
4153 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4154 } else {
4155 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4156 instruction->GetIndex(), kX86_64PointerSize));
4157 __ movq(locations->Out().AsRegister<CpuRegister>(),
4158 Address(locations->InAt(0).AsRegister<CpuRegister>(),
4159 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4160 __ movq(locations->Out().AsRegister<CpuRegister>(),
4161 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4162 }
4163 }
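// Note on VisitClassTableGet: vtable entries are embedded directly in the Class object, so a
// single load at a statically known offset suffices; IMT entries live in a separately allocated
// table, hence the extra load of the imt pointer before indexing into it.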
4164
4165 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4166 LocationSummary* locations =
4167 new (GetGraph()->GetArena()) LocationSummary(not_, LocationSummary::kNoCall);
4168 locations->SetInAt(0, Location::RequiresRegister());
4169 locations->SetOut(Location::SameAsFirstInput());
4170 }
4171
4172 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4173 LocationSummary* locations = not_->GetLocations();
4174 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4175 locations->Out().AsRegister<CpuRegister>().AsRegister());
4176 Location out = locations->Out();
4177 switch (not_->GetResultType()) {
4178 case Primitive::kPrimInt:
4179 __ notl(out.AsRegister<CpuRegister>());
4180 break;
4181
4182 case Primitive::kPrimLong:
4183 __ notq(out.AsRegister<CpuRegister>());
4184 break;
4185
4186 default:
4187 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4188 }
4189 }
4190
4191 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4192 LocationSummary* locations =
4193 new (GetGraph()->GetArena()) LocationSummary(bool_not, LocationSummary::kNoCall);
4194 locations->SetInAt(0, Location::RequiresRegister());
4195 locations->SetOut(Location::SameAsFirstInput());
4196 }
4197
4198 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4199 LocationSummary* locations = bool_not->GetLocations();
4200 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4201 locations->Out().AsRegister<CpuRegister>().AsRegister());
4202 Location out = locations->Out();
4203 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4204 }
4205
4206 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4207 LocationSummary* locations =
4208 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4209 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4210 locations->SetInAt(i, Location::Any());
4211 }
4212 locations->SetOut(Location::Any());
4213 }
4214
4215 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4216 LOG(FATAL) << "Unimplemented";
4217 }
4218
4219 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4220 /*
4221 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4222 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4223 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4224 */
4225 switch (kind) {
4226 case MemBarrierKind::kAnyAny: {
4227 MemoryFence();
4228 break;
4229 }
4230 case MemBarrierKind::kAnyStore:
4231 case MemBarrierKind::kLoadAny:
4232 case MemBarrierKind::kStoreStore: {
4233 // nop
4234 break;
4235 }
4236 case MemBarrierKind::kNTStoreStore:
4237 // Non-Temporal Store/Store needs an explicit fence.
4238 MemoryFence(/* non-temporal */ true);
4239 break;
4240 }
4241 }
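// Note on GenerateMemoryBarrier: on x86-64 only the StoreLoad (kAnyAny) ordering requires an
// actual fence instruction; either a locked read-modify-write on the stack or an mfence provides
// it, and MemoryFence() emits such a fence. Ordinary loads and stores are already ordered by the
// memory model, so the remaining kinds only need to keep the compiler from reordering, except
// for non-temporal stores, which require an explicit fence as handled above.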
4242
4243 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4244 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4245
4246 bool object_field_get_with_read_barrier =
4247 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4248 LocationSummary* locations =
4249 new (GetGraph()->GetArena()) LocationSummary(instruction,
4250 object_field_get_with_read_barrier ?
4251 LocationSummary::kCallOnSlowPath :
4252 LocationSummary::kNoCall);
4253 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4254 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4255 }
4256 locations->SetInAt(0, Location::RequiresRegister());
4257 if (Primitive::IsFloatingPointType(instruction->GetType())) {
4258 locations->SetOut(Location::RequiresFpuRegister());
4259 } else {
4260 // The output overlaps for an object field get when read barriers
4261 // are enabled: we do not want the move to overwrite the object's
4262 // location, as we need it to emit the read barrier.
4263 locations->SetOut(
4264 Location::RequiresRegister(),
4265 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4266 }
4267 }
4268
4269 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4270 const FieldInfo& field_info) {
4271 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4272
4273 LocationSummary* locations = instruction->GetLocations();
4274 Location base_loc = locations->InAt(0);
4275 CpuRegister base = base_loc.AsRegister<CpuRegister>();
4276 Location out = locations->Out();
4277 bool is_volatile = field_info.IsVolatile();
4278 Primitive::Type field_type = field_info.GetFieldType();
4279 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4280
4281 switch (field_type) {
4282 case Primitive::kPrimBoolean: {
4283 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4284 break;
4285 }
4286
4287 case Primitive::kPrimByte: {
4288 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4289 break;
4290 }
4291
4292 case Primitive::kPrimShort: {
4293 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4294 break;
4295 }
4296
4297 case Primitive::kPrimChar: {
4298 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4299 break;
4300 }
4301
4302 case Primitive::kPrimInt: {
4303 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4304 break;
4305 }
4306
4307 case Primitive::kPrimNot: {
4308 // /* HeapReference<Object> */ out = *(base + offset)
4309 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4310 // Note that a potential implicit null check is handled in this
4311 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4312 codegen_->GenerateFieldLoadWithBakerReadBarrier(
4313 instruction, out, base, offset, /* needs_null_check */ true);
4314 if (is_volatile) {
4315 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4316 }
4317 } else {
4318 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4319 codegen_->MaybeRecordImplicitNullCheck(instruction);
4320 if (is_volatile) {
4321 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4322 }
4323 // If read barriers are enabled, emit read barriers other than
4324 // Baker's using a slow path (and also unpoison the loaded
4325 // reference, if heap poisoning is enabled).
4326 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4327 }
4328 break;
4329 }
4330
4331 case Primitive::kPrimLong: {
4332 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4333 break;
4334 }
4335
4336 case Primitive::kPrimFloat: {
4337 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4338 break;
4339 }
4340
4341 case Primitive::kPrimDouble: {
4342 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4343 break;
4344 }
4345
4346 case Primitive::kPrimVoid:
4347 LOG(FATAL) << "Unreachable type " << field_type;
4348 UNREACHABLE();
4349 }
4350
4351 if (field_type == Primitive::kPrimNot) {
4352 // Potential implicit null checks, in the case of reference
4353 // fields, are handled in the previous switch statement.
4354 } else {
4355 codegen_->MaybeRecordImplicitNullCheck(instruction);
4356 }
4357
4358 if (is_volatile) {
4359 if (field_type == Primitive::kPrimNot) {
4360 // Memory barriers, in the case of references, are also handled
4361 // in the previous switch statement.
4362 } else {
4363 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4364 }
4365 }
4366 }
4367
4368 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4369 const FieldInfo& field_info) {
4370 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4371
4372 LocationSummary* locations =
4373 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
4374 Primitive::Type field_type = field_info.GetFieldType();
4375 bool is_volatile = field_info.IsVolatile();
4376 bool needs_write_barrier =
4377 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4378
4379 locations->SetInAt(0, Location::RequiresRegister());
4380 if (Primitive::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4381 if (is_volatile) {
4382 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4383 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4384 } else {
4385 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4386 }
4387 } else {
4388 if (is_volatile) {
4389 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4390 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4391 } else {
4392 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4393 }
4394 }
4395 if (needs_write_barrier) {
4396 // Temporary registers for the write barrier.
4397 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
4398 locations->AddTemp(Location::RequiresRegister());
4399 } else if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4400 // Temporary register for the reference poisoning.
4401 locations->AddTemp(Location::RequiresRegister());
4402 }
4403 }
4404
4405 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4406 const FieldInfo& field_info,
4407 bool value_can_be_null) {
4408 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4409
4410 LocationSummary* locations = instruction->GetLocations();
4411 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4412 Location value = locations->InAt(1);
4413 bool is_volatile = field_info.IsVolatile();
4414 Primitive::Type field_type = field_info.GetFieldType();
4415 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4416
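// A volatile store is bracketed by a kAnyStore barrier before and a kAnyAny barrier after;
// per GenerateMemoryBarrier above, only the kAnyAny barrier emits an actual fence on x86-64.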
4417 if (is_volatile) {
4418 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4419 }
4420
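// MoveInt64ToAddress records the implicit null check itself in some cases (presumably when the
// 64-bit constant has to be written as two 32-bit halves), so track whether that already happened.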
4421 bool maybe_record_implicit_null_check_done = false;
4422
4423 switch (field_type) {
4424 case Primitive::kPrimBoolean:
4425 case Primitive::kPrimByte: {
4426 if (value.IsConstant()) {
4427 int8_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4428 __ movb(Address(base, offset), Immediate(v));
4429 } else {
4430 __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4431 }
4432 break;
4433 }
4434
4435 case Primitive::kPrimShort:
4436 case Primitive::kPrimChar: {
4437 if (value.IsConstant()) {
4438 int16_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4439 __ movw(Address(base, offset), Immediate(v));
4440 } else {
4441 __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4442 }
4443 break;
4444 }
4445
4446 case Primitive::kPrimInt:
4447 case Primitive::kPrimNot: {
4448 if (value.IsConstant()) {
4449 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4450 // `field_type == Primitive::kPrimNot` implies `v == 0`.
4451 DCHECK((field_type != Primitive::kPrimNot) || (v == 0));
4452 // Note: if heap poisoning is enabled, no need to poison
4453 // (negate) `v` if it is a reference, as it would be null.
4454 __ movl(Address(base, offset), Immediate(v));
4455 } else {
4456 if (kPoisonHeapReferences && field_type == Primitive::kPrimNot) {
4457 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4458 __ movl(temp, value.AsRegister<CpuRegister>());
4459 __ PoisonHeapReference(temp);
4460 __ movl(Address(base, offset), temp);
4461 } else {
4462 __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4463 }
4464 }
4465 break;
4466 }
4467
4468 case Primitive::kPrimLong: {
4469 if (value.IsConstant()) {
4470 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4471 codegen_->MoveInt64ToAddress(Address(base, offset),
4472 Address(base, offset + sizeof(int32_t)),
4473 v,
4474 instruction);
4475 maybe_record_implicit_null_check_done = true;
4476 } else {
4477 __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4478 }
4479 break;
4480 }
4481
4482 case Primitive::kPrimFloat: {
4483 if (value.IsConstant()) {
4484 int32_t v =
4485 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4486 __ movl(Address(base, offset), Immediate(v));
4487 } else {
4488 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4489 }
4490 break;
4491 }
4492
4493 case Primitive::kPrimDouble: {
4494 if (value.IsConstant()) {
4495 int64_t v =
4496 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4497 codegen_->MoveInt64ToAddress(Address(base, offset),
4498 Address(base, offset + sizeof(int32_t)),
4499 v,
4500 instruction);
4501 maybe_record_implicit_null_check_done = true;
4502 } else {
4503 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4504 }
4505 break;
4506 }
4507
4508 case Primitive::kPrimVoid:
4509 LOG(FATAL) << "Unreachable type " << field_type;
4510 UNREACHABLE();
4511 }
4512
4513 if (!maybe_record_implicit_null_check_done) {
4514 codegen_->MaybeRecordImplicitNullCheck(instruction);
4515 }
4516
4517 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4518 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4519 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4520 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4521 }
4522
4523 if (is_volatile) {
4524 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4525 }
4526 }
4527
4528 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4529 HandleFieldSet(instruction, instruction->GetFieldInfo());
4530 }
4531
4532 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4533 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4534 }
4535
4536 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4537 HandleFieldGet(instruction);
4538 }
4539
4540 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4541 HandleFieldGet(instruction, instruction->GetFieldInfo());
4542 }
4543
4544 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4545 HandleFieldGet(instruction);
4546 }
4547
4548 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4549 HandleFieldGet(instruction, instruction->GetFieldInfo());
4550 }
4551
4552 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4553 HandleFieldSet(instruction, instruction->GetFieldInfo());
4554 }
4555
4556 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4557 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4558 }
4559
4560 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4561 HUnresolvedInstanceFieldGet* instruction) {
4562 FieldAccessCallingConventionX86_64 calling_convention;
4563 codegen_->CreateUnresolvedFieldLocationSummary(
4564 instruction, instruction->GetFieldType(), calling_convention);
4565 }
4566
4567 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4568 HUnresolvedInstanceFieldGet* instruction) {
4569 FieldAccessCallingConventionX86_64 calling_convention;
4570 codegen_->GenerateUnresolvedFieldAccess(instruction,
4571 instruction->GetFieldType(),
4572 instruction->GetFieldIndex(),
4573 instruction->GetDexPc(),
4574 calling_convention);
4575 }
4576
4577 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4578 HUnresolvedInstanceFieldSet* instruction) {
4579 FieldAccessCallingConventionX86_64 calling_convention;
4580 codegen_->CreateUnresolvedFieldLocationSummary(
4581 instruction, instruction->GetFieldType(), calling_convention);
4582 }
4583
4584 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4585 HUnresolvedInstanceFieldSet* instruction) {
4586 FieldAccessCallingConventionX86_64 calling_convention;
4587 codegen_->GenerateUnresolvedFieldAccess(instruction,
4588 instruction->GetFieldType(),
4589 instruction->GetFieldIndex(),
4590 instruction->GetDexPc(),
4591 calling_convention);
4592 }
4593
4594 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4595 HUnresolvedStaticFieldGet* instruction) {
4596 FieldAccessCallingConventionX86_64 calling_convention;
4597 codegen_->CreateUnresolvedFieldLocationSummary(
4598 instruction, instruction->GetFieldType(), calling_convention);
4599 }
4600
4601 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4602 HUnresolvedStaticFieldGet* instruction) {
4603 FieldAccessCallingConventionX86_64 calling_convention;
4604 codegen_->GenerateUnresolvedFieldAccess(instruction,
4605 instruction->GetFieldType(),
4606 instruction->GetFieldIndex(),
4607 instruction->GetDexPc(),
4608 calling_convention);
4609 }
4610
4611 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4612 HUnresolvedStaticFieldSet* instruction) {
4613 FieldAccessCallingConventionX86_64 calling_convention;
4614 codegen_->CreateUnresolvedFieldLocationSummary(
4615 instruction, instruction->GetFieldType(), calling_convention);
4616 }
4617
4618 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4619 HUnresolvedStaticFieldSet* instruction) {
4620 FieldAccessCallingConventionX86_64 calling_convention;
4621 codegen_->GenerateUnresolvedFieldAccess(instruction,
4622 instruction->GetFieldType(),
4623 instruction->GetFieldIndex(),
4624 instruction->GetDexPc(),
4625 calling_convention);
4626 }
4627
4628 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4629 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4630 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
4631 ? Location::RequiresRegister()
4632 : Location::Any();
4633 locations->SetInAt(0, loc);
4634 }
4635
4636 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4637 if (CanMoveNullCheckToUser(instruction)) {
4638 return;
4639 }
4640 LocationSummary* locations = instruction->GetLocations();
4641 Location obj = locations->InAt(0);
4642
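// The testl below loads from the object; if the reference is null the access faults, and the
// PC recorded right after lets the runtime turn that fault into a NullPointerException.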
4643 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4644 RecordPcInfo(instruction, instruction->GetDexPc());
4645 }
4646
4647 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4648 SlowPathCode* slow_path = new (GetGraph()->GetArena()) NullCheckSlowPathX86_64(instruction);
4649 AddSlowPath(slow_path);
4650
4651 LocationSummary* locations = instruction->GetLocations();
4652 Location obj = locations->InAt(0);
4653
4654 if (obj.IsRegister()) {
4655 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4656 } else if (obj.IsStackSlot()) {
4657 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4658 } else {
4659 DCHECK(obj.IsConstant()) << obj;
4660 DCHECK(obj.GetConstant()->IsNullConstant());
4661 __ jmp(slow_path->GetEntryLabel());
4662 return;
4663 }
4664 __ j(kEqual, slow_path->GetEntryLabel());
4665 }
4666
4667 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4668 codegen_->GenerateNullCheck(instruction);
4669 }
4670
4671 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4672 bool object_array_get_with_read_barrier =
4673 kEmitCompilerReadBarrier && (instruction->GetType() == Primitive::kPrimNot);
4674 LocationSummary* locations =
4675 new (GetGraph()->GetArena()) LocationSummary(instruction,
4676 object_array_get_with_read_barrier ?
4677 LocationSummary::kCallOnSlowPath :
4678 LocationSummary::kNoCall);
4679 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
4680 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4681 }
4682 locations->SetInAt(0, Location::RequiresRegister());
4683 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4684 if (Primitive::IsFloatingPointType(instruction->GetType())) {
4685 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
4686 } else {
4687 // The output overlaps for an object array get when read barriers
4688 // are enabled: we do not want the move to overwrite the array's
4689 // location, as we need it to emit the read barrier.
4690 locations->SetOut(
4691 Location::RequiresRegister(),
4692 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4693 }
4694 }
4695
4696 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
4697 LocationSummary* locations = instruction->GetLocations();
4698 Location obj_loc = locations->InAt(0);
4699 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
4700 Location index = locations->InAt(1);
4701 Location out_loc = locations->Out();
4702 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
4703
4704 Primitive::Type type = instruction->GetType();
4705 switch (type) {
4706 case Primitive::kPrimBoolean: {
4707 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4708 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4709 break;
4710 }
4711
4712 case Primitive::kPrimByte: {
4713 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4714 __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4715 break;
4716 }
4717
4718 case Primitive::kPrimShort: {
4719 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4720 __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4721 break;
4722 }
4723
4724 case Primitive::kPrimChar: {
4725 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4726 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
4727 // Branch to the compressed or uncompressed case depending on the string's compression flag.
4728 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
4729 NearLabel done, not_compressed;
4730 __ testb(Address(obj, count_offset), Immediate(1));
4731 codegen_->MaybeRecordImplicitNullCheck(instruction);
4732 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
4733 "Expecting 0=compressed, 1=uncompressed");
4734 __ j(kNotZero, &not_compressed);
4735 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
4736 __ jmp(&done);
4737 __ Bind(&not_compressed);
4738 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4739 __ Bind(&done);
4740 } else {
4741 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
4742 }
4743 break;
4744 }
4745
4746 case Primitive::kPrimInt: {
4747 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4748 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4749 break;
4750 }
4751
4752 case Primitive::kPrimNot: {
4753 static_assert(
4754 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
4755 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
4756 // /* HeapReference<Object> */ out =
4757 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
4758 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4759 // Note that a potential implicit null check is handled in this
4760 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
4761 codegen_->GenerateArrayLoadWithBakerReadBarrier(
4762 instruction, out_loc, obj, data_offset, index, /* needs_null_check */ true);
4763 } else {
4764 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4765 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4766 codegen_->MaybeRecordImplicitNullCheck(instruction);
4767 // If read barriers are enabled, emit read barriers other than
4768 // Baker's using a slow path (and also unpoison the loaded
4769 // reference, if heap poisoning is enabled).
4770 if (index.IsConstant()) {
4771 uint32_t offset =
4772 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
4773 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
4774 } else {
4775 codegen_->MaybeGenerateReadBarrierSlow(
4776 instruction, out_loc, out_loc, obj_loc, data_offset, index);
4777 }
4778 }
4779 break;
4780 }
4781
4782 case Primitive::kPrimLong: {
4783 CpuRegister out = out_loc.AsRegister<CpuRegister>();
4784 __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
4785 break;
4786 }
4787
4788 case Primitive::kPrimFloat: {
4789 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4790 __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
4791 break;
4792 }
4793
4794 case Primitive::kPrimDouble: {
4795 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4796 __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
4797 break;
4798 }
4799
4800 case Primitive::kPrimVoid:
4801 LOG(FATAL) << "Unreachable type " << type;
4802 UNREACHABLE();
4803 }
4804
4805 if (type == Primitive::kPrimNot) {
4806 // Potential implicit null checks, in the case of reference
4807 // arrays, are handled in the previous switch statement.
4808 } else {
4809 codegen_->MaybeRecordImplicitNullCheck(instruction);
4810 }
4811 }
4812
4813 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
4814 Primitive::Type value_type = instruction->GetComponentType();
4815
4816 bool needs_write_barrier =
4817 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4818 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4819
4820 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(
4821 instruction,
4822 may_need_runtime_call_for_type_check ?
4823 LocationSummary::kCallOnSlowPath :
4824 LocationSummary::kNoCall);
4825
4826 locations->SetInAt(0, Location::RequiresRegister());
4827 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4828 if (Primitive::IsFloatingPointType(value_type)) {
4829 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
4830 } else {
4831 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
4832 }
4833
4834 if (needs_write_barrier) {
4835 // Temporary registers for the write barrier.
4836 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
4837 locations->AddTemp(Location::RequiresRegister());
4838 }
4839 }
4840
4841 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
4842 LocationSummary* locations = instruction->GetLocations();
4843 Location array_loc = locations->InAt(0);
4844 CpuRegister array = array_loc.AsRegister<CpuRegister>();
4845 Location index = locations->InAt(1);
4846 Location value = locations->InAt(2);
4847 Primitive::Type value_type = instruction->GetComponentType();
4848 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
4849 bool needs_write_barrier =
4850 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
4851 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4852 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
4853 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
4854
4855 switch (value_type) {
4856 case Primitive::kPrimBoolean:
4857 case Primitive::kPrimByte: {
4858 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
4859 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
4860 if (value.IsRegister()) {
4861 __ movb(address, value.AsRegister<CpuRegister>());
4862 } else {
4863 __ movb(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4864 }
4865 codegen_->MaybeRecordImplicitNullCheck(instruction);
4866 break;
4867 }
4868
4869 case Primitive::kPrimShort:
4870 case Primitive::kPrimChar: {
4871 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
4872 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
4873 if (value.IsRegister()) {
4874 __ movw(address, value.AsRegister<CpuRegister>());
4875 } else {
4876 DCHECK(value.IsConstant()) << value;
4877 __ movw(address, Immediate(value.GetConstant()->AsIntConstant()->GetValue()));
4878 }
4879 codegen_->MaybeRecordImplicitNullCheck(instruction);
4880 break;
4881 }
4882
4883 case Primitive::kPrimNot: {
4884 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4885 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4886
4887 if (!value.IsRegister()) {
4888 // Just setting null.
4889 DCHECK(instruction->InputAt(2)->IsNullConstant());
4890 DCHECK(value.IsConstant()) << value;
4891 __ movl(address, Immediate(0));
4892 codegen_->MaybeRecordImplicitNullCheck(instruction);
4893 DCHECK(!needs_write_barrier);
4894 DCHECK(!may_need_runtime_call_for_type_check);
4895 break;
4896 }
4897
4898 DCHECK(needs_write_barrier);
4899 CpuRegister register_value = value.AsRegister<CpuRegister>();
4900 // We cannot use a NearLabel for `done`, as its range may be too
4901 // short when Baker read barriers are enabled.
4902 Label done;
4903 NearLabel not_null, do_put;
4904 SlowPathCode* slow_path = nullptr;
4905 Location temp_loc = locations->GetTemp(0);
4906 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4907 if (may_need_runtime_call_for_type_check) {
4908 slow_path = new (GetGraph()->GetArena()) ArraySetSlowPathX86_64(instruction);
4909 codegen_->AddSlowPath(slow_path);
4910 if (instruction->GetValueCanBeNull()) {
4911 __ testl(register_value, register_value);
4912 __ j(kNotEqual, &not_null);
4913 __ movl(address, Immediate(0));
4914 codegen_->MaybeRecordImplicitNullCheck(instruction);
4915 __ jmp(&done);
4916 __ Bind(&not_null);
4917 }
4918
4919 // Note that when Baker read barriers are enabled, the type
4920 // checks are performed without read barriers. This is fine,
4921 // even in the case where a class object is in the from-space
4922 // after the flip, as a comparison involving such a type would
4923 // not produce a false positive; it may of course produce a
4924 // false negative, in which case we would take the ArraySet
4925 // slow path.
4926
4927 // /* HeapReference<Class> */ temp = array->klass_
4928 __ movl(temp, Address(array, class_offset));
4929 codegen_->MaybeRecordImplicitNullCheck(instruction);
4930 __ MaybeUnpoisonHeapReference(temp);
4931
4932 // /* HeapReference<Class> */ temp = temp->component_type_
4933 __ movl(temp, Address(temp, component_offset));
4934 // If heap poisoning is enabled, no need to unpoison `temp`
4935 // nor the object reference in `register_value->klass`, as
4936 // we are comparing two poisoned references.
4937 __ cmpl(temp, Address(register_value, class_offset));
4938
4939 if (instruction->StaticTypeOfArrayIsObjectArray()) {
4940 __ j(kEqual, &do_put);
4941 // If heap poisoning is enabled, the `temp` reference has
4942 // not been unpoisoned yet; unpoison it now.
4943 __ MaybeUnpoisonHeapReference(temp);
4944
4945 // If heap poisoning is enabled, no need to unpoison the
4946 // heap reference loaded below, as it is only used for a
4947 // comparison with null.
4948 __ cmpl(Address(temp, super_offset), Immediate(0));
4949 __ j(kNotEqual, slow_path->GetEntryLabel());
4950 __ Bind(&do_put);
4951 } else {
4952 __ j(kNotEqual, slow_path->GetEntryLabel());
4953 }
4954 }
4955
4956 if (kPoisonHeapReferences) {
4957 __ movl(temp, register_value);
4958 __ PoisonHeapReference(temp);
4959 __ movl(address, temp);
4960 } else {
4961 __ movl(address, register_value);
4962 }
4963 if (!may_need_runtime_call_for_type_check) {
4964 codegen_->MaybeRecordImplicitNullCheck(instruction);
4965 }
4966
4967 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4968 codegen_->MarkGCCard(
4969 temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
4970 __ Bind(&done);
4971
4972 if (slow_path != nullptr) {
4973 __ Bind(slow_path->GetExitLabel());
4974 }
4975
4976 break;
4977 }
4978
4979 case Primitive::kPrimInt: {
4980 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
4981 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
4982 if (value.IsRegister()) {
4983 __ movl(address, value.AsRegister<CpuRegister>());
4984 } else {
4985 DCHECK(value.IsConstant()) << value;
4986 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4987 __ movl(address, Immediate(v));
4988 }
4989 codegen_->MaybeRecordImplicitNullCheck(instruction);
4990 break;
4991 }
4992
4993 case Primitive::kPrimLong: {
4994 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
4995 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
4996 if (value.IsRegister()) {
4997 __ movq(address, value.AsRegister<CpuRegister>());
4998 codegen_->MaybeRecordImplicitNullCheck(instruction);
4999 } else {
5000 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5001 Address address_high =
5002 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5003 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5004 }
5005 break;
5006 }
5007
5008 case Primitive::kPrimFloat: {
5009 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5010 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5011 if (value.IsFpuRegister()) {
5012 __ movss(address, value.AsFpuRegister<XmmRegister>());
5013 } else {
5014 DCHECK(value.IsConstant());
5015 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5016 __ movl(address, Immediate(v));
5017 }
5018 codegen_->MaybeRecordImplicitNullCheck(instruction);
5019 break;
5020 }
5021
5022 case Primitive::kPrimDouble: {
5023 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5024 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5025 if (value.IsFpuRegister()) {
5026 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5027 codegen_->MaybeRecordImplicitNullCheck(instruction);
5028 } else {
5029 int64_t v =
5030 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5031 Address address_high =
5032 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5033 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5034 }
5035 break;
5036 }
5037
5038 case Primitive::kPrimVoid:
5039 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5040 UNREACHABLE();
5041 }
5042 }
5043
5044 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5045 LocationSummary* locations =
5046 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
5047 locations->SetInAt(0, Location::RequiresRegister());
5048 if (!instruction->IsEmittedAtUseSite()) {
5049 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5050 }
5051 }
5052
5053 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5054 if (instruction->IsEmittedAtUseSite()) {
5055 return;
5056 }
5057
5058 LocationSummary* locations = instruction->GetLocations();
5059 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5060 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5061 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5062 __ movl(out, Address(obj, offset));
5063 codegen_->MaybeRecordImplicitNullCheck(instruction);
5064 // Shift out the compression flag (stored in the least significant bit) when this is a String's length.
5065 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5066 __ shrl(out, Immediate(1));
5067 }
5068 }
5069
5070 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5071 RegisterSet caller_saves = RegisterSet::Empty();
5072 InvokeRuntimeCallingConvention calling_convention;
5073 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5074 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5075 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5076 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5077 HInstruction* length = instruction->InputAt(1);
5078 if (!length->IsEmittedAtUseSite()) {
5079 locations->SetInAt(1, Location::RegisterOrConstant(length));
5080 }
5081 }
5082
5083 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5084 LocationSummary* locations = instruction->GetLocations();
5085 Location index_loc = locations->InAt(0);
5086 Location length_loc = locations->InAt(1);
5087 SlowPathCode* slow_path = new (GetGraph()->GetArena()) BoundsCheckSlowPathX86_64(instruction);
5088
5089 if (length_loc.IsConstant()) {
5090 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5091 if (index_loc.IsConstant()) {
5092 // BCE will remove the bounds check if we are guaranteed to pass.
5093 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5094 if (index < 0 || index >= length) {
5095 codegen_->AddSlowPath(slow_path);
5096 __ jmp(slow_path->GetEntryLabel());
5097 } else {
5098 // Some optimization after BCE may have generated this, and we should not
5099 // generate a bounds check if it is a valid range.
5100 }
5101 return;
5102 }
5103
5104 // We have to reverse the jump condition because the length is the constant.
5105 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5106 __ cmpl(index_reg, Immediate(length));
5107 codegen_->AddSlowPath(slow_path);
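// The unsigned comparison also catches negative indices, which wrap to large unsigned values
// and take the slow path.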
5108 __ j(kAboveEqual, slow_path->GetEntryLabel());
5109 } else {
5110 HInstruction* array_length = instruction->InputAt(1);
5111 if (array_length->IsEmittedAtUseSite()) {
5112 // Address the length field in the array.
5113 DCHECK(array_length->IsArrayLength());
5114 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5115 Location array_loc = array_length->GetLocations()->InAt(0);
5116 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5117 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5118 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5119 // the string compression flag) with the in-memory length and avoid the temporary.
5120 CpuRegister length_reg = CpuRegister(TMP);
5121 __ movl(length_reg, array_len);
5122 codegen_->MaybeRecordImplicitNullCheck(array_length);
5123 __ shrl(length_reg, Immediate(1));
5124 codegen_->GenerateIntCompare(length_reg, index_loc);
5125 } else {
5126 // Checking the bound for the general case:
5127 // an array of char, or a String's char array when compression is off.
5128 if (index_loc.IsConstant()) {
5129 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5130 __ cmpl(array_len, Immediate(value));
5131 } else {
5132 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5133 }
5134 codegen_->MaybeRecordImplicitNullCheck(array_length);
5135 }
5136 } else {
5137 codegen_->GenerateIntCompare(length_loc, index_loc);
5138 }
5139 codegen_->AddSlowPath(slow_path);
5140 __ j(kBelowEqual, slow_path->GetEntryLabel());
5141 }
5142 }
5143
5144 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5145 CpuRegister card,
5146 CpuRegister object,
5147 CpuRegister value,
5148 bool value_can_be_null) {
5149 NearLabel is_null;
5150 if (value_can_be_null) {
5151 __ testl(value, value);
5152 __ j(kEqual, &is_null);
5153 }
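// Load the card table base from thread-local storage, index it by the object address shifted
// right by the card shift, and mark the card by storing the low byte of the base register
// (used here as the dirty value).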
5154 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5155 /* no_rip */ true));
5156 __ movq(temp, object);
5157 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5158 __ movb(Address(temp, card, TIMES_1, 0), card);
5159 if (value_can_be_null) {
5160 __ Bind(&is_null);
5161 }
5162 }
5163
5164 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5165 LOG(FATAL) << "Unimplemented";
5166 }
5167
5168 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5169 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5170 }
5171
5172 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5173 LocationSummary* locations =
5174 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnSlowPath);
5175 // In suspend check slow path, usually there are no caller-save registers at all.
5176 // If SIMD instructions are present, however, we force spilling all live SIMD
5177 // registers in full width (since the runtime only saves/restores lower part).
5178 locations->SetCustomSlowPathCallerSaves(
5179 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5180 }
5181
5182 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5183 HBasicBlock* block = instruction->GetBlock();
5184 if (block->GetLoopInformation() != nullptr) {
5185 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5186 // The back edge will generate the suspend check.
5187 return;
5188 }
5189 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5190 // The goto will generate the suspend check.
5191 return;
5192 }
5193 GenerateSuspendCheck(instruction, nullptr);
5194 }
5195
5196 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5197 HBasicBlock* successor) {
5198 SuspendCheckSlowPathX86_64* slow_path =
5199 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5200 if (slow_path == nullptr) {
5201 slow_path = new (GetGraph()->GetArena()) SuspendCheckSlowPathX86_64(instruction, successor);
5202 instruction->SetSlowPath(slow_path);
5203 codegen_->AddSlowPath(slow_path);
5204 if (successor != nullptr) {
5205 DCHECK(successor->IsLoopHeader());
5206 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(instruction);
5207 }
5208 } else {
5209 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5210 }
5211
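// Compare the 16-bit thread flags (read via the GS-based thread-local block) against zero;
// any pending flag, such as a suspend request, sends execution to the slow path.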
5212 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5213 /* no_rip */ true),
5214 Immediate(0));
5215 if (successor == nullptr) {
5216 __ j(kNotEqual, slow_path->GetEntryLabel());
5217 __ Bind(slow_path->GetReturnLabel());
5218 } else {
5219 __ j(kEqual, codegen_->GetLabelOf(successor));
5220 __ jmp(slow_path->GetEntryLabel());
5221 }
5222 }
5223
5224 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5225 return codegen_->GetAssembler();
5226 }
5227
5228 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5229 MoveOperands* move = moves_[index];
5230 Location source = move->GetSource();
5231 Location destination = move->GetDestination();
5232
5233 if (source.IsRegister()) {
5234 if (destination.IsRegister()) {
5235 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5236 } else if (destination.IsStackSlot()) {
5237 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5238 source.AsRegister<CpuRegister>());
5239 } else {
5240 DCHECK(destination.IsDoubleStackSlot());
5241 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5242 source.AsRegister<CpuRegister>());
5243 }
5244 } else if (source.IsStackSlot()) {
5245 if (destination.IsRegister()) {
5246 __ movl(destination.AsRegister<CpuRegister>(),
5247 Address(CpuRegister(RSP), source.GetStackIndex()));
5248 } else if (destination.IsFpuRegister()) {
5249 __ movss(destination.AsFpuRegister<XmmRegister>(),
5250 Address(CpuRegister(RSP), source.GetStackIndex()));
5251 } else {
5252 DCHECK(destination.IsStackSlot());
5253 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5254 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5255 }
5256 } else if (source.IsDoubleStackSlot()) {
5257 if (destination.IsRegister()) {
5258 __ movq(destination.AsRegister<CpuRegister>(),
5259 Address(CpuRegister(RSP), source.GetStackIndex()));
5260 } else if (destination.IsFpuRegister()) {
5261 __ movsd(destination.AsFpuRegister<XmmRegister>(),
5262 Address(CpuRegister(RSP), source.GetStackIndex()));
5263 } else {
5264 DCHECK(destination.IsDoubleStackSlot()) << destination;
5265 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5266 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5267 }
5268 } else if (source.IsSIMDStackSlot()) {
5269 DCHECK(destination.IsFpuRegister());
5270 __ movups(destination.AsFpuRegister<XmmRegister>(),
5271 Address(CpuRegister(RSP), source.GetStackIndex()));
5272 } else if (source.IsConstant()) {
5273 HConstant* constant = source.GetConstant();
5274 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5275 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5276 if (destination.IsRegister()) {
5277 if (value == 0) {
5278 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5279 } else {
5280 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5281 }
5282 } else {
5283 DCHECK(destination.IsStackSlot()) << destination;
5284 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5285 }
5286 } else if (constant->IsLongConstant()) {
5287 int64_t value = constant->AsLongConstant()->GetValue();
5288 if (destination.IsRegister()) {
5289 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5290 } else {
5291 DCHECK(destination.IsDoubleStackSlot()) << destination;
5292 codegen_->Store64BitValueToStack(destination, value);
5293 }
5294 } else if (constant->IsFloatConstant()) {
5295 float fp_value = constant->AsFloatConstant()->GetValue();
5296 if (destination.IsFpuRegister()) {
5297 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5298 codegen_->Load32BitValue(dest, fp_value);
5299 } else {
5300 DCHECK(destination.IsStackSlot()) << destination;
5301 Immediate imm(bit_cast<int32_t, float>(fp_value));
5302 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5303 }
5304 } else {
5305 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5306 double fp_value = constant->AsDoubleConstant()->GetValue();
5307 int64_t value = bit_cast<int64_t, double>(fp_value);
5308 if (destination.IsFpuRegister()) {
5309 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5310 codegen_->Load64BitValue(dest, fp_value);
5311 } else {
5312 DCHECK(destination.IsDoubleStackSlot()) << destination;
5313 codegen_->Store64BitValueToStack(destination, value);
5314 }
5315 }
5316 } else if (source.IsFpuRegister()) {
5317 if (destination.IsFpuRegister()) {
5318 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5319 } else if (destination.IsStackSlot()) {
5320 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5321 source.AsFpuRegister<XmmRegister>());
5322 } else if (destination.IsDoubleStackSlot()) {
5323 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5324 source.AsFpuRegister<XmmRegister>());
5325 } else {
5326 DCHECK(destination.IsSIMDStackSlot());
5327 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
5328 source.AsFpuRegister<XmmRegister>());
5329 }
5330 }
5331 }
5332
5333 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5334 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5335 __ movl(Address(CpuRegister(RSP), mem), reg);
5336 __ movl(reg, CpuRegister(TMP));
5337 }
5338
5339 void ParallelMoveResolverX86_64::Exchange32(int mem1, int mem2) {
5340 ScratchRegisterScope ensure_scratch(
5341 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5342
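// If the scratch register had to be spilled, the push moved RSP down by one word, so the
// memory operands below are adjusted by that offset.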
5343 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5344 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5345 __ movl(CpuRegister(ensure_scratch.GetRegister()),
5346 Address(CpuRegister(RSP), mem2 + stack_offset));
5347 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5348 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5349 CpuRegister(ensure_scratch.GetRegister()));
5350 }
5351
5352 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5353 __ movq(CpuRegister(TMP), reg1);
5354 __ movq(reg1, reg2);
5355 __ movq(reg2, CpuRegister(TMP));
5356 }
5357
5358 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5359 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5360 __ movq(Address(CpuRegister(RSP), mem), reg);
5361 __ movq(reg, CpuRegister(TMP));
5362 }
5363
5364 void ParallelMoveResolverX86_64::Exchange64(int mem1, int mem2) {
5365 ScratchRegisterScope ensure_scratch(
5366 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5367
5368 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5369 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5370 __ movq(CpuRegister(ensure_scratch.GetRegister()),
5371 Address(CpuRegister(RSP), mem2 + stack_offset));
5372 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5373 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5374 CpuRegister(ensure_scratch.GetRegister()));
5375 }
5376
5377 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5378 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5379 __ movss(Address(CpuRegister(RSP), mem), reg);
5380 __ movd(reg, CpuRegister(TMP));
5381 }
5382
5383 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5384 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5385 __ movsd(Address(CpuRegister(RSP), mem), reg);
5386 __ movd(reg, CpuRegister(TMP));
5387 }
5388
5389 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5390 MoveOperands* move = moves_[index];
5391 Location source = move->GetSource();
5392 Location destination = move->GetDestination();
5393
5394 if (source.IsRegister() && destination.IsRegister()) {
5395 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5396 } else if (source.IsRegister() && destination.IsStackSlot()) {
5397 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5398 } else if (source.IsStackSlot() && destination.IsRegister()) {
5399 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5400 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5401 Exchange32(destination.GetStackIndex(), source.GetStackIndex());
5402 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5403 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5404 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5405 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5406 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5407 Exchange64(destination.GetStackIndex(), source.GetStackIndex());
5408 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
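// Swap two XMM registers using the integer scratch register TMP as the intermediate.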
5409 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5410 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5411 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5412 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5413 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5414 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5415 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5416 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5417 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5418 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5419 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5420 } else {
5421 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5422 }
5423 }
5424
5425
5426 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5427 __ pushq(CpuRegister(reg));
5428 }
5429
5430
5431 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5432 __ popq(CpuRegister(reg));
5433 }
5434
5435 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5436 SlowPathCode* slow_path, CpuRegister class_reg) {
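// Compare the class status against kStatusInitialized; any smaller status takes the slow path.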
5437 __ cmpl(Address(class_reg, mirror::Class::StatusOffset().Int32Value()),
5438 Immediate(mirror::Class::kStatusInitialized));
5439 __ j(kLess, slow_path->GetEntryLabel());
5440 __ Bind(slow_path->GetExitLabel());
5441 // No need for memory fence, thanks to the x86-64 memory model.
5442 }
5443
5444 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
5445 HLoadClass::LoadKind desired_class_load_kind) {
5446 switch (desired_class_load_kind) {
5447 case HLoadClass::LoadKind::kInvalid:
5448 LOG(FATAL) << "UNREACHABLE";
5449 UNREACHABLE();
5450 case HLoadClass::LoadKind::kReferrersClass:
5451 break;
5452 case HLoadClass::LoadKind::kBootImageLinkTimeAddress:
5453 DCHECK(!GetCompilerOptions().GetCompilePic());
5454 // We prefer the always-available RIP-relative address for the x86-64 boot image.
5455 return HLoadClass::LoadKind::kBootImageLinkTimePcRelative;
5456 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5457 DCHECK(GetCompilerOptions().GetCompilePic());
5458 break;
5459 case HLoadClass::LoadKind::kBootImageAddress:
5460 break;
5461 case HLoadClass::LoadKind::kBssEntry:
5462 DCHECK(!Runtime::Current()->UseJitCompilation());
5463 break;
5464 case HLoadClass::LoadKind::kJitTableAddress:
5465 DCHECK(Runtime::Current()->UseJitCompilation());
5466 break;
5467 case HLoadClass::LoadKind::kDexCacheViaMethod:
5468 break;
5469 }
5470 return desired_class_load_kind;
5471 }
5472
5473 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5474 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5475 if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
5476 // Custom calling convention: RAX serves as both input and output.
5477 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5478 cls,
5479 Location::RegisterLocation(RAX),
5480 Location::RegisterLocation(RAX));
5481 return;
5482 }
5483 DCHECK(!cls->NeedsAccessCheck());
5484
5485 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5486 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5487 ? LocationSummary::kCallOnSlowPath
5488 : LocationSummary::kNoCall;
5489 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(cls, call_kind);
5490 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5491 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5492 }
5493
5494 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5495 locations->SetInAt(0, Location::RequiresRegister());
5496 }
5497 locations->SetOut(Location::RequiresRegister());
5498 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
5499 if (!kUseReadBarrier || kUseBakerReadBarrier) {
5500 // Rely on the type resolution and/or initialization to save everything.
5501 // Custom calling convention: RAX serves as both input and output.
5502 RegisterSet caller_saves = RegisterSet::Empty();
5503 caller_saves.Add(Location::RegisterLocation(RAX));
5504 locations->SetCustomSlowPathCallerSaves(caller_saves);
5505 } else {
5506 // For non-Baker read barrier we have a temp-clobbering call.
5507 }
5508 }
5509 }
5510
5511 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
5512 dex::TypeIndex dex_index,
5513 Handle<mirror::Class> handle) {
5514 jit_class_roots_.Overwrite(
5515 TypeReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
5516 // Add a patch entry and return the label.
5517 jit_class_patches_.emplace_back(dex_file, dex_index.index_);
5518 PatchInfo<Label>* info = &jit_class_patches_.back();
5519 return &info->label;
5520 }
5521
5522 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5523 // move.
5524 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5525 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5526 if (load_kind == HLoadClass::LoadKind::kDexCacheViaMethod) {
5527 codegen_->GenerateLoadClassRuntimeCall(cls);
5528 return;
5529 }
5530 DCHECK(!cls->NeedsAccessCheck());
5531
5532 LocationSummary* locations = cls->GetLocations();
5533 Location out_loc = locations->Out();
5534 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5535
5536 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
5537 ? kWithoutReadBarrier
5538 : kCompilerReadBarrierOption;
5539 bool generate_null_check = false;
5540 switch (load_kind) {
5541 case HLoadClass::LoadKind::kReferrersClass: {
5542 DCHECK(!cls->CanCallRuntime());
5543 DCHECK(!cls->MustGenerateClinitCheck());
5544 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5545 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5546 GenerateGcRootFieldLoad(
5547 cls,
5548 out_loc,
5549 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
5550 /* fixup_label */ nullptr,
5551 read_barrier_option);
5552 break;
5553 }
5554 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5555 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5556 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5557 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5558 codegen_->RecordBootTypePatch(cls);
5559 break;
5560 case HLoadClass::LoadKind::kBootImageAddress: {
5561 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5562 uint32_t address = dchecked_integral_cast<uint32_t>(
5563 reinterpret_cast<uintptr_t>(cls->GetClass().Get()));
5564 DCHECK_NE(address, 0u);
5565 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
5566 break;
5567 }
5568 case HLoadClass::LoadKind::kBssEntry: {
5569 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5570 /* no_rip */ false);
5571 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
5572 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
5573 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5574 generate_null_check = true;
5575 break;
5576 }
5577 case HLoadClass::LoadKind::kJitTableAddress: {
5578 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5579 /* no_rip */ true);
5580 Label* fixup_label =
5581 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
5582 // /* GcRoot<mirror::Class> */ out = *address
5583 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5584 break;
5585 }
5586 default:
5587 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
5588 UNREACHABLE();
5589 }
5590
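// A kBssEntry load may still yield null (the type has not been resolved yet) and the class may
// also need its static initializer run; both cases share LoadClassSlowPathX86_64 below.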
5591 if (generate_null_check || cls->MustGenerateClinitCheck()) {
5592 DCHECK(cls->CanCallRuntime());
5593 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5594 cls, cls, cls->GetDexPc(), cls->MustGenerateClinitCheck());
5595 codegen_->AddSlowPath(slow_path);
5596 if (generate_null_check) {
5597 __ testl(out, out);
5598 __ j(kEqual, slow_path->GetEntryLabel());
5599 }
5600 if (cls->MustGenerateClinitCheck()) {
5601 GenerateClassInitializationCheck(slow_path, out);
5602 } else {
5603 __ Bind(slow_path->GetExitLabel());
5604 }
5605 }
5606 }
5607
5608 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
5609 LocationSummary* locations =
5610 new (GetGraph()->GetArena()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
5611 locations->SetInAt(0, Location::RequiresRegister());
5612 if (check->HasUses()) {
5613 locations->SetOut(Location::SameAsFirstInput());
5614 }
5615 }
5616
5617 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
5618 // We assume the class is not null.
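// The trailing `true` fills the same slot as MustGenerateClinitCheck() does at the call site in
// VisitLoadClass above, i.e. this slow path also performs the initialization check.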
5619 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadClassSlowPathX86_64(
5620 check->GetLoadClass(), check, check->GetDexPc(), true);
5621 codegen_->AddSlowPath(slow_path);
5622 GenerateClassInitializationCheck(slow_path,
5623 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
5624 }
5625
5626 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
5627 HLoadString::LoadKind desired_string_load_kind) {
5628 switch (desired_string_load_kind) {
5629 case HLoadString::LoadKind::kBootImageLinkTimeAddress:
5630 DCHECK(!GetCompilerOptions().GetCompilePic());
5631 // We prefer the always-available RIP-relative address for the x86-64 boot image.
5632 return HLoadString::LoadKind::kBootImageLinkTimePcRelative;
5633 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
5634 DCHECK(GetCompilerOptions().GetCompilePic());
5635 break;
5636 case HLoadString::LoadKind::kBootImageAddress:
5637 break;
5638 case HLoadString::LoadKind::kBssEntry:
5639 DCHECK(!Runtime::Current()->UseJitCompilation());
5640 break;
5641 case HLoadString::LoadKind::kJitTableAddress:
5642 DCHECK(Runtime::Current()->UseJitCompilation());
5643 break;
5644 case HLoadString::LoadKind::kDexCacheViaMethod:
5645 break;
5646 }
5647 return desired_string_load_kind;
5648 }
5649
5650 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
5651 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
5652 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(load, call_kind);
5653 if (load->GetLoadKind() == HLoadString::LoadKind::kDexCacheViaMethod) {
5654 locations->SetOut(Location::RegisterLocation(RAX));
5655 } else {
5656 locations->SetOut(Location::RequiresRegister());
5657 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
5658 if (!kUseReadBarrier || kUseBakerReadBarrier) {
5659 // Rely on pResolveString to save everything.
5660 // Custom calling convention: RAX serves as both input and output.
5661 RegisterSet caller_saves = RegisterSet::Empty();
5662 caller_saves.Add(Location::RegisterLocation(RAX));
5663 locations->SetCustomSlowPathCallerSaves(caller_saves);
5664 } else {
5665 // For non-Baker read barrier we have a temp-clobbering call.
5666 }
5667 }
5668 }
5669 }
5670
5671 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
5672 dex::StringIndex dex_index,
5673 Handle<mirror::String> handle) {
5674 jit_string_roots_.Overwrite(
5675 StringReference(&dex_file, dex_index), reinterpret_cast64<uint64_t>(handle.GetReference()));
5676 // Add a patch entry and return the label.
5677 jit_string_patches_.emplace_back(dex_file, dex_index.index_);
5678 PatchInfo<Label>* info = &jit_string_patches_.back();
5679 return &info->label;
5680 }
5681
5682 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5683 // move.
5684 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
5685 LocationSummary* locations = load->GetLocations();
5686 Location out_loc = locations->Out();
5687 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5688
5689 switch (load->GetLoadKind()) {
5690 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
5691 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5692 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip */ false));
5693 codegen_->RecordBootStringPatch(load);
5694 return; // No dex cache slow path.
5695 }
5696 case HLoadString::LoadKind::kBootImageAddress: {
5697 uint32_t address = dchecked_integral_cast<uint32_t>(
5698 reinterpret_cast<uintptr_t>(load->GetString().Get()));
5699 DCHECK_NE(address, 0u);
5700 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
5701 return; // No dex cache slow path.
5702 }
5703 case HLoadString::LoadKind::kBssEntry: {
5704 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5705 /* no_rip */ false);
5706 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
5707 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
5708 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
5709 SlowPathCode* slow_path = new (GetGraph()->GetArena()) LoadStringSlowPathX86_64(load);
5710 codegen_->AddSlowPath(slow_path);
5711 __ testl(out, out);
5712 __ j(kEqual, slow_path->GetEntryLabel());
5713 __ Bind(slow_path->GetExitLabel());
5714 return;
5715 }
5716 case HLoadString::LoadKind::kJitTableAddress: {
5717 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5718 /* no_rip */ true);
5719 Label* fixup_label = codegen_->NewJitRootStringPatch(
5720 load->GetDexFile(), load->GetStringIndex(), load->GetString());
5721 // /* GcRoot<mirror::String> */ out = *address
5722 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
5723 return;
5724 }
5725 default:
5726 break;
5727 }
5728
5729 // TODO: Re-add the compiler code to do string dex cache lookup again.
5730 // Custom calling convention: RAX serves as both input and output.
5731 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
5732 codegen_->InvokeRuntime(kQuickResolveString,
5733 load,
5734 load->GetDexPc());
5735 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
5736 }
5737
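// The pending exception is a field of the current Thread; on x86-64 it is addressed relative to
// the GS segment register (hence the gs() prefix at the call sites below).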
5738 static Address GetExceptionTlsAddress() {
5739 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
5740 /* no_rip */ true);
5741 }
5742
5743 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
5744 LocationSummary* locations =
5745 new (GetGraph()->GetArena()) LocationSummary(load, LocationSummary::kNoCall);
5746 locations->SetOut(Location::RequiresRegister());
5747 }
5748
5749 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
5750 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
5751 }
5752
5753 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
5754 new (GetGraph()->GetArena()) LocationSummary(clear, LocationSummary::kNoCall);
5755 }
5756
5757 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
5758 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
5759 }
5760
5761 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
5762 LocationSummary* locations =
5763 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
5764 InvokeRuntimeCallingConvention calling_convention;
5765 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5766 }
5767
5768 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
5769 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
5770 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
5771 }
5772
5773 static bool CheckCastTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5774 if (type_check_kind == TypeCheckKind::kInterfaceCheck && !kPoisonHeapReferences) {
5775 // We need a temporary for holding the iftable length.
5776 return true;
5777 }
5778 return kEmitCompilerReadBarrier &&
5779 !kUseBakerReadBarrier &&
5780 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5781 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5782 type_check_kind == TypeCheckKind::kArrayObjectCheck);
5783 }
5784
5785 static bool InstanceOfTypeCheckNeedsATemporary(TypeCheckKind type_check_kind) {
5786 return kEmitCompilerReadBarrier &&
5787 !kUseBakerReadBarrier &&
5788 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
5789 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
5790 type_check_kind == TypeCheckKind::kArrayObjectCheck);
5791 }
5792
5793 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5794 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
5795 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5796 bool baker_read_barrier_slow_path = false;
5797 switch (type_check_kind) {
5798 case TypeCheckKind::kExactCheck:
5799 case TypeCheckKind::kAbstractClassCheck:
5800 case TypeCheckKind::kClassHierarchyCheck:
5801 case TypeCheckKind::kArrayObjectCheck:
5802 call_kind =
5803 kEmitCompilerReadBarrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
5804 baker_read_barrier_slow_path = kUseBakerReadBarrier;
5805 break;
5806 case TypeCheckKind::kArrayCheck:
5807 case TypeCheckKind::kUnresolvedCheck:
5808 case TypeCheckKind::kInterfaceCheck:
5809 call_kind = LocationSummary::kCallOnSlowPath;
5810 break;
5811 }
5812
5813 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
5814 if (baker_read_barrier_slow_path) {
5815 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5816 }
5817 locations->SetInAt(0, Location::RequiresRegister());
5818 locations->SetInAt(1, Location::Any());
5819 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
5820 locations->SetOut(Location::RequiresRegister());
5821 // When read barriers are enabled, we need a temporary register for
5822 // some cases.
5823 if (InstanceOfTypeCheckNeedsATemporary(type_check_kind)) {
5824 locations->AddTemp(Location::RequiresRegister());
5825 }
5826 }
5827
5828 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
5829 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
5830 LocationSummary* locations = instruction->GetLocations();
5831 Location obj_loc = locations->InAt(0);
5832 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5833 Location cls = locations->InAt(1);
5834 Location out_loc = locations->Out();
5835 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5836 Location maybe_temp_loc = InstanceOfTypeCheckNeedsATemporary(type_check_kind) ?
5837 locations->GetTemp(0) :
5838 Location::NoLocation();
5839 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5840 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5841 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5842 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
5843 SlowPathCode* slow_path = nullptr;
5844 NearLabel done, zero;
5845
5846 // Return 0 if `obj` is null.
5847 // Avoid null check if we know obj is not null.
5848 if (instruction->MustDoNullCheck()) {
5849 __ testl(obj, obj);
5850 __ j(kEqual, &zero);
5851 }
5852
5853 switch (type_check_kind) {
5854 case TypeCheckKind::kExactCheck: {
5855 // /* HeapReference<Class> */ out = obj->klass_
5856 GenerateReferenceLoadTwoRegisters(instruction,
5857 out_loc,
5858 obj_loc,
5859 class_offset,
5860 kCompilerReadBarrierOption);
5861 if (cls.IsRegister()) {
5862 __ cmpl(out, cls.AsRegister<CpuRegister>());
5863 } else {
5864 DCHECK(cls.IsStackSlot()) << cls;
5865 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5866 }
5867 if (zero.IsLinked()) {
5868 // Classes must be equal for the instanceof to succeed.
5869 __ j(kNotEqual, &zero);
5870 __ movl(out, Immediate(1));
5871 __ jmp(&done);
5872 } else {
5873 __ setcc(kEqual, out);
5874 // setcc only sets the low byte.
5875 __ andl(out, Immediate(1));
5876 }
5877 break;
5878 }
5879
5880 case TypeCheckKind::kAbstractClassCheck: {
5881 // /* HeapReference<Class> */ out = obj->klass_
5882 GenerateReferenceLoadTwoRegisters(instruction,
5883 out_loc,
5884 obj_loc,
5885 class_offset,
5886 kCompilerReadBarrierOption);
5887 // If the class is abstract, we eagerly fetch the super class of the
5888 // object to avoid doing a comparison we know will fail.
5889 NearLabel loop, success;
5890 __ Bind(&loop);
5891 // /* HeapReference<Class> */ out = out->super_class_
5892 GenerateReferenceLoadOneRegister(instruction,
5893 out_loc,
5894 super_offset,
5895 maybe_temp_loc,
5896 kCompilerReadBarrierOption);
5897 __ testl(out, out);
5898 // If `out` is null, we use it for the result, and jump to `done`.
5899 __ j(kEqual, &done);
5900 if (cls.IsRegister()) {
5901 __ cmpl(out, cls.AsRegister<CpuRegister>());
5902 } else {
5903 DCHECK(cls.IsStackSlot()) << cls;
5904 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5905 }
5906 __ j(kNotEqual, &loop);
5907 __ movl(out, Immediate(1));
5908 if (zero.IsLinked()) {
5909 __ jmp(&done);
5910 }
5911 break;
5912 }
5913
5914 case TypeCheckKind::kClassHierarchyCheck: {
5915 // /* HeapReference<Class> */ out = obj->klass_
5916 GenerateReferenceLoadTwoRegisters(instruction,
5917 out_loc,
5918 obj_loc,
5919 class_offset,
5920 kCompilerReadBarrierOption);
5921 // Walk over the class hierarchy to find a match.
5922 NearLabel loop, success;
5923 __ Bind(&loop);
5924 if (cls.IsRegister()) {
5925 __ cmpl(out, cls.AsRegister<CpuRegister>());
5926 } else {
5927 DCHECK(cls.IsStackSlot()) << cls;
5928 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5929 }
5930 __ j(kEqual, &success);
5931 // /* HeapReference<Class> */ out = out->super_class_
5932 GenerateReferenceLoadOneRegister(instruction,
5933 out_loc,
5934 super_offset,
5935 maybe_temp_loc,
5936 kCompilerReadBarrierOption);
5937 __ testl(out, out);
5938 __ j(kNotEqual, &loop);
5939 // If `out` is null, we use it for the result, and jump to `done`.
5940 __ jmp(&done);
5941 __ Bind(&success);
5942 __ movl(out, Immediate(1));
5943 if (zero.IsLinked()) {
5944 __ jmp(&done);
5945 }
5946 break;
5947 }
5948
5949 case TypeCheckKind::kArrayObjectCheck: {
5950 // /* HeapReference<Class> */ out = obj->klass_
5951 GenerateReferenceLoadTwoRegisters(instruction,
5952 out_loc,
5953 obj_loc,
5954 class_offset,
5955 kCompilerReadBarrierOption);
5956 // Do an exact check.
5957 NearLabel exact_check;
5958 if (cls.IsRegister()) {
5959 __ cmpl(out, cls.AsRegister<CpuRegister>());
5960 } else {
5961 DCHECK(cls.IsStackSlot()) << cls;
5962 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5963 }
5964 __ j(kEqual, &exact_check);
5965 // Otherwise, we need to check that the object's class is a non-primitive array.
5966 // /* HeapReference<Class> */ out = out->component_type_
5967 GenerateReferenceLoadOneRegister(instruction,
5968 out_loc,
5969 component_offset,
5970 maybe_temp_loc,
5971 kCompilerReadBarrierOption);
5972 __ testl(out, out);
5973 // If `out` is null, we use it for the result, and jump to `done`.
5974 __ j(kEqual, &done);
5975 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
5976 __ j(kNotEqual, &zero);
5977 __ Bind(&exact_check);
5978 __ movl(out, Immediate(1));
5979 __ jmp(&done);
5980 break;
5981 }
5982
5983 case TypeCheckKind::kArrayCheck: {
5984 // No read barrier since the slow path will retry upon failure.
5985 // /* HeapReference<Class> */ out = obj->klass_
5986 GenerateReferenceLoadTwoRegisters(instruction,
5987 out_loc,
5988 obj_loc,
5989 class_offset,
5990 kWithoutReadBarrier);
5991 if (cls.IsRegister()) {
5992 __ cmpl(out, cls.AsRegister<CpuRegister>());
5993 } else {
5994 DCHECK(cls.IsStackSlot()) << cls;
5995 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
5996 }
5997 DCHECK(locations->OnlyCallsOnSlowPath());
5998 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
5999 /* is_fatal */ false);
6000 codegen_->AddSlowPath(slow_path);
6001 __ j(kNotEqual, slow_path->GetEntryLabel());
6002 __ movl(out, Immediate(1));
6003 if (zero.IsLinked()) {
6004 __ jmp(&done);
6005 }
6006 break;
6007 }
6008
6009 case TypeCheckKind::kUnresolvedCheck:
6010 case TypeCheckKind::kInterfaceCheck: {
6011 // Note that we indeed only call on slow path, but we always go
6012 // into the slow path for the unresolved and interface check
6013 // cases.
6014 //
6015 // We cannot directly call the InstanceofNonTrivial runtime
6016 // entry point without resorting to a type checking slow path
6017 // here (i.e. by calling InvokeRuntime directly), as it would
6018 // require to assign fixed registers for the inputs of this
6019 // HInstanceOf instruction (following the runtime calling
6020 // convention), which might be cluttered by the potential first
6021 // read barrier emission at the beginning of this method.
6022 //
6023 // TODO: Introduce a new runtime entry point taking the object
6024 // to test (instead of its class) as argument, and let it deal
6025 // with the read barrier issues. This will let us refactor this
6026 // case of the `switch` code as it was previously (with a direct
6027 // call to the runtime not using a type checking slow path).
6028 // This should also be beneficial for the other cases above.
6029 DCHECK(locations->OnlyCallsOnSlowPath());
6030 slow_path = new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
6031 /* is_fatal */ false);
6032 codegen_->AddSlowPath(slow_path);
6033 __ jmp(slow_path->GetEntryLabel());
6034 if (zero.IsLinked()) {
6035 __ jmp(&done);
6036 }
6037 break;
6038 }
6039 }
6040
6041 if (zero.IsLinked()) {
6042 __ Bind(&zero);
6043 __ xorl(out, out);
6044 }
6045
6046 if (done.IsLinked()) {
6047 __ Bind(&done);
6048 }
6049
6050 if (slow_path != nullptr) {
6051 __ Bind(slow_path->GetExitLabel());
6052 }
6053 }
6054
6055 static bool IsTypeCheckSlowPathFatal(TypeCheckKind type_check_kind, bool throws_into_catch) {
6056 switch (type_check_kind) {
6057 case TypeCheckKind::kExactCheck:
6058 case TypeCheckKind::kAbstractClassCheck:
6059 case TypeCheckKind::kClassHierarchyCheck:
6060 case TypeCheckKind::kArrayObjectCheck:
6061 return !throws_into_catch && !kEmitCompilerReadBarrier;
6062 case TypeCheckKind::kInterfaceCheck:
6063 return !throws_into_catch && !kEmitCompilerReadBarrier && !kPoisonHeapReferences;
6064 case TypeCheckKind::kArrayCheck:
6065 case TypeCheckKind::kUnresolvedCheck:
6066 return false;
6067 }
6068 LOG(FATAL) << "Unreachable";
6069 UNREACHABLE();
6070 }
6071
6072 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6073 bool throws_into_catch = instruction->CanThrowIntoCatchBlock();
6074 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6075 bool is_fatal_slow_path = IsTypeCheckSlowPathFatal(type_check_kind, throws_into_catch);
6076 LocationSummary::CallKind call_kind = is_fatal_slow_path
6077 ? LocationSummary::kNoCall
6078 : LocationSummary::kCallOnSlowPath;
6079 LocationSummary* locations = new (GetGraph()->GetArena()) LocationSummary(instruction, call_kind);
6080 locations->SetInAt(0, Location::RequiresRegister());
6081 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6082 // Require a register for the interface check since there is a loop that compares the class to
6083 // a memory address.
6084 locations->SetInAt(1, Location::RequiresRegister());
6085 } else {
6086 locations->SetInAt(1, Location::Any());
6087 }
6088
6089 // Note that TypeCheckSlowPathX86_64 uses this "temp" register too.
6090 locations->AddTemp(Location::RequiresRegister());
6091 // When read barriers are enabled, we need an additional temporary
6092 // register for some cases.
6093 if (CheckCastTypeCheckNeedsATemporary(type_check_kind)) {
6094 locations->AddTemp(Location::RequiresRegister());
6095 }
6096 }
6097
6098 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6099 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6100 LocationSummary* locations = instruction->GetLocations();
6101 Location obj_loc = locations->InAt(0);
6102 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6103 Location cls = locations->InAt(1);
6104 Location temp_loc = locations->GetTemp(0);
6105 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6106 Location maybe_temp2_loc = CheckCastTypeCheckNeedsATemporary(type_check_kind) ?
6107 locations->GetTemp(1) :
6108 Location::NoLocation();
6109 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6110 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6111 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6112 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6113 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6114 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6115 const uint32_t object_array_data_offset =
6116 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6117
6118 // With read barriers the type check slow path is always non-fatal: the code below avoids read
6119 // barriers for performance and code size reasons, which can produce false negatives that must
6120 // fall back to the runtime entrypoint.
6121 bool is_type_check_slow_path_fatal =
6122 IsTypeCheckSlowPathFatal(type_check_kind, instruction->CanThrowIntoCatchBlock());
6123 SlowPathCode* type_check_slow_path =
6124 new (GetGraph()->GetArena()) TypeCheckSlowPathX86_64(instruction,
6125 is_type_check_slow_path_fatal);
6126 codegen_->AddSlowPath(type_check_slow_path);
6127
6128
6129 NearLabel done;
6130 // Avoid null check if we know obj is not null.
6131 if (instruction->MustDoNullCheck()) {
6132 __ testl(obj, obj);
6133 __ j(kEqual, &done);
6134 }
6135
6136 switch (type_check_kind) {
6137 case TypeCheckKind::kExactCheck:
6138 case TypeCheckKind::kArrayCheck: {
6139 // /* HeapReference<Class> */ temp = obj->klass_
6140 GenerateReferenceLoadTwoRegisters(instruction,
6141 temp_loc,
6142 obj_loc,
6143 class_offset,
6144 kWithoutReadBarrier);
6145 if (cls.IsRegister()) {
6146 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6147 } else {
6148 DCHECK(cls.IsStackSlot()) << cls;
6149 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6150 }
6151 // Jump to slow path for throwing the exception or doing a
6152 // more involved array check.
6153 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6154 break;
6155 }
6156
6157 case TypeCheckKind::kAbstractClassCheck: {
6158 // /* HeapReference<Class> */ temp = obj->klass_
6159 GenerateReferenceLoadTwoRegisters(instruction,
6160 temp_loc,
6161 obj_loc,
6162 class_offset,
6163 kWithoutReadBarrier);
6164 // If the class is abstract, we eagerly fetch the super class of the
6165 // object to avoid doing a comparison we know will fail.
6166 NearLabel loop;
6167 __ Bind(&loop);
6168 // /* HeapReference<Class> */ temp = temp->super_class_
6169 GenerateReferenceLoadOneRegister(instruction,
6170 temp_loc,
6171 super_offset,
6172 maybe_temp2_loc,
6173 kWithoutReadBarrier);
6174
6175 // If the class reference currently in `temp` is null, jump to the slow path to throw the
6176 // exception.
6177 __ testl(temp, temp);
6178 // Otherwise, compare the classes.
6179 __ j(kZero, type_check_slow_path->GetEntryLabel());
6180 if (cls.IsRegister()) {
6181 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6182 } else {
6183 DCHECK(cls.IsStackSlot()) << cls;
6184 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6185 }
6186 __ j(kNotEqual, &loop);
6187 break;
6188 }
6189
6190 case TypeCheckKind::kClassHierarchyCheck: {
6191 // /* HeapReference<Class> */ temp = obj->klass_
6192 GenerateReferenceLoadTwoRegisters(instruction,
6193 temp_loc,
6194 obj_loc,
6195 class_offset,
6196 kWithoutReadBarrier);
6197 // Walk over the class hierarchy to find a match.
6198 NearLabel loop;
6199 __ Bind(&loop);
6200 if (cls.IsRegister()) {
6201 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6202 } else {
6203 DCHECK(cls.IsStackSlot()) << cls;
6204 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6205 }
6206 __ j(kEqual, &done);
6207
6208 // /* HeapReference<Class> */ temp = temp->super_class_
6209 GenerateReferenceLoadOneRegister(instruction,
6210 temp_loc,
6211 super_offset,
6212 maybe_temp2_loc,
6213 kWithoutReadBarrier);
6214
6215 // If the class reference currently in `temp` is not null, jump
6216 // back to the beginning of the loop.
6217 __ testl(temp, temp);
6218 __ j(kNotZero, &loop);
6219 // Otherwise, jump to the slow path to throw the exception.
6220 __ jmp(type_check_slow_path->GetEntryLabel());
6221 break;
6222 }
6223
6224 case TypeCheckKind::kArrayObjectCheck: {
6225 // /* HeapReference<Class> */ temp = obj->klass_
6226 GenerateReferenceLoadTwoRegisters(instruction,
6227 temp_loc,
6228 obj_loc,
6229 class_offset,
6230 kWithoutReadBarrier);
6231 // Do an exact check.
6232 NearLabel check_non_primitive_component_type;
6233 if (cls.IsRegister()) {
6234 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6235 } else {
6236 DCHECK(cls.IsStackSlot()) << cls;
6237 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6238 }
6239 __ j(kEqual, &done);
6240
6241 // Otherwise, we need to check that the object's class is a non-primitive array.
6242 // /* HeapReference<Class> */ temp = temp->component_type_
6243 GenerateReferenceLoadOneRegister(instruction,
6244 temp_loc,
6245 component_offset,
6246 maybe_temp2_loc,
6247 kWithoutReadBarrier);
6248
6249 // If the component type is not null (i.e. the object is indeed
6250 // an array), jump to label `check_non_primitive_component_type`
6251 // to further check that this component type is not a primitive
6252 // type.
6253 __ testl(temp, temp);
6254 // Otherwise, jump to the slow path to throw the exception.
6255 __ j(kZero, type_check_slow_path->GetEntryLabel());
6256 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6257 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6258 break;
6259 }
6260
6261 case TypeCheckKind::kUnresolvedCheck: {
6262 // We always go into the type check slow path for the unresolved case.
6263 //
6264 // We cannot directly call the CheckCast runtime entry point
6265 // without resorting to a type checking slow path here (i.e. by
6266 // calling InvokeRuntime directly), as it would require to
6267 // assign fixed registers for the inputs of this HInstanceOf
6268 // instruction (following the runtime calling convention), which
6269 // might be cluttered by the potential first read barrier
6270 // emission at the beginning of this method.
6271 __ jmp(type_check_slow_path->GetEntryLabel());
6272 break;
6273 }
6274
6275 case TypeCheckKind::kInterfaceCheck:
6276 // Fast path for the interface check. We always take the slow path under heap poisoning since
6277 // unpoisoning cls would require an extra temp.
6278 if (!kPoisonHeapReferences) {
6279 // Try to avoid read barriers to improve the fast path. We cannot get false positives by
6280 // doing this.
6281 // /* HeapReference<Class> */ temp = obj->klass_
6282 GenerateReferenceLoadTwoRegisters(instruction,
6283 temp_loc,
6284 obj_loc,
6285 class_offset,
6286 kWithoutReadBarrier);
6287
6288 // /* HeapReference<Class> */ temp = temp->iftable_
6289 GenerateReferenceLoadTwoRegisters(instruction,
6290 temp_loc,
6291 temp_loc,
6292 iftable_offset,
6293 kWithoutReadBarrier);
6294 // Iftable is never null.
6295 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
6296 // Loop through the iftable and check if any class matches.
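// Each iftable entry spans two references (the interface class and its method array), which is
// why the index below is stepped by 2.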
6297 NearLabel start_loop;
6298 __ Bind(&start_loop);
6299 // Need to subtract first to handle the empty array case.
6300 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
6301 __ j(kNegative, type_check_slow_path->GetEntryLabel());
6302 // Go to next interface if the classes do not match.
6303 __ cmpl(cls.AsRegister<CpuRegister>(),
6304 CodeGeneratorX86_64::ArrayAddress(temp,
6305 maybe_temp2_loc,
6306 TIMES_4,
6307 object_array_data_offset));
6308 __ j(kNotEqual, &start_loop); // Fall through if the classes match.
6309 } else {
6310 __ jmp(type_check_slow_path->GetEntryLabel());
6311 }
6312 break;
6313 }
6314
6315 if (done.IsLinked()) {
6316 __ Bind(&done);
6317 }
6318
6319 __ Bind(type_check_slow_path->GetExitLabel());
6320 }
6321
6322 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6323 LocationSummary* locations =
6324 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kCallOnMainOnly);
6325 InvokeRuntimeCallingConvention calling_convention;
6326 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6327 }
6328
6329 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6330 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
6331 instruction,
6332 instruction->GetDexPc());
6333 if (instruction->IsEnter()) {
6334 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6335 } else {
6336 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6337 }
6338 }
6339
6340 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6341 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6342 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6343
6344 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6345 LocationSummary* locations =
6346 new (GetGraph()->GetArena()) LocationSummary(instruction, LocationSummary::kNoCall);
6347 DCHECK(instruction->GetResultType() == Primitive::kPrimInt
6348 || instruction->GetResultType() == Primitive::kPrimLong);
6349 locations->SetInAt(0, Location::RequiresRegister());
6350 locations->SetInAt(1, Location::Any());
6351 locations->SetOut(Location::SameAsFirstInput());
6352 }
6353
6354 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6355 HandleBitwiseOperation(instruction);
6356 }
6357
6358 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6359 HandleBitwiseOperation(instruction);
6360 }
6361
6362 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6363 HandleBitwiseOperation(instruction);
6364 }
6365
6366 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6367 LocationSummary* locations = instruction->GetLocations();
6368 Location first = locations->InAt(0);
6369 Location second = locations->InAt(1);
6370 DCHECK(first.Equals(locations->Out()));
6371
6372 if (instruction->GetResultType() == Primitive::kPrimInt) {
6373 if (second.IsRegister()) {
6374 if (instruction->IsAnd()) {
6375 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6376 } else if (instruction->IsOr()) {
6377 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6378 } else {
6379 DCHECK(instruction->IsXor());
6380 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6381 }
6382 } else if (second.IsConstant()) {
6383 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6384 if (instruction->IsAnd()) {
6385 __ andl(first.AsRegister<CpuRegister>(), imm);
6386 } else if (instruction->IsOr()) {
6387 __ orl(first.AsRegister<CpuRegister>(), imm);
6388 } else {
6389 DCHECK(instruction->IsXor());
6390 __ xorl(first.AsRegister<CpuRegister>(), imm);
6391 }
6392 } else {
6393 Address address(CpuRegister(RSP), second.GetStackIndex());
6394 if (instruction->IsAnd()) {
6395 __ andl(first.AsRegister<CpuRegister>(), address);
6396 } else if (instruction->IsOr()) {
6397 __ orl(first.AsRegister<CpuRegister>(), address);
6398 } else {
6399 DCHECK(instruction->IsXor());
6400 __ xorl(first.AsRegister<CpuRegister>(), address);
6401 }
6402 }
6403 } else {
6404 DCHECK_EQ(instruction->GetResultType(), Primitive::kPrimLong);
6405 CpuRegister first_reg = first.AsRegister<CpuRegister>();
6406 bool second_is_constant = false;
6407 int64_t value = 0;
6408 if (second.IsConstant()) {
6409 second_is_constant = true;
6410 value = second.GetConstant()->AsLongConstant()->GetValue();
6411 }
6412 bool is_int32_value = IsInt<32>(value);
6413
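// andq/orq/xorq can only encode a sign-extended 32-bit immediate; wider constants are taken from
// the constant area via a RIP-relative literal.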
6414 if (instruction->IsAnd()) {
6415 if (second_is_constant) {
6416 if (is_int32_value) {
6417 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6418 } else {
6419 __ andq(first_reg, codegen_->LiteralInt64Address(value));
6420 }
6421 } else if (second.IsDoubleStackSlot()) {
6422 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6423 } else {
6424 __ andq(first_reg, second.AsRegister<CpuRegister>());
6425 }
6426 } else if (instruction->IsOr()) {
6427 if (second_is_constant) {
6428 if (is_int32_value) {
6429 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6430 } else {
6431 __ orq(first_reg, codegen_->LiteralInt64Address(value));
6432 }
6433 } else if (second.IsDoubleStackSlot()) {
6434 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6435 } else {
6436 __ orq(first_reg, second.AsRegister<CpuRegister>());
6437 }
6438 } else {
6439 DCHECK(instruction->IsXor());
6440 if (second_is_constant) {
6441 if (is_int32_value) {
6442 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6443 } else {
6444 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6445 }
6446 } else if (second.IsDoubleStackSlot()) {
6447 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6448 } else {
6449 __ xorq(first_reg, second.AsRegister<CpuRegister>());
6450 }
6451 }
6452 }
6453 }
6454
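// Loads the reference at *(out + offset) back into `out` itself. `maybe_temp` is only used by the
// non-Baker read barrier slow path, which must preserve the original value of `out`.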
6455 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
6456 HInstruction* instruction,
6457 Location out,
6458 uint32_t offset,
6459 Location maybe_temp,
6460 ReadBarrierOption read_barrier_option) {
6461 CpuRegister out_reg = out.AsRegister<CpuRegister>();
6462 if (read_barrier_option == kWithReadBarrier) {
6463 CHECK(kEmitCompilerReadBarrier);
6464 if (kUseBakerReadBarrier) {
6465 // Load with fast path based Baker's read barrier.
6466 // /* HeapReference<Object> */ out = *(out + offset)
6467 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6468 instruction, out, out_reg, offset, /* needs_null_check */ false);
6469 } else {
6470 // Load with slow path based read barrier.
6471 // Save the value of `out` into `maybe_temp` before overwriting it
6472 // in the following move operation, as we will need it for the
6473 // read barrier below.
6474 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
6475 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
6476 // /* HeapReference<Object> */ out = *(out + offset)
6477 __ movl(out_reg, Address(out_reg, offset));
6478 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
6479 }
6480 } else {
6481 // Plain load with no read barrier.
6482 // /* HeapReference<Object> */ out = *(out + offset)
6483 __ movl(out_reg, Address(out_reg, offset));
6484 __ MaybeUnpoisonHeapReference(out_reg);
6485 }
6486 }
6487
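// Same as GenerateReferenceLoadOneRegister, except the holder is a separate register `obj`, so the
// original reference stays available and no extra temp is needed for the slow path.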
6488 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
6489 HInstruction* instruction,
6490 Location out,
6491 Location obj,
6492 uint32_t offset,
6493 ReadBarrierOption read_barrier_option) {
6494 CpuRegister out_reg = out.AsRegister<CpuRegister>();
6495 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
6496 if (read_barrier_option == kWithReadBarrier) {
6497 CHECK(kEmitCompilerReadBarrier);
6498 if (kUseBakerReadBarrier) {
6499 // Load with fast path based Baker's read barrier.
6500 // /* HeapReference<Object> */ out = *(obj + offset)
6501 codegen_->GenerateFieldLoadWithBakerReadBarrier(
6502 instruction, out, obj_reg, offset, /* needs_null_check */ false);
6503 } else {
6504 // Load with slow path based read barrier.
6505 // /* HeapReference<Object> */ out = *(obj + offset)
6506 __ movl(out_reg, Address(obj_reg, offset));
6507 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
6508 }
6509 } else {
6510 // Plain load with no read barrier.
6511 // /* HeapReference<Object> */ out = *(obj + offset)
6512 __ movl(out_reg, Address(obj_reg, offset));
6513 __ MaybeUnpoisonHeapReference(out_reg);
6514 }
6515 }
6516
6517 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
6518 HInstruction* instruction,
6519 Location root,
6520 const Address& address,
6521 Label* fixup_label,
6522 ReadBarrierOption read_barrier_option) {
6523 CpuRegister root_reg = root.AsRegister<CpuRegister>();
6524 if (read_barrier_option == kWithReadBarrier) {
6525 DCHECK(kEmitCompilerReadBarrier);
6526 if (kUseBakerReadBarrier) {
6527 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
6528 // Baker's read barriers are used:
6529 //
6530 // root = obj.field;
6531 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
6532 // if (temp != null) {
6533 // root = temp(root)
6534 // }
6535
6536 // /* GcRoot<mirror::Object> */ root = *address
6537 __ movl(root_reg, address);
6538 if (fixup_label != nullptr) {
6539 __ Bind(fixup_label);
6540 }
6541 static_assert(
6542 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
6543 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
6544 "have different sizes.");
6545 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
6546 "art::mirror::CompressedReference<mirror::Object> and int32_t "
6547 "have different sizes.");
6548
6549 // Slow path marking the GC root `root`.
6550 SlowPathCode* slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
6551 instruction, root, /* unpoison_ref_before_marking */ false);
6552 codegen_->AddSlowPath(slow_path);
6553
6554 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
6555 const int32_t entry_point_offset =
6556 CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
6557 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip */ true), Immediate(0));
6558 // The entrypoint is null when the GC is not marking.
6559 __ j(kNotEqual, slow_path->GetEntryLabel());
6560 __ Bind(slow_path->GetExitLabel());
6561 } else {
6562 // GC root loaded through a slow path for read barriers other
6563 // than Baker's.
6564 // /* GcRoot<mirror::Object>* */ root = address
6565 __ leaq(root_reg, address);
6566 if (fixup_label != nullptr) {
6567 __ Bind(fixup_label);
6568 }
6569 // /* mirror::Object* */ root = root->Read()
6570 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
6571 }
6572 } else {
6573 // Plain GC root load with no read barrier.
6574 // /* GcRoot<mirror::Object> */ root = *address
6575 __ movl(root_reg, address);
6576 if (fixup_label != nullptr) {
6577 __ Bind(fixup_label);
6578 }
6579 // Note that GC roots are not affected by heap poisoning, thus we
6580 // do not have to unpoison `root_reg` here.
6581 }
6582 }
6583
6584 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
6585 Location ref,
6586 CpuRegister obj,
6587 uint32_t offset,
6588 bool needs_null_check) {
6589 DCHECK(kEmitCompilerReadBarrier);
6590 DCHECK(kUseBakerReadBarrier);
6591
6592 // /* HeapReference<Object> */ ref = *(obj + offset)
6593 Address src(obj, offset);
6594 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
6595 }
6596
6597 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
6598 Location ref,
6599 CpuRegister obj,
6600 uint32_t data_offset,
6601 Location index,
6602 bool needs_null_check) {
6603 DCHECK(kEmitCompilerReadBarrier);
6604 DCHECK(kUseBakerReadBarrier);
6605
6606 static_assert(
6607 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
6608 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
6609 // /* HeapReference<Object> */ ref =
6610 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
6611 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
6612 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
6613 }
6614
6615 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
6616 Location ref,
6617 CpuRegister obj,
6618 const Address& src,
6619 bool needs_null_check,
6620 bool always_update_field,
6621 CpuRegister* temp1,
6622 CpuRegister* temp2) {
6623 DCHECK(kEmitCompilerReadBarrier);
6624 DCHECK(kUseBakerReadBarrier);
6625
6626 // In slow path based read barriers, the read barrier call is
6627 // inserted after the original load. However, in fast path based
6628 // Baker's read barriers, we need to perform the load of
6629 // mirror::Object::monitor_ *before* the original reference load.
6630 // This load-load ordering is required by the read barrier.
6631 // The fast path/slow path (for Baker's algorithm) should look like:
6632 //
6633 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
6634 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
6635 // HeapReference<Object> ref = *src; // Original reference load.
6636 // bool is_gray = (rb_state == ReadBarrier::GrayState());
6637 // if (is_gray) {
6638 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
6639 // }
6640 //
6641 // Note: the original implementation in ReadBarrier::Barrier is
6642 // slightly more complex as:
6643 // - it implements the load-load fence using a data dependency on
6644 // the high-bits of rb_state, which are expected to be all zeroes
6645 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
6646 // here, which is a no-op thanks to the x86-64 memory model);
6647 // - it performs additional checks that we do not do here for
6648 // performance reasons.
6649
6650 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
6651 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
6652
6653 // Given the numeric representation, it's enough to check the low bit of the rb_state.
6654 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
6655 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
6656 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
6657 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
6658 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
6659
6660 // if (rb_state == ReadBarrier::GrayState())
6661 // ref = ReadBarrier::Mark(ref);
6662 // At this point, just do the "if" and make sure that flags are preserved until the branch.
6663 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
6664 if (needs_null_check) {
6665 MaybeRecordImplicitNullCheck(instruction);
6666 }
6667
6668 // Load fence to prevent load-load reordering.
6669 // Note that this is a no-op, thanks to the x86-64 memory model.
6670 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
6671
6672 // The actual reference load.
6673 // /* HeapReference<Object> */ ref = *src
6674 __ movl(ref_reg, src); // Flags are unaffected.
6675
6676 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
6677 // Slow path marking the object `ref` when it is gray.
6678 SlowPathCode* slow_path;
6679 if (always_update_field) {
6680 DCHECK(temp1 != nullptr);
6681 DCHECK(temp2 != nullptr);
6682 slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
6683 instruction, ref, obj, src, /* unpoison_ref_before_marking */ true, *temp1, *temp2);
6684 } else {
6685 slow_path = new (GetGraph()->GetArena()) ReadBarrierMarkSlowPathX86_64(
6686 instruction, ref, /* unpoison_ref_before_marking */ true);
6687 }
6688 AddSlowPath(slow_path);
6689
6690 // We have done the "if" of the gray bit check above, now branch based on the flags.
6691 __ j(kNotZero, slow_path->GetEntryLabel());
6692
6693 // Object* ref = ref_addr->AsMirrorPtr()
6694 __ MaybeUnpoisonHeapReference(ref_reg);
6695
6696 __ Bind(slow_path->GetExitLabel());
6697 }
6698
6699 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
6700 Location out,
6701 Location ref,
6702 Location obj,
6703 uint32_t offset,
6704 Location index) {
6705 DCHECK(kEmitCompilerReadBarrier);
6706
6707 // Insert a slow path based read barrier *after* the reference load.
6708 //
6709 // If heap poisoning is enabled, the unpoisoning of the loaded
6710 // reference will be carried out by the runtime within the slow
6711 // path.
6712 //
6713 // Note that `ref` currently does not get unpoisoned (when heap
6714 // poisoning is enabled), which is alright as the `ref` argument is
6715 // not used by the artReadBarrierSlow entry point.
6716 //
6717 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
6718 SlowPathCode* slow_path = new (GetGraph()->GetArena())
6719 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
6720 AddSlowPath(slow_path);
6721
6722 __ jmp(slow_path->GetEntryLabel());
6723 __ Bind(slow_path->GetExitLabel());
6724 }
6725
6726 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
6727 Location out,
6728 Location ref,
6729 Location obj,
6730 uint32_t offset,
6731 Location index) {
6732 if (kEmitCompilerReadBarrier) {
6733 // Baker's read barriers shall be handled by the fast path
6734 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
6735 DCHECK(!kUseBakerReadBarrier);
6736 // If heap poisoning is enabled, unpoisoning will be taken care of
6737 // by the runtime within the slow path.
6738 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
6739 } else if (kPoisonHeapReferences) {
6740 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
6741 }
6742 }
6743
6744 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
6745 Location out,
6746 Location root) {
6747 DCHECK(kEmitCompilerReadBarrier);
6748
6749 // Insert a slow path based read barrier *after* the GC root load.
6750 //
6751 // Note that GC roots are not affected by heap poisoning, so we do
6752 // not need to do anything special for this here.
6753 SlowPathCode* slow_path =
6754 new (GetGraph()->GetArena()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
6755 AddSlowPath(slow_path);
6756
6757 __ jmp(slow_path->GetEntryLabel());
6758 __ Bind(slow_path->GetExitLabel());
6759 }
6760
6761 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6762 // Nothing to do, this should be removed during prepare for register allocator.
6763 LOG(FATAL) << "Unreachable";
6764 }
6765
6766 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
6767 // Nothing to do, this should be removed during prepare for register allocator.
6768 LOG(FATAL) << "Unreachable";
6769 }
6770
6771 // Simple implementation of packed switch - generate cascaded compare/jumps.
6772 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6773 LocationSummary* locations =
6774 new (GetGraph()->GetArena()) LocationSummary(switch_instr, LocationSummary::kNoCall);
6775 locations->SetInAt(0, Location::RequiresRegister());
6776 locations->AddTemp(Location::RequiresRegister());
6777 locations->AddTemp(Location::RequiresRegister());
6778 }
6779
6780 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
6781 int32_t lower_bound = switch_instr->GetStartValue();
6782 uint32_t num_entries = switch_instr->GetNumEntries();
6783 LocationSummary* locations = switch_instr->GetLocations();
6784 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
6785 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
6786 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
6787 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
6788
6789 // Should we generate smaller inline compare/jumps?
6790 if (num_entries <= kPackedSwitchJumpTableThreshold) {
6791 // Figure out the correct compare values and jump conditions.
6792 // Handle the first compare/branch as a special case because it might
6793 // jump to the default case.
6794 DCHECK_GT(num_entries, 2u);
6795 Condition first_condition;
6796 uint32_t index;
6797 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
6798 if (lower_bound != 0) {
6799 first_condition = kLess;
6800 __ cmpl(value_reg_in, Immediate(lower_bound));
6801 __ j(first_condition, codegen_->GetLabelOf(default_block));
6802 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
6803
6804 index = 1;
6805 } else {
6806 // Handle all the compare/jumps below.
6807 first_condition = kBelow;
6808 index = 0;
6809 }
6810
6811 // Handle the rest of the compare/jumps.
6812 for (; index + 1 < num_entries; index += 2) {
6813 int32_t compare_to_value = lower_bound + index + 1;
6814 __ cmpl(value_reg_in, Immediate(compare_to_value));
6815 // Jump to successors[index] if value < case_value[index + 1], i.e. value == case_value[index].
6816 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
6817 // Jump to successors[index + 1] if value == case_value[index + 1].
6818 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
6819 }
6820
6821 if (index != num_entries) {
6822 // The number of entries is odd. Handle the last one.
6823 DCHECK_EQ(index + 1, num_entries);
6824 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
6825 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
6826 }
6827
6828 // And the default for any other value.
6829 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
6830 __ jmp(codegen_->GetLabelOf(default_block));
6831 }
6832 return;
6833 }
6834
6835 // Remove the bias, if needed.
6836 Register value_reg_out = value_reg_in.AsRegister();
6837 if (lower_bound != 0) {
6838 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
6839 value_reg_out = temp_reg.AsRegister();
6840 }
6841 CpuRegister value_reg(value_reg_out);
6842
6843 // Is the value in range?
6844 __ cmpl(value_reg, Immediate(num_entries - 1));
6845 __ j(kAbove, codegen_->GetLabelOf(default_block));
6846
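// The dispatch sequence below is roughly:
//   leaq   base, [jump_table]
//   movsxd temp, [base + value * 4]   // The table stores 32-bit offsets relative to its start.
//   addq   temp, base
//   jmp    temp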
6847 // We are in the range of the table.
6848 // Load the address of the jump table in the constant area.
6849 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
6850
6851 // Load the (signed) offset from the jump table.
6852 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
6853
6854 // Add the offset to the address of the table base.
6855 __ addq(temp_reg, base_reg);
6856
6857 // And jump.
6858 __ jmp(temp_reg);
6859 }
6860
6861 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
6862 if (value == 0) {
6863 __ xorl(dest, dest);
6864 } else {
6865 __ movl(dest, Immediate(value));
6866 }
6867 }
6868
6869 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
6870 if (value == 0) {
6871 // Clears upper bits too.
6872 __ xorl(dest, dest);
6873 } else if (IsUint<32>(value)) {
6874 // We can use a 32 bit move, as it will zero-extend and is shorter.
6875 __ movl(dest, Immediate(static_cast<int32_t>(value)));
6876 } else {
6877 __ movq(dest, Immediate(value));
6878 }
6879 }
6880
6881 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
6882 if (value == 0) {
6883 __ xorps(dest, dest);
6884 } else {
6885 __ movss(dest, LiteralInt32Address(value));
6886 }
6887 }
6888
Load64BitValue(XmmRegister dest,int64_t value)6889 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
6890 if (value == 0) {
6891 __ xorpd(dest, dest);
6892 } else {
6893 __ movsd(dest, LiteralInt64Address(value));
6894 }
6895 }
6896
Load32BitValue(XmmRegister dest,float value)6897 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
6898 Load32BitValue(dest, bit_cast<int32_t, float>(value));
6899 }
6900
Load64BitValue(XmmRegister dest,double value)6901 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
6902 Load64BitValue(dest, bit_cast<int64_t, double>(value));
6903 }
6904
Compare32BitValue(CpuRegister dest,int32_t value)6905 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
6906 if (value == 0) {
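// testl of a register against itself is a shorter encoding than cmpl with a zero
// immediate and produces the same flags for a compare against zero.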
6907 __ testl(dest, dest);
6908 } else {
6909 __ cmpl(dest, Immediate(value));
6910 }
6911 }
6912
Compare64BitValue(CpuRegister dest,int64_t value)6913 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
6914 if (IsInt<32>(value)) {
6915 if (value == 0) {
6916 __ testq(dest, dest);
6917 } else {
6918 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
6919 }
6920 } else {
6921 // Value won't fit in a 32-bit immediate.
6922 __ cmpq(dest, LiteralInt64Address(value));
6923 }
6924 }
6925
GenerateIntCompare(Location lhs,Location rhs)6926 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
6927 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
6928 GenerateIntCompare(lhs_reg, rhs);
6929 }
6930
GenerateIntCompare(CpuRegister lhs,Location rhs)6931 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
6932 if (rhs.IsConstant()) {
6933 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
6934 Compare32BitValue(lhs, value);
6935 } else if (rhs.IsStackSlot()) {
6936 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
6937 } else {
6938 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
6939 }
6940 }
6941
GenerateLongCompare(Location lhs,Location rhs)6942 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
6943 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
6944 if (rhs.IsConstant()) {
6945 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
6946 Compare64BitValue(lhs_reg, value);
6947 } else if (rhs.IsDoubleStackSlot()) {
6948 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
6949 } else {
6950 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
6951 }
6952 }
6953
ArrayAddress(CpuRegister obj,Location index,ScaleFactor scale,uint32_t data_offset)6954 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
6955 Location index,
6956 ScaleFactor scale,
6957 uint32_t data_offset) {
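// For example (values are illustrative): a constant index of 3 with a TIMES_4 scale and
// a data_offset of 12 folds into Address(obj, (3 << 2) + 12) = Address(obj, 24), while a
// register index keeps the scaled-index addressing mode.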
6958 return index.IsConstant() ?
6959 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
6960 Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
6961 }
6962
Store64BitValueToStack(Location dest,int64_t value)6963 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
6964 DCHECK(dest.IsDoubleStackSlot());
6965 if (IsInt<32>(value)) {
6966 // Can move directly as an int32 constant.
6967 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
6968 Immediate(static_cast<int32_t>(value)));
6969 } else {
6970 Load64BitValue(CpuRegister(TMP), value);
6971 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
6972 }
6973 }
6974
6975 /**
6976 * Class to handle late fixup of offsets into constant area.
6977 */
6978 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
6979 public:
RIPFixup(CodeGeneratorX86_64 & codegen,size_t offset)6980 RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
6981 : codegen_(&codegen), offset_into_constant_area_(offset) {}
6982
6983 protected:
SetOffset(size_t offset)6984 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
6985
6986 CodeGeneratorX86_64* codegen_;
6987
6988 private:
Process(const MemoryRegion & region,int pos)6989 void Process(const MemoryRegion& region, int pos) OVERRIDE {
6990 // Patch the correct offset for the instruction. We use the address of the
6991 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
6992 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
6993 int32_t relative_position = constant_offset - pos;
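// Worked example (numbers are illustrative): if the constant area begins at code offset
// 0x400, this literal sits 0x10 bytes into it, and the fixed-up instruction ends at
// pos = 0x100, then the four bytes at 0x100 - 4 receive 0x410 - 0x100 = 0x310, i.e. the
// RIP-relative displacement measured from the next instruction.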
6994
6995 // Patch in the right value.
6996 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
6997 }
6998
6999 // Location in constant area that the fixup refers to.
7000 size_t offset_into_constant_area_;
7001 };
7002
7003 /**
7004 * Class to handle late fixup of offsets to a jump table that will be created in the
7005 * constant area.
7006 */
7007 class JumpTableRIPFixup : public RIPFixup {
7008 public:
JumpTableRIPFixup(CodeGeneratorX86_64 & codegen,HPackedSwitch * switch_instr)7009 JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7010 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7011
CreateJumpTable()7012 void CreateJumpTable() {
7013 X86_64Assembler* assembler = codegen_->GetAssembler();
7014
7015 // Ensure that the reference to the jump table has the correct offset.
7016 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7017 SetOffset(offset_in_constant_table);
7018
7019 // Compute the offset from the start of the function to this jump table.
7020 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
7021
7022 // Populate the jump table with the correct target offsets.
7023 int32_t num_entries = switch_instr_->GetNumEntries();
7024 HBasicBlock* block = switch_instr_->GetBlock();
7025 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
7026 // The value that we want is the target offset - the position of the table.
7027 for (int32_t i = 0; i < num_entries; i++) {
7028 HBasicBlock* b = successors[i];
7029 Label* l = codegen_->GetLabelOf(b);
7030 DCHECK(l->IsBound());
7031 int32_t offset_to_block = l->Position() - current_table_offset;
7032 assembler->AppendInt32(offset_to_block);
7033 }
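// Each appended entry is thus the signed distance from the start of this table to the
// start of the corresponding successor block; the switch dispatch code reads it back
// with movsxd and adds the table base to recover the jump target.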
7034 }
7035
7036 private:
7037 const HPackedSwitch* switch_instr_;
7038 };
7039
Finalize(CodeAllocator * allocator)7040 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
7041 // Generate the constant area if needed.
7042 X86_64Assembler* assembler = GetAssembler();
7043 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
7044 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
7045 assembler->Align(4, 0);
7046 constant_area_start_ = assembler->CodeSize();
7047
7048 // Populate any jump tables.
7049 for (auto jump_table : fixups_to_jump_tables_) {
7050 jump_table->CreateJumpTable();
7051 }
7052
7053 // And now add the constant area to the generated code.
7054 assembler->AddConstantArea();
7055 }
7056
7057 // And finish up.
7058 CodeGenerator::Finalize(allocator);
7059 }
7060
LiteralDoubleAddress(double v)7061 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
7062 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddDouble(v));
7063 return Address::RIP(fixup);
7064 }
7065
LiteralFloatAddress(float v)7066 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
7067 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddFloat(v));
7068 return Address::RIP(fixup);
7069 }
7070
LiteralInt32Address(int32_t v)7071 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
7072 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt32(v));
7073 return Address::RIP(fixup);
7074 }
7075
LiteralInt64Address(int64_t v)7076 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
7077 AssemblerFixup* fixup = new (GetGraph()->GetArena()) RIPFixup(*this, __ AddInt64(v));
7078 return Address::RIP(fixup);
7079 }
7080
7081 // TODO: trg as memory.
MoveFromReturnRegister(Location trg,Primitive::Type type)7082 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, Primitive::Type type) {
7083 if (!trg.IsValid()) {
7084 DCHECK_EQ(type, Primitive::kPrimVoid);
7085 return;
7086 }
7087
7088 DCHECK_NE(type, Primitive::kPrimVoid);
7089
7090 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
7091 if (trg.Equals(return_loc)) {
7092 return;
7093 }
7094
7095 // Let the parallel move resolver take care of all of this.
7096 HParallelMove parallel_move(GetGraph()->GetArena());
7097 parallel_move.AddMove(return_loc, trg, type, nullptr);
7098 GetMoveResolver()->EmitNativeCode(&parallel_move);
7099 }
7100
LiteralCaseTable(HPackedSwitch * switch_instr)7101 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
7102 // Create a fixup to be used to create and address the jump table.
7103 JumpTableRIPFixup* table_fixup =
7104 new (GetGraph()->GetArena()) JumpTableRIPFixup(*this, switch_instr);
7105
7106 // Record the fixup so the jump table is created when the constant area is finalized.
7107 fixups_to_jump_tables_.push_back(table_fixup);
7108 return Address::RIP(table_fixup);
7109 }
7110
MoveInt64ToAddress(const Address & addr_low,const Address & addr_high,int64_t v,HInstruction * instruction)7111 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
7112 const Address& addr_high,
7113 int64_t v,
7114 HInstruction* instruction) {
7115 if (IsInt<32>(v)) {
7116 int32_t v_32 = v;
7117 __ movq(addr_low, Immediate(v_32));
7118 MaybeRecordImplicitNullCheck(instruction);
7119 } else {
7120 // Didn't fit in a 32-bit immediate. Do it in pieces.
7121 int32_t low_v = Low32Bits(v);
7122 int32_t high_v = High32Bits(v);
7123 __ movl(addr_low, Immediate(low_v));
7124 MaybeRecordImplicitNullCheck(instruction);
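// Only the first store can act as the implicit null check: a null object faults here,
// before the high half is written.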
7125 __ movl(addr_high, Immediate(high_v));
7126 }
7127 }
7128
PatchJitRootUse(uint8_t * code,const uint8_t * roots_data,const PatchInfo<Label> & info,uint64_t index_in_table) const7129 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
7130 const uint8_t* roots_data,
7131 const PatchInfo<Label>& info,
7132 uint64_t index_in_table) const {
7133 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
7134 uintptr_t address =
7135 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
7136 typedef __attribute__((__aligned__(1))) uint32_t unaligned_uint32_t;
7137 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
7138 dchecked_integral_cast<uint32_t>(address);
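// This overwrites the 32-bit literal emitted for the root load with the address of the
// corresponding entry in the JIT roots table.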
7139 }
7140
EmitJitRootPatches(uint8_t * code,const uint8_t * roots_data)7141 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
7142 for (const PatchInfo<Label>& info : jit_string_patches_) {
7143 const auto& it = jit_string_roots_.find(
7144 StringReference(&info.dex_file, dex::StringIndex(info.index)));
7145 DCHECK(it != jit_string_roots_.end());
7146 PatchJitRootUse(code, roots_data, info, it->second);
7147 }
7148
7149 for (const PatchInfo<Label>& info : jit_class_patches_) {
7150 const auto& it = jit_class_roots_.find(
7151 TypeReference(&info.dex_file, dex::TypeIndex(info.index)));
7152 DCHECK(it != jit_class_roots_.end());
7153 PatchJitRootUse(code, roots_data, info, it->second);
7154 }
7155 }
7156
7157 #undef __
7158
7159 } // namespace x86_64
7160 } // namespace art
7161