1 /*
2 * Copyright (C) 2014 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "code_generator_x86_64.h"
18
19 #include "art_method.h"
20 #include "class_table.h"
21 #include "code_generator_utils.h"
22 #include "compiled_method.h"
23 #include "entrypoints/quick/quick_entrypoints.h"
24 #include "gc/accounting/card_table.h"
25 #include "gc/space/image_space.h"
26 #include "heap_poisoning.h"
27 #include "intrinsics.h"
28 #include "intrinsics_x86_64.h"
29 #include "linker/linker_patch.h"
30 #include "lock_word.h"
31 #include "mirror/array-inl.h"
32 #include "mirror/class-inl.h"
33 #include "mirror/object_reference.h"
34 #include "thread.h"
35 #include "utils/assembler.h"
36 #include "utils/stack_checks.h"
37 #include "utils/x86_64/assembler_x86_64.h"
38 #include "utils/x86_64/managed_register_x86_64.h"
39
40 namespace art {
41
42 template<class MirrorType>
43 class GcRoot;
44
45 namespace x86_64 {
46
47 static constexpr int kCurrentMethodStackOffset = 0;
48 static constexpr Register kMethodRegisterArgument = RDI;
49 // The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
50 // table version generates 7 instructions and num_entries literals. The compare/jump sequence
51 // therefore generates less code/data when num_entries is small.
52 static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;
53
54 static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
55 static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };
56
57 static constexpr int kC2ConditionMask = 0x400;
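// Note: 0x400 selects bit 10 of the x87 FPU status word, the C2 condition flag, which the
// FPREM instruction leaves set while the partial-remainder reduction is still incomplete.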
58
59 static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
60 // Custom calling convention: RAX serves as both input and output.
61 RegisterSet caller_saves = RegisterSet::Empty();
62 caller_saves.Add(Location::RegisterLocation(RAX));
63 return caller_saves;
64 }
65
66 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
67 #define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())-> // NOLINT
68 #define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()
69
70 class NullCheckSlowPathX86_64 : public SlowPathCode {
71 public:
72 explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}
73
74 void EmitNativeCode(CodeGenerator* codegen) override {
75 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
76 __ Bind(GetEntryLabel());
77 if (instruction_->CanThrowIntoCatchBlock()) {
78 // Live registers will be restored in the catch block if caught.
79 SaveLiveRegisters(codegen, instruction_->GetLocations());
80 }
81 x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
82 instruction_,
83 instruction_->GetDexPc(),
84 this);
85 CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
86 }
87
88 bool IsFatal() const override { return true; }
89
90 const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }
91
92 private:
93 DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
94 };
95
96 class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
97 public:
98 explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}
99
100 void EmitNativeCode(CodeGenerator* codegen) override {
101 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
102 __ Bind(GetEntryLabel());
103 x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
104 CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
105 }
106
107 bool IsFatal() const override { return true; }
108
109 const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }
110
111 private:
112 DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
113 };
114
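// Slow path for integer div/rem when the divisor is -1. Dividing the most negative value by -1
// would raise a divide error (#DE) in the idiv instruction, so the result is computed directly
// here instead: the negated dividend for div, and zero for rem.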
115 class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
116 public:
117 DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
118 : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}
119
120 void EmitNativeCode(CodeGenerator* codegen) override {
121 __ Bind(GetEntryLabel());
122 if (type_ == DataType::Type::kInt32) {
123 if (is_div_) {
124 __ negl(cpu_reg_);
125 } else {
126 __ xorl(cpu_reg_, cpu_reg_);
127 }
128
129 } else {
130 DCHECK_EQ(DataType::Type::kInt64, type_);
131 if (is_div_) {
132 __ negq(cpu_reg_);
133 } else {
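// Note: writes to a 32-bit register zero-extend into the full 64-bit register, so this
// 32-bit xor also clears the upper half of the 64-bit remainder.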
134 __ xorl(cpu_reg_, cpu_reg_);
135 }
136 }
137 __ jmp(GetExitLabel());
138 }
139
140 const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }
141
142 private:
143 const CpuRegister cpu_reg_;
144 const DataType::Type type_;
145 const bool is_div_;
146 DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
147 };
148
149 class SuspendCheckSlowPathX86_64 : public SlowPathCode {
150 public:
151 SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
152 : SlowPathCode(instruction), successor_(successor) {}
153
154 void EmitNativeCode(CodeGenerator* codegen) override {
155 LocationSummary* locations = instruction_->GetLocations();
156 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
157 __ Bind(GetEntryLabel());
158 SaveLiveRegisters(codegen, locations); // Only saves full width XMM for SIMD.
159 x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
160 CheckEntrypointTypes<kQuickTestSuspend, void, void>();
161 RestoreLiveRegisters(codegen, locations); // Only restores full width XMM for SIMD.
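// With no explicit successor, resume at the return label placed right after the suspend
// check; otherwise branch directly to the successor block.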
162 if (successor_ == nullptr) {
163 __ jmp(GetReturnLabel());
164 } else {
165 __ jmp(x86_64_codegen->GetLabelOf(successor_));
166 }
167 }
168
169 Label* GetReturnLabel() {
170 DCHECK(successor_ == nullptr);
171 return &return_label_;
172 }
173
174 HBasicBlock* GetSuccessor() const {
175 return successor_;
176 }
177
178 const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }
179
180 private:
181 HBasicBlock* const successor_;
182 Label return_label_;
183
184 DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
185 };
186
187 class BoundsCheckSlowPathX86_64 : public SlowPathCode {
188 public:
189 explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
190 : SlowPathCode(instruction) {}
191
192 void EmitNativeCode(CodeGenerator* codegen) override {
193 LocationSummary* locations = instruction_->GetLocations();
194 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
195 __ Bind(GetEntryLabel());
196 if (instruction_->CanThrowIntoCatchBlock()) {
197 // Live registers will be restored in the catch block if caught.
198 SaveLiveRegisters(codegen, instruction_->GetLocations());
199 }
200 // Are we using an array length from memory?
201 HInstruction* array_length = instruction_->InputAt(1);
202 Location length_loc = locations->InAt(1);
203 InvokeRuntimeCallingConvention calling_convention;
204 if (array_length->IsArrayLength() && array_length->IsEmittedAtUseSite()) {
205 // Load the array length into our temporary.
206 HArrayLength* length = array_length->AsArrayLength();
207 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(length);
208 Location array_loc = array_length->GetLocations()->InAt(0);
209 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
210 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(1));
211 // Check for conflicts with index.
212 if (length_loc.Equals(locations->InAt(0))) {
213 // We know we aren't using parameter 2.
214 length_loc = Location::RegisterLocation(calling_convention.GetRegisterAt(2));
215 }
216 __ movl(length_loc.AsRegister<CpuRegister>(), array_len);
217 if (mirror::kUseStringCompression && length->IsStringLength()) {
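// With string compression, the low bit of the count field is the compression flag, so
// shift it out to obtain the actual character count.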
218 __ shrl(length_loc.AsRegister<CpuRegister>(), Immediate(1));
219 }
220 }
221
222 // We're moving two locations to locations that could overlap, so we need a parallel
223 // move resolver.
224 codegen->EmitParallelMoves(
225 locations->InAt(0),
226 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
227 DataType::Type::kInt32,
228 length_loc,
229 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
230 DataType::Type::kInt32);
231 QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
232 ? kQuickThrowStringBounds
233 : kQuickThrowArrayBounds;
234 x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
235 CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
236 CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
237 }
238
239 bool IsFatal() const override { return true; }
240
241 const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }
242
243 private:
244 DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
245 };
246
247 class LoadClassSlowPathX86_64 : public SlowPathCode {
248 public:
249 LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
250 : SlowPathCode(at), cls_(cls) {
251 DCHECK(at->IsLoadClass() || at->IsClinitCheck());
252 DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
253 }
254
255 void EmitNativeCode(CodeGenerator* codegen) override {
256 LocationSummary* locations = instruction_->GetLocations();
257 Location out = locations->Out();
258 const uint32_t dex_pc = instruction_->GetDexPc();
259 bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
260 bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();
261
262 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
263 __ Bind(GetEntryLabel());
264 SaveLiveRegisters(codegen, locations);
265
266 // Custom calling convention: RAX serves as both input and output.
267 if (must_resolve_type) {
268 DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()));
269 dex::TypeIndex type_index = cls_->GetTypeIndex();
270 __ movl(CpuRegister(RAX), Immediate(type_index.index_));
271 x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
272 CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
273 // If we also must_do_clinit, the resolved type is now in the correct register.
274 } else {
275 DCHECK(must_do_clinit);
276 Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
277 x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
278 }
279 if (must_do_clinit) {
280 x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
281 CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
282 }
283
284 // Move the class to the desired location.
285 if (out.IsValid()) {
286 DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
287 x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
288 }
289
290 RestoreLiveRegisters(codegen, locations);
291 __ jmp(GetExitLabel());
292 }
293
294 const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }
295
296 private:
297 // The class this slow path will load.
298 HLoadClass* const cls_;
299
300 DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
301 };
302
303 class LoadStringSlowPathX86_64 : public SlowPathCode {
304 public:
305 explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}
306
307 void EmitNativeCode(CodeGenerator* codegen) override {
308 LocationSummary* locations = instruction_->GetLocations();
309 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
310
311 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
312 __ Bind(GetEntryLabel());
313 SaveLiveRegisters(codegen, locations);
314
315 const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
316 // Custom calling convention: RAX serves as both input and output.
317 __ movl(CpuRegister(RAX), Immediate(string_index.index_));
318 x86_64_codegen->InvokeRuntime(kQuickResolveString,
319 instruction_,
320 instruction_->GetDexPc(),
321 this);
322 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
323 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
324 RestoreLiveRegisters(codegen, locations);
325
326 __ jmp(GetExitLabel());
327 }
328
329 const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }
330
331 private:
332 DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
333 };
334
335 class TypeCheckSlowPathX86_64 : public SlowPathCode {
336 public:
337 TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
338 : SlowPathCode(instruction), is_fatal_(is_fatal) {}
339
340 void EmitNativeCode(CodeGenerator* codegen) override {
341 LocationSummary* locations = instruction_->GetLocations();
342 uint32_t dex_pc = instruction_->GetDexPc();
343 DCHECK(instruction_->IsCheckCast()
344 || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));
345
346 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
347 __ Bind(GetEntryLabel());
348
349 if (kPoisonHeapReferences &&
350 instruction_->IsCheckCast() &&
351 instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
352 // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
353 __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
354 }
355
356 if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
357 SaveLiveRegisters(codegen, locations);
358 }
359
360 // We're moving two locations to locations that could overlap, so we need a parallel
361 // move resolver.
362 InvokeRuntimeCallingConvention calling_convention;
363 codegen->EmitParallelMoves(locations->InAt(0),
364 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
365 DataType::Type::kReference,
366 locations->InAt(1),
367 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
368 DataType::Type::kReference);
369 if (instruction_->IsInstanceOf()) {
370 x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
371 CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
372 } else {
373 DCHECK(instruction_->IsCheckCast());
374 x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
375 CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
376 }
377
378 if (!is_fatal_) {
379 if (instruction_->IsInstanceOf()) {
380 x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
381 }
382
383 RestoreLiveRegisters(codegen, locations);
384 __ jmp(GetExitLabel());
385 }
386 }
387
388 const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }
389
390 bool IsFatal() const override { return is_fatal_; }
391
392 private:
393 const bool is_fatal_;
394
395 DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
396 };
397
398 class DeoptimizationSlowPathX86_64 : public SlowPathCode {
399 public:
400 explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
401 : SlowPathCode(instruction) {}
402
403 void EmitNativeCode(CodeGenerator* codegen) override {
404 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
405 __ Bind(GetEntryLabel());
406 LocationSummary* locations = instruction_->GetLocations();
407 SaveLiveRegisters(codegen, locations);
408 InvokeRuntimeCallingConvention calling_convention;
409 x86_64_codegen->Load32BitValue(
410 CpuRegister(calling_convention.GetRegisterAt(0)),
411 static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
412 x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
413 CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
414 }
415
416 const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }
417
418 private:
419 DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
420 };
421
422 class ArraySetSlowPathX86_64 : public SlowPathCode {
423 public:
424 explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}
425
426 void EmitNativeCode(CodeGenerator* codegen) override {
427 LocationSummary* locations = instruction_->GetLocations();
428 __ Bind(GetEntryLabel());
429 SaveLiveRegisters(codegen, locations);
430
431 InvokeRuntimeCallingConvention calling_convention;
432 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
433 parallel_move.AddMove(
434 locations->InAt(0),
435 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
436 DataType::Type::kReference,
437 nullptr);
438 parallel_move.AddMove(
439 locations->InAt(1),
440 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
441 DataType::Type::kInt32,
442 nullptr);
443 parallel_move.AddMove(
444 locations->InAt(2),
445 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
446 DataType::Type::kReference,
447 nullptr);
448 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
449
450 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
451 x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
452 CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
453 RestoreLiveRegisters(codegen, locations);
454 __ jmp(GetExitLabel());
455 }
456
457 const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }
458
459 private:
460 DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
461 };
462
463 // Slow path marking an object reference `ref` during a read
464 // barrier. The field `obj.field` in the object `obj` holding this
465 // reference does not get updated by this slow path after marking (see
466 // ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
467 //
468 // This means that after the execution of this slow path, `ref` will
469 // always be up-to-date, but `obj.field` may not; i.e., after the
470 // flip, `ref` will be a to-space reference, but `obj.field` will
471 // probably still be a from-space reference (unless it gets updated by
472 // another thread, or if another thread installed another object
473 // reference (different from `ref`) in `obj.field`).
474 class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
475 public:
476 ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
477 Location ref,
478 bool unpoison_ref_before_marking)
479 : SlowPathCode(instruction),
480 ref_(ref),
481 unpoison_ref_before_marking_(unpoison_ref_before_marking) {
482 DCHECK(kEmitCompilerReadBarrier);
483 }
484
485 const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }
486
487 void EmitNativeCode(CodeGenerator* codegen) override {
488 LocationSummary* locations = instruction_->GetLocations();
489 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
490 Register ref_reg = ref_cpu_reg.AsRegister();
491 DCHECK(locations->CanCall());
492 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
493 DCHECK(instruction_->IsInstanceFieldGet() ||
494 instruction_->IsStaticFieldGet() ||
495 instruction_->IsArrayGet() ||
496 instruction_->IsArraySet() ||
497 instruction_->IsLoadClass() ||
498 instruction_->IsLoadString() ||
499 instruction_->IsInstanceOf() ||
500 instruction_->IsCheckCast() ||
501 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()) ||
502 (instruction_->IsInvokeStaticOrDirect() && instruction_->GetLocations()->Intrinsified()))
503 << "Unexpected instruction in read barrier marking slow path: "
504 << instruction_->DebugName();
505
506 __ Bind(GetEntryLabel());
507 if (unpoison_ref_before_marking_) {
508 // Object* ref = ref_addr->AsMirrorPtr()
509 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
510 }
511 // No need to save live registers; it's taken care of by the
512 // entrypoint. Also, there is no need to update the stack mask,
513 // as this runtime call will not trigger a garbage collection.
514 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
515 DCHECK_NE(ref_reg, RSP);
516 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
517 // "Compact" slow path, saving two moves.
518 //
519 // Instead of using the standard runtime calling convention (input
520 // in RDI and output in RAX):
521 //
522 // RDI <- ref
523 // RAX <- ReadBarrierMark(RDI)
524 // ref <- RAX
525 //
526 // we just use rX (the register containing `ref`) as input and output
527 // of a dedicated entrypoint:
528 //
529 // rX <- ReadBarrierMarkRegX(rX)
530 //
531 int32_t entry_point_offset =
532 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
533 // This runtime call does not require a stack map.
534 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
535 __ jmp(GetExitLabel());
536 }
537
538 private:
539 // The location (register) of the marked object reference.
540 const Location ref_;
541 // Should the reference in `ref_` be unpoisoned prior to marking it?
542 const bool unpoison_ref_before_marking_;
543
544 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
545 };
546
547 // Slow path marking an object reference `ref` during a read barrier,
548 // and if needed, atomically updating the field `obj.field` in the
549 // object `obj` holding this reference after marking (contrary to
550 // ReadBarrierMarkSlowPathX86_64 above, which never tries to update
551 // `obj.field`).
552 //
553 // This means that after the execution of this slow path, both `ref`
554 // and `obj.field` will be up-to-date; i.e., after the flip, both will
555 // hold the same to-space reference (unless another thread installed
556 // another object reference (different from `ref`) in `obj.field`).
557 class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
558 public:
559 ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
560 Location ref,
561 CpuRegister obj,
562 const Address& field_addr,
563 bool unpoison_ref_before_marking,
564 CpuRegister temp1,
565 CpuRegister temp2)
566 : SlowPathCode(instruction),
567 ref_(ref),
568 obj_(obj),
569 field_addr_(field_addr),
570 unpoison_ref_before_marking_(unpoison_ref_before_marking),
571 temp1_(temp1),
572 temp2_(temp2) {
573 DCHECK(kEmitCompilerReadBarrier);
574 }
575
576 const char* GetDescription() const override {
577 return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
578 }
579
580 void EmitNativeCode(CodeGenerator* codegen) override {
581 LocationSummary* locations = instruction_->GetLocations();
582 CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
583 Register ref_reg = ref_cpu_reg.AsRegister();
584 DCHECK(locations->CanCall());
585 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
586 // This slow path is only used by the UnsafeCASObject intrinsic.
587 DCHECK((instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
588 << "Unexpected instruction in read barrier marking and field updating slow path: "
589 << instruction_->DebugName();
590 DCHECK(instruction_->GetLocations()->Intrinsified());
591 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kUnsafeCASObject);
592
593 __ Bind(GetEntryLabel());
594 if (unpoison_ref_before_marking_) {
595 // Object* ref = ref_addr->AsMirrorPtr()
596 __ MaybeUnpoisonHeapReference(ref_cpu_reg);
597 }
598
599 // Save the old (unpoisoned) reference.
600 __ movl(temp1_, ref_cpu_reg);
601
602 // No need to save live registers; it's taken care of by the
603 // entrypoint. Also, there is no need to update the stack mask,
604 // as this runtime call will not trigger a garbage collection.
605 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
606 DCHECK_NE(ref_reg, RSP);
607 DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
608 // "Compact" slow path, saving two moves.
609 //
610 // Instead of using the standard runtime calling convention (input
611 // in RDI and output in RAX):
612 //
613 // RDI <- ref
614 // RAX <- ReadBarrierMark(RDI)
615 // ref <- RAX
616 //
617 // we just use rX (the register containing `ref`) as input and output
618 // of a dedicated entrypoint:
619 //
620 // rX <- ReadBarrierMarkRegX(rX)
621 //
622 int32_t entry_point_offset =
623 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
624 // This runtime call does not require a stack map.
625 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
626
627 // If the new reference is different from the old reference,
628 // update the field in the holder (`*field_addr`).
629 //
630 // Note that this field could also hold a different object, if
631 // another thread had concurrently changed it. In that case, the
632 // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
633 // operation below would abort the CAS, leaving the field as-is.
634 NearLabel done;
635 __ cmpl(temp1_, ref_cpu_reg);
636 __ j(kEqual, &done);
637
638 // Update the holder's field atomically. This may fail if the
639 // mutator updates it before us, but that is OK. This is achieved
640 // using a strong compare-and-set (CAS) operation with relaxed
641 // memory synchronization ordering, where the expected value is
642 // the old reference and the desired value is the new reference.
643 // This operation is implemented with a 32-bit LOCK CMPXCHGL
644 // instruction, which requires the expected value (the old
645 // reference) to be in EAX. Save RAX beforehand, and move the
646 // expected value (stored in `temp1_`) into EAX.
647 __ movq(temp2_, CpuRegister(RAX));
648 __ movl(CpuRegister(RAX), temp1_);
649
650 // Convenience aliases.
651 CpuRegister base = obj_;
652 CpuRegister expected = CpuRegister(RAX);
653 CpuRegister value = ref_cpu_reg;
654
655 bool base_equals_value = (base.AsRegister() == value.AsRegister());
656 Register value_reg = ref_reg;
657 if (kPoisonHeapReferences) {
658 if (base_equals_value) {
659 // If `base` and `value` are the same register location, move
660 // `value_reg` to a temporary register. This way, poisoning
661 // `value_reg` won't invalidate `base`.
662 value_reg = temp1_.AsRegister();
663 __ movl(CpuRegister(value_reg), base);
664 }
665
666 // Check that the register allocator did not assign the location
667 // of `expected` (RAX) to `value` nor to `base`, so that heap
668 // poisoning (when enabled) works as intended below.
669 // - If `value` were equal to `expected`, both references would
670 // be poisoned twice, meaning they would not be poisoned at
671 // all, as heap poisoning uses address negation.
672 // - If `base` were equal to `expected`, poisoning `expected`
673 // would invalidate `base`.
674 DCHECK_NE(value_reg, expected.AsRegister());
675 DCHECK_NE(base.AsRegister(), expected.AsRegister());
676
677 __ PoisonHeapReference(expected);
678 __ PoisonHeapReference(CpuRegister(value_reg));
679 }
680
681 __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));
682
683 // If heap poisoning is enabled, we need to unpoison the values
684 // that were poisoned earlier.
685 if (kPoisonHeapReferences) {
686 if (base_equals_value) {
687 // `value_reg` has been moved to a temporary register, no need
688 // to unpoison it.
689 } else {
690 __ UnpoisonHeapReference(CpuRegister(value_reg));
691 }
692 // No need to unpoison `expected` (RAX), as it is overwritten below.
693 }
694
695 // Restore RAX.
696 __ movq(CpuRegister(RAX), temp2_);
697
698 __ Bind(&done);
699 __ jmp(GetExitLabel());
700 }
701
702 private:
703 // The location (register) of the marked object reference.
704 const Location ref_;
705 // The register containing the object holding the marked object reference field.
706 const CpuRegister obj_;
707 // The address of the marked reference field. The base of this address must be `obj_`.
708 const Address field_addr_;
709
710 // Should the reference in `ref_` be unpoisoned prior to marking it?
711 const bool unpoison_ref_before_marking_;
712
713 const CpuRegister temp1_;
714 const CpuRegister temp2_;
715
716 DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
717 };
718
719 // Slow path generating a read barrier for a heap reference.
720 class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
721 public:
722 ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
723 Location out,
724 Location ref,
725 Location obj,
726 uint32_t offset,
727 Location index)
728 : SlowPathCode(instruction),
729 out_(out),
730 ref_(ref),
731 obj_(obj),
732 offset_(offset),
733 index_(index) {
734 DCHECK(kEmitCompilerReadBarrier);
735 // If `obj` is equal to `out` or `ref`, it means the initial
736 // object has been overwritten by (or after) the heap object
737 // reference load to be instrumented, e.g.:
738 //
739 // __ movl(out, Address(out, offset));
740 // codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
741 //
742 // In that case, we have lost the information about the original
743 // object, and the emitted read barrier cannot work properly.
744 DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
745 DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
746 }
747
748 void EmitNativeCode(CodeGenerator* codegen) override {
749 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
750 LocationSummary* locations = instruction_->GetLocations();
751 CpuRegister reg_out = out_.AsRegister<CpuRegister>();
752 DCHECK(locations->CanCall());
753 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
754 DCHECK(instruction_->IsInstanceFieldGet() ||
755 instruction_->IsStaticFieldGet() ||
756 instruction_->IsArrayGet() ||
757 instruction_->IsInstanceOf() ||
758 instruction_->IsCheckCast() ||
759 (instruction_->IsInvokeVirtual() && instruction_->GetLocations()->Intrinsified()))
760 << "Unexpected instruction in read barrier for heap reference slow path: "
761 << instruction_->DebugName();
762
763 __ Bind(GetEntryLabel());
764 SaveLiveRegisters(codegen, locations);
765
766 // We may have to change the index's value, but as `index_` is a
767 // constant member (like other "inputs" of this slow path),
768 // introduce a copy of it, `index`.
769 Location index = index_;
770 if (index_.IsValid()) {
771 // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
772 if (instruction_->IsArrayGet()) {
773 // Compute real offset and store it in index_.
774 Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
775 DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
776 if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
777 // We are about to change the value of `index_reg` (see the
778 // calls to art::x86_64::X86_64Assembler::shll and
779 // art::x86_64::X86_64Assembler::AddImmediate below), but it
780 // has not been saved by the previous call to
781 // art::SlowPathCode::SaveLiveRegisters, as it is a
782 // callee-save register --
783 // art::SlowPathCode::SaveLiveRegisters does not consider
784 // callee-save registers, as it has been designed with the
785 // assumption that callee-save registers are supposed to be
786 // handled by the called function. So, as a callee-save
787 // register, `index_reg` _would_ eventually be saved onto
788 // the stack, but it would be too late: we would have
789 // changed its value earlier. Therefore, we manually save
790 // it here into another freely available register,
791 // `free_reg`, chosen of course among the caller-save
792 // registers (as a callee-save `free_reg` register would
793 // exhibit the same problem).
794 //
795 // Note we could have requested a temporary register from
796 // the register allocator instead; but we prefer not to, as
797 // this is a slow path, and we know we can find a
798 // caller-save register that is available.
799 Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
800 __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
801 index_reg = free_reg;
802 index = Location::RegisterLocation(index_reg);
803 } else {
804 // The initial register stored in `index_` has already been
805 // saved in the call to art::SlowPathCode::SaveLiveRegisters
806 // (as it is not a callee-save register), so we can freely
807 // use it.
808 }
809 // Shifting the index value contained in `index_reg` by the
810 // scale factor (2) cannot overflow in practice, as the
811 // runtime is unable to allocate object arrays with a size
812 // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
813 __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
814 static_assert(
815 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
816 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
817 __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
818 } else {
819 // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
820 // intrinsics, `index_` is not shifted by a scale factor of 2
821 // (as in the case of ArrayGet), as it is actually an offset
822 // to an object field within an object.
823 DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
824 DCHECK(instruction_->GetLocations()->Intrinsified());
825 DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
826 (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile))
827 << instruction_->AsInvoke()->GetIntrinsic();
828 DCHECK_EQ(offset_, 0U);
829 DCHECK(index_.IsRegister());
830 }
831 }
832
833 // We're moving two or three locations to locations that could
834 // overlap, so we need a parallel move resolver.
835 InvokeRuntimeCallingConvention calling_convention;
836 HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
837 parallel_move.AddMove(ref_,
838 Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
839 DataType::Type::kReference,
840 nullptr);
841 parallel_move.AddMove(obj_,
842 Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
843 DataType::Type::kReference,
844 nullptr);
845 if (index.IsValid()) {
846 parallel_move.AddMove(index,
847 Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
848 DataType::Type::kInt32,
849 nullptr);
850 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
851 } else {
852 codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
853 __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
854 }
855 x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
856 instruction_,
857 instruction_->GetDexPc(),
858 this);
859 CheckEntrypointTypes<
860 kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
861 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
862
863 RestoreLiveRegisters(codegen, locations);
864 __ jmp(GetExitLabel());
865 }
866
867 const char* GetDescription() const override {
868 return "ReadBarrierForHeapReferenceSlowPathX86_64";
869 }
870
871 private:
872 CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
873 size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
874 size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
875 for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
876 if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
877 return static_cast<CpuRegister>(i);
878 }
879 }
880 // We shall never fail to find a free caller-save register, as
881 // there are more than two core caller-save registers on x86-64
882 // (meaning it is possible to find one which is different from
883 // `ref` and `obj`).
884 DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
885 LOG(FATAL) << "Could not find a free caller-save register";
886 UNREACHABLE();
887 }
888
889 const Location out_;
890 const Location ref_;
891 const Location obj_;
892 const uint32_t offset_;
893 // An additional location containing an index to an array.
894 // Only used for HArrayGet and the UnsafeGetObject &
895 // UnsafeGetObjectVolatile intrinsics.
896 const Location index_;
897
898 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
899 };
900
901 // Slow path generating a read barrier for a GC root.
902 class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
903 public:
904 ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
905 : SlowPathCode(instruction), out_(out), root_(root) {
906 DCHECK(kEmitCompilerReadBarrier);
907 }
908
909 void EmitNativeCode(CodeGenerator* codegen) override {
910 LocationSummary* locations = instruction_->GetLocations();
911 DCHECK(locations->CanCall());
912 DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
913 DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
914 << "Unexpected instruction in read barrier for GC root slow path: "
915 << instruction_->DebugName();
916
917 __ Bind(GetEntryLabel());
918 SaveLiveRegisters(codegen, locations);
919
920 InvokeRuntimeCallingConvention calling_convention;
921 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
922 x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
923 x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
924 instruction_,
925 instruction_->GetDexPc(),
926 this);
927 CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
928 x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));
929
930 RestoreLiveRegisters(codegen, locations);
931 __ jmp(GetExitLabel());
932 }
933
934 const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }
935
936 private:
937 const Location out_;
938 const Location root_;
939
940 DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
941 };
942
943 #undef __
944 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
945 #define __ down_cast<X86_64Assembler*>(GetAssembler())-> // NOLINT
946
947 inline Condition X86_64IntegerCondition(IfCondition cond) {
948 switch (cond) {
949 case kCondEQ: return kEqual;
950 case kCondNE: return kNotEqual;
951 case kCondLT: return kLess;
952 case kCondLE: return kLessEqual;
953 case kCondGT: return kGreater;
954 case kCondGE: return kGreaterEqual;
955 case kCondB: return kBelow;
956 case kCondBE: return kBelowEqual;
957 case kCondA: return kAbove;
958 case kCondAE: return kAboveEqual;
959 }
960 LOG(FATAL) << "Unreachable";
961 UNREACHABLE();
962 }
963
964 // Maps FP condition to x86_64 name.
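// The mapping uses the unsigned-compare conditions (below/above) because SSE comparisons
// (ucomiss/ucomisd) set the flags as an unsigned comparison would.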
965 inline Condition X86_64FPCondition(IfCondition cond) {
966 switch (cond) {
967 case kCondEQ: return kEqual;
968 case kCondNE: return kNotEqual;
969 case kCondLT: return kBelow;
970 case kCondLE: return kBelowEqual;
971 case kCondGT: return kAbove;
972 case kCondGE: return kAboveEqual;
973 default: break; // should not happen
974 }
975 LOG(FATAL) << "Unreachable";
976 UNREACHABLE();
977 }
978
979 HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
980 const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
981 ArtMethod* method ATTRIBUTE_UNUSED) {
982 return desired_dispatch_info;
983 }
984
985 void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
986 HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
987 // All registers are assumed to be correctly set up.
988
989 Location callee_method = temp; // For all kinds except kRecursive, callee will be in temp.
990 switch (invoke->GetMethodLoadKind()) {
991 case HInvokeStaticOrDirect::MethodLoadKind::kStringInit: {
992 // temp = thread->string_init_entrypoint
993 uint32_t offset =
994 GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
995 __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
996 break;
997 }
998 case HInvokeStaticOrDirect::MethodLoadKind::kRecursive:
999 callee_method = invoke->GetLocations()->InAt(invoke->GetSpecialInputIndex());
1000 break;
1001 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageLinkTimePcRelative:
1002 DCHECK(GetCompilerOptions().IsBootImage());
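// kDummy32BitOffset is only a placeholder displacement; the boot image method patch
// recorded below lets the linker fix up the actual PC-relative offset.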
1003 __ leal(temp.AsRegister<CpuRegister>(),
1004 Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
1005 RecordBootImageMethodPatch(invoke);
1006 break;
1007 case HInvokeStaticOrDirect::MethodLoadKind::kBootImageRelRo: {
1008 // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
1009 __ movl(temp.AsRegister<CpuRegister>(),
1010 Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
1011 RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
1012 break;
1013 }
1014 case HInvokeStaticOrDirect::MethodLoadKind::kBssEntry: {
1015 __ movq(temp.AsRegister<CpuRegister>(),
1016 Address::Absolute(kDummy32BitOffset, /* no_rip= */ false));
1017 RecordMethodBssEntryPatch(invoke);
1018 break;
1019 }
1020 case HInvokeStaticOrDirect::MethodLoadKind::kJitDirectAddress:
1021 Load64BitValue(temp.AsRegister<CpuRegister>(), invoke->GetMethodAddress());
1022 break;
1023 case HInvokeStaticOrDirect::MethodLoadKind::kRuntimeCall: {
1024 GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
1025 return; // No code pointer retrieval; the runtime performs the call directly.
1026 }
1027 }
1028
1029 switch (invoke->GetCodePtrLocation()) {
1030 case HInvokeStaticOrDirect::CodePtrLocation::kCallSelf:
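// Recursive call to the current method: call this method's own frame entry label.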
1031 __ call(&frame_entry_label_);
1032 break;
1033 case HInvokeStaticOrDirect::CodePtrLocation::kCallArtMethod:
1034 // (callee_method + offset_of_quick_compiled_code)()
1035 __ call(Address(callee_method.AsRegister<CpuRegister>(),
1036 ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1037 kX86_64PointerSize).SizeValue()));
1038 break;
1039 }
1040 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1041
1042 DCHECK(!IsLeafMethod());
1043 }
1044
1045 void CodeGeneratorX86_64::GenerateVirtualCall(
1046 HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
1047 CpuRegister temp = temp_in.AsRegister<CpuRegister>();
1048 size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
1049 invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();
1050
1051 // Use the calling convention instead of the location of the receiver, as
1052 // intrinsics may have put the receiver in a different register. In the intrinsics
1053 // slow path, the arguments have been moved to the right place, so here we are
1054 // guaranteed that the receiver is the first register of the calling convention.
1055 InvokeDexCallingConvention calling_convention;
1056 Register receiver = calling_convention.GetRegisterAt(0);
1057
1058 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
1059 // /* HeapReference<Class> */ temp = receiver->klass_
1060 __ movl(temp, Address(CpuRegister(receiver), class_offset));
1061 MaybeRecordImplicitNullCheck(invoke);
1062 // Instead of simply (possibly) unpoisoning `temp` here, we should
1063 // emit a read barrier for the previous class reference load.
1064 // However this is not required in practice, as this is an
1065 // intermediate/temporary reference and because the current
1066 // concurrent copying collector keeps the from-space memory
1067 // intact/accessible until the end of the marking phase (the
1068 // concurrent copying collector may not do so in the future).
1069 __ MaybeUnpoisonHeapReference(temp);
1070 // temp = temp->GetMethodAt(method_offset);
1071 __ movq(temp, Address(temp, method_offset));
1072 // call temp->GetEntryPoint();
1073 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1074 kX86_64PointerSize).SizeValue()));
1075 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1076 }
1077
1078 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1079 boot_image_intrinsic_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1080 __ Bind(&boot_image_intrinsic_patches_.back().label);
1081 }
1082
1083 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1084 boot_image_method_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1085 __ Bind(&boot_image_method_patches_.back().label);
1086 }
1087
1088 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvokeStaticOrDirect* invoke) {
1089 boot_image_method_patches_.emplace_back(
1090 invoke->GetTargetMethod().dex_file, invoke->GetTargetMethod().index);
1091 __ Bind(&boot_image_method_patches_.back().label);
1092 }
1093
1094 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvokeStaticOrDirect* invoke) {
1095 method_bss_entry_patches_.emplace_back(&GetGraph()->GetDexFile(), invoke->GetDexMethodIndex());
1096 __ Bind(&method_bss_entry_patches_.back().label);
1097 }
1098
1099 void CodeGeneratorX86_64::RecordBootImageTypePatch(HLoadClass* load_class) {
1100 boot_image_type_patches_.emplace_back(
1101 &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1102 __ Bind(&boot_image_type_patches_.back().label);
1103 }
1104
1105 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1106 type_bss_entry_patches_.emplace_back(
1107 &load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1108 return &type_bss_entry_patches_.back().label;
1109 }
1110
1111 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1112 boot_image_string_patches_.emplace_back(
1113 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1114 __ Bind(&boot_image_string_patches_.back().label);
1115 }
1116
1117 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1118 string_bss_entry_patches_.emplace_back(
1119 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1120 return &string_bss_entry_patches_.back().label;
1121 }
1122
1123 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
1124 if (GetCompilerOptions().IsBootImage()) {
1125 __ leal(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
1126 RecordBootImageIntrinsicPatch(boot_image_reference);
1127 } else if (GetCompilerOptions().GetCompilePic()) {
1128 __ movl(reg, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
1129 RecordBootImageRelRoPatch(boot_image_reference);
1130 } else {
1131 DCHECK(Runtime::Current()->UseJitCompilation());
1132 gc::Heap* heap = Runtime::Current()->GetHeap();
1133 DCHECK(!heap->GetBootImageSpaces().empty());
1134 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
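// In the JIT case the boot image is already loaded and mapped in the low 4GiB, so its
// absolute address fits in a 32-bit immediate.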
1135 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1136 }
1137 }
1138
1139 void CodeGeneratorX86_64::AllocateInstanceForIntrinsic(HInvokeStaticOrDirect* invoke,
1140 uint32_t boot_image_offset) {
1141 DCHECK(invoke->IsStatic());
1142 InvokeRuntimeCallingConvention calling_convention;
1143 CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
1144 if (GetCompilerOptions().IsBootImage()) {
1145 DCHECK_EQ(boot_image_offset, IntrinsicVisitor::IntegerValueOfInfo::kInvalidReference);
1146 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1147 __ leal(argument,
1148 Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
1149 MethodReference target_method = invoke->GetTargetMethod();
1150 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1151 boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1152 __ Bind(&boot_image_type_patches_.back().label);
1153 } else {
1154 LoadBootImageAddress(argument, boot_image_offset);
1155 }
1156 InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
1157 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
1158 }
1159
1160 // The label points to the end of the "movl" or another instruction, but the literal offset
1161 // for the method patch needs to point to the embedded constant, which occupies the last 4 bytes.
1162 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1163
1164 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1165 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1166 const ArenaDeque<PatchInfo<Label>>& infos,
1167 ArenaVector<linker::LinkerPatch>* linker_patches) {
1168 for (const PatchInfo<Label>& info : infos) {
1169 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1170 linker_patches->push_back(
1171 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1172 }
1173 }
1174
1175 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1176 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1177 const DexFile* target_dex_file,
1178 uint32_t pc_insn_offset,
1179 uint32_t boot_image_offset) {
1180 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
1181 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1182 }
1183
1184 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1185 DCHECK(linker_patches->empty());
1186 size_t size =
1187 boot_image_method_patches_.size() +
1188 method_bss_entry_patches_.size() +
1189 boot_image_type_patches_.size() +
1190 type_bss_entry_patches_.size() +
1191 boot_image_string_patches_.size() +
1192 string_bss_entry_patches_.size() +
1193 boot_image_intrinsic_patches_.size();
1194 linker_patches->reserve(size);
1195 if (GetCompilerOptions().IsBootImage()) {
1196 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1197 boot_image_method_patches_, linker_patches);
1198 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1199 boot_image_type_patches_, linker_patches);
1200 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1201 boot_image_string_patches_, linker_patches);
1202 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1203 boot_image_intrinsic_patches_, linker_patches);
1204 } else {
1205 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1206 boot_image_method_patches_, linker_patches);
1207 DCHECK(boot_image_type_patches_.empty());
1208 DCHECK(boot_image_string_patches_.empty());
1209 DCHECK(boot_image_intrinsic_patches_.empty());
1210 }
1211 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1212 method_bss_entry_patches_, linker_patches);
1213 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1214 type_bss_entry_patches_, linker_patches);
1215 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1216 string_bss_entry_patches_, linker_patches);
1217 DCHECK_EQ(size, linker_patches->size());
1218 }
1219
1220 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1221 stream << Register(reg);
1222 }
1223
1224 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1225 stream << FloatRegister(reg);
1226 }
1227
1228 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1229 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1230 }
1231
1232 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1233 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1234 return kX86_64WordSize;
1235 }
1236
1237 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1238 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1239 return kX86_64WordSize;
1240 }
1241
1242 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
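// If the graph uses SIMD, the full 128-bit XMM register may be live, so spill all 16 bytes;
// otherwise the 64-bit low half saved by movsd is sufficient.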
1243 if (GetGraph()->HasSIMD()) {
1244 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1245 } else {
1246 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1247 }
1248 return GetFloatingPointSpillSlotSize();
1249 }
1250
1251 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1252 if (GetGraph()->HasSIMD()) {
1253 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1254 } else {
1255 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1256 }
1257 return GetFloatingPointSpillSlotSize();
1258 }
1259
1260 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1261 HInstruction* instruction,
1262 uint32_t dex_pc,
1263 SlowPathCode* slow_path) {
1264 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1265 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1266 if (EntrypointRequiresStackMap(entrypoint)) {
1267 RecordPcInfo(instruction, dex_pc, slow_path);
1268 }
1269 }
1270
1271 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1272 HInstruction* instruction,
1273 SlowPathCode* slow_path) {
1274 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1275 GenerateInvokeRuntime(entry_point_offset);
1276 }
1277
1278 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
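// Quick entrypoints live in the Thread object, which x86-64 addresses through
// the %gs segment register; hence the gs-prefixed absolute (non RIP-relative) call.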
1279 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1280 }
1281
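// x86-64 has no core register pairs: a 64-bit value always fits in a single GPR.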
1282 static constexpr int kNumberOfCpuRegisterPairs = 0;
1283 // Use a fake return address register to mimic Quick.
1284 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1285 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1286 const CompilerOptions& compiler_options,
1287 OptimizingCompilerStats* stats)
1288 : CodeGenerator(graph,
1289 kNumberOfCpuRegisters,
1290 kNumberOfFloatRegisters,
1291 kNumberOfCpuRegisterPairs,
1292 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1293 arraysize(kCoreCalleeSaves))
1294 | (1 << kFakeReturnRegister),
1295 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1296 arraysize(kFpuCalleeSaves)),
1297 compiler_options,
1298 stats),
1299 block_labels_(nullptr),
1300 location_builder_(graph, this),
1301 instruction_visitor_(graph, this),
1302 move_resolver_(graph->GetAllocator(), this),
1303 assembler_(graph->GetAllocator()),
1304 constant_area_start_(0),
1305 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1306 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1307 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1308 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1309 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1310 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1311 boot_image_intrinsic_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1312 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1313 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1314 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1315 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1316 }
1317
1318 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1319 CodeGeneratorX86_64* codegen)
1320 : InstructionCodeGenerator(graph, codegen),
1321 assembler_(codegen->GetAssembler()),
1322 codegen_(codegen) {}
1323
1324 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1325 // Stack register is always reserved.
1326 blocked_core_registers_[RSP] = true;
1327
1328 // Block the register used as TMP.
1329 blocked_core_registers_[TMP] = true;
1330 }
1331
1332 static dwarf::Reg DWARFReg(Register reg) {
1333 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1334 }
1335
1336 static dwarf::Reg DWARFReg(FloatRegister reg) {
1337 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1338 }
1339
1340 void CodeGeneratorX86_64::GenerateFrameEntry() {
1341 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1342 __ Bind(&frame_entry_label_);
1343 bool skip_overflow_check = IsLeafMethod()
1344 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1345 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1346
1347 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1348 __ addw(Address(CpuRegister(kMethodRegisterArgument),
1349 ArtMethod::HotnessCountOffset().Int32Value()),
1350 Immediate(1));
1351 }
1352
1353 if (!skip_overflow_check) {
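// Implicit stack overflow check: the load performed by testq touches the page
// just beyond the reserved stack region without modifying any register; if the
// stack is exhausted the access faults, and the runtime's fault handler is
// expected to convert the fault into a StackOverflowError.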
1354 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1355 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1356 RecordPcInfo(nullptr, 0);
1357 }
1358
1359 if (HasEmptyFrame()) {
1360 return;
1361 }
1362
1363 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1364 Register reg = kCoreCalleeSaves[i];
1365 if (allocated_registers_.ContainsCoreRegister(reg)) {
1366 __ pushq(CpuRegister(reg));
1367 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1368 __ cfi().RelOffset(DWARFReg(reg), 0);
1369 }
1370 }
1371
1372 int adjust = GetFrameSize() - GetCoreSpillSize();
1373 __ subq(CpuRegister(RSP), Immediate(adjust));
1374 __ cfi().AdjustCFAOffset(adjust);
1375 uint32_t xmm_spill_location = GetFpuSpillStart();
1376 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1377
1378 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1379 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1380 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1381 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1382 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1383 }
1384 }
1385
1386 // Save the current method if we need it. Note that we do not
1387 // do this in HCurrentMethod, as the instruction might have been removed
1388 // in the SSA graph.
1389 if (RequiresCurrentMethod()) {
1390 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1391 CpuRegister(kMethodRegisterArgument));
1392 }
1393
1394 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1395 // Initialize should_deoptimize flag to 0.
1396 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1397 }
1398 }
1399
1400 void CodeGeneratorX86_64::GenerateFrameExit() {
1401 __ cfi().RememberState();
1402 if (!HasEmptyFrame()) {
1403 uint32_t xmm_spill_location = GetFpuSpillStart();
1404 size_t xmm_spill_slot_size = GetFloatingPointSpillSlotSize();
1405 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1406 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1407 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1408 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1409 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1410 }
1411 }
1412
1413 int adjust = GetFrameSize() - GetCoreSpillSize();
1414 __ addq(CpuRegister(RSP), Immediate(adjust));
1415 __ cfi().AdjustCFAOffset(-adjust);
1416
1417 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1418 Register reg = kCoreCalleeSaves[i];
1419 if (allocated_registers_.ContainsCoreRegister(reg)) {
1420 __ popq(CpuRegister(reg));
1421 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1422 __ cfi().Restore(DWARFReg(reg));
1423 }
1424 }
1425 }
1426 __ ret();
1427 __ cfi().RestoreState();
1428 __ cfi().DefCFAOffset(GetFrameSize());
1429 }
1430
1431 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1432 __ Bind(GetLabelOf(block));
1433 }
1434
1435 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1436 if (source.Equals(destination)) {
1437 return;
1438 }
1439 if (destination.IsRegister()) {
1440 CpuRegister dest = destination.AsRegister<CpuRegister>();
1441 if (source.IsRegister()) {
1442 __ movq(dest, source.AsRegister<CpuRegister>());
1443 } else if (source.IsFpuRegister()) {
1444 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1445 } else if (source.IsStackSlot()) {
1446 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1447 } else if (source.IsConstant()) {
1448 HConstant* constant = source.GetConstant();
1449 if (constant->IsLongConstant()) {
1450 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1451 } else {
1452 Load32BitValue(dest, GetInt32ValueOf(constant));
1453 }
1454 } else {
1455 DCHECK(source.IsDoubleStackSlot());
1456 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1457 }
1458 } else if (destination.IsFpuRegister()) {
1459 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1460 if (source.IsRegister()) {
1461 __ movd(dest, source.AsRegister<CpuRegister>());
1462 } else if (source.IsFpuRegister()) {
1463 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1464 } else if (source.IsConstant()) {
1465 HConstant* constant = source.GetConstant();
1466 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1467 if (constant->IsFloatConstant()) {
1468 Load32BitValue(dest, static_cast<int32_t>(value));
1469 } else {
1470 Load64BitValue(dest, value);
1471 }
1472 } else if (source.IsStackSlot()) {
1473 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1474 } else {
1475 DCHECK(source.IsDoubleStackSlot());
1476 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1477 }
1478 } else if (destination.IsStackSlot()) {
1479 if (source.IsRegister()) {
1480 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1481 source.AsRegister<CpuRegister>());
1482 } else if (source.IsFpuRegister()) {
1483 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1484 source.AsFpuRegister<XmmRegister>());
1485 } else if (source.IsConstant()) {
1486 HConstant* constant = source.GetConstant();
1487 int32_t value = GetInt32ValueOf(constant);
1488 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1489 } else {
1490 DCHECK(source.IsStackSlot()) << source;
1491 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1492 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1493 }
1494 } else {
1495 DCHECK(destination.IsDoubleStackSlot());
1496 if (source.IsRegister()) {
1497 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1498 source.AsRegister<CpuRegister>());
1499 } else if (source.IsFpuRegister()) {
1500 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1501 source.AsFpuRegister<XmmRegister>());
1502 } else if (source.IsConstant()) {
1503 HConstant* constant = source.GetConstant();
1504 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1505 int64_t value = GetInt64ValueOf(constant);
1506 Store64BitValueToStack(destination, value);
1507 } else {
1508 DCHECK(source.IsDoubleStackSlot());
1509 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1510 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1511 }
1512 }
1513 }
1514
1515 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1516 DCHECK(location.IsRegister());
1517 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1518 }
1519
1520 void CodeGeneratorX86_64::MoveLocation(
1521 Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1522 Move(dst, src);
1523 }
1524
1525 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1526 if (location.IsRegister()) {
1527 locations->AddTemp(location);
1528 } else {
1529 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1530 }
1531 }
1532
1533 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1534 if (successor->IsExitBlock()) {
1535 DCHECK(got->GetPrevious()->AlwaysThrows());
1536 return; // no code needed
1537 }
1538
1539 HBasicBlock* block = got->GetBlock();
1540 HInstruction* previous = got->GetPrevious();
1541
1542 HLoopInformation* info = block->GetLoopInformation();
1543 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1544 if (codegen_->GetCompilerOptions().CountHotnessInCompiledCode()) {
1545 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), 0));
1546 __ addw(Address(CpuRegister(TMP), ArtMethod::HotnessCountOffset().Int32Value()),
1547 Immediate(1));
1548 }
1549 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1550 return;
1551 }
1552
1553 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1554 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1555 }
1556 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1557 __ jmp(codegen_->GetLabelOf(successor));
1558 }
1559 }
1560
1561 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1562 got->SetLocations(nullptr);
1563 }
1564
1565 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1566 HandleGoto(got, got->GetSuccessor());
1567 }
1568
1569 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1570 try_boundary->SetLocations(nullptr);
1571 }
1572
1573 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1574 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1575 if (!successor->IsExitBlock()) {
1576 HandleGoto(try_boundary, successor);
1577 }
1578 }
1579
1580 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1581 exit->SetLocations(nullptr);
1582 }
1583
1584 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1585 }
1586
1587 template<class LabelType>
1588 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1589 LabelType* true_label,
1590 LabelType* false_label) {
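// ucomiss/ucomisd report an unordered result (a NaN operand) as ZF=PF=CF=1, so
// the NaN case is dispatched explicitly before the ordinary condition jump below.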
1591 if (cond->IsFPConditionTrueIfNaN()) {
1592 __ j(kUnordered, true_label);
1593 } else if (cond->IsFPConditionFalseIfNaN()) {
1594 __ j(kUnordered, false_label);
1595 }
1596 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1597 }
1598
1599 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1600 LocationSummary* locations = condition->GetLocations();
1601
1602 Location left = locations->InAt(0);
1603 Location right = locations->InAt(1);
1604 DataType::Type type = condition->InputAt(0)->GetType();
1605 switch (type) {
1606 case DataType::Type::kBool:
1607 case DataType::Type::kUint8:
1608 case DataType::Type::kInt8:
1609 case DataType::Type::kUint16:
1610 case DataType::Type::kInt16:
1611 case DataType::Type::kInt32:
1612 case DataType::Type::kReference: {
1613 codegen_->GenerateIntCompare(left, right);
1614 break;
1615 }
1616 case DataType::Type::kInt64: {
1617 codegen_->GenerateLongCompare(left, right);
1618 break;
1619 }
1620 case DataType::Type::kFloat32: {
1621 if (right.IsFpuRegister()) {
1622 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1623 } else if (right.IsConstant()) {
1624 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1625 codegen_->LiteralFloatAddress(
1626 right.GetConstant()->AsFloatConstant()->GetValue()));
1627 } else {
1628 DCHECK(right.IsStackSlot());
1629 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1630 Address(CpuRegister(RSP), right.GetStackIndex()));
1631 }
1632 break;
1633 }
1634 case DataType::Type::kFloat64: {
1635 if (right.IsFpuRegister()) {
1636 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1637 } else if (right.IsConstant()) {
1638 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1639 codegen_->LiteralDoubleAddress(
1640 right.GetConstant()->AsDoubleConstant()->GetValue()));
1641 } else {
1642 DCHECK(right.IsDoubleStackSlot());
1643 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1644 Address(CpuRegister(RSP), right.GetStackIndex()));
1645 }
1646 break;
1647 }
1648 default:
1649 LOG(FATAL) << "Unexpected condition type " << type;
1650 }
1651 }
1652
1653 template<class LabelType>
1654 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
1655 LabelType* true_target_in,
1656 LabelType* false_target_in) {
1657 // Generated branching requires both targets to be explicit. If either of the
1658 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
1659 LabelType fallthrough_target;
1660 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
1661 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
1662
1663 // Generate the comparison to set the CC.
1664 GenerateCompareTest(condition);
1665
1666 // Now generate the correct jump(s).
1667 DataType::Type type = condition->InputAt(0)->GetType();
1668 switch (type) {
1669 case DataType::Type::kInt64: {
1670 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1671 break;
1672 }
1673 case DataType::Type::kFloat32: {
1674 GenerateFPJumps(condition, true_target, false_target);
1675 break;
1676 }
1677 case DataType::Type::kFloat64: {
1678 GenerateFPJumps(condition, true_target, false_target);
1679 break;
1680 }
1681 default:
1682 LOG(FATAL) << "Unexpected condition type " << type;
1683 }
1684
1685 if (false_target != &fallthrough_target) {
1686 __ jmp(false_target);
1687 }
1688
1689 if (fallthrough_target.IsLinked()) {
1690 __ Bind(&fallthrough_target);
1691 }
1692 }
1693
1694 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
1695 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
1696 // are set only strictly before `branch`. We can't use the eflags on long
1697 // conditions if they are materialized due to the complex branching.
1698 return cond->IsCondition() &&
1699 cond->GetNext() == branch &&
1700 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
1701 }
1702
1703 template<class LabelType>
1704 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
1705 size_t condition_input_index,
1706 LabelType* true_target,
1707 LabelType* false_target) {
1708 HInstruction* cond = instruction->InputAt(condition_input_index);
1709
1710 if (true_target == nullptr && false_target == nullptr) {
1711 // Nothing to do. The code always falls through.
1712 return;
1713 } else if (cond->IsIntConstant()) {
1714 // Constant condition, statically compared against "true" (integer value 1).
1715 if (cond->AsIntConstant()->IsTrue()) {
1716 if (true_target != nullptr) {
1717 __ jmp(true_target);
1718 }
1719 } else {
1720 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
1721 if (false_target != nullptr) {
1722 __ jmp(false_target);
1723 }
1724 }
1725 return;
1726 }
1727
1728 // The following code generates these patterns:
1729 // (1) true_target == nullptr && false_target != nullptr
1730 // - opposite condition true => branch to false_target
1731 // (2) true_target != nullptr && false_target == nullptr
1732 // - condition true => branch to true_target
1733 // (3) true_target != nullptr && false_target != nullptr
1734 // - condition true => branch to true_target
1735 // - branch to false_target
1736 if (IsBooleanValueOrMaterializedCondition(cond)) {
1737 if (AreEflagsSetFrom(cond, instruction)) {
1738 if (true_target == nullptr) {
1739 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
1740 } else {
1741 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
1742 }
1743 } else {
1744 // Materialized condition, compare against 0.
1745 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
1746 if (lhs.IsRegister()) {
1747 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
1748 } else {
1749 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
1750 }
1751 if (true_target == nullptr) {
1752 __ j(kEqual, false_target);
1753 } else {
1754 __ j(kNotEqual, true_target);
1755 }
1756 }
1757 } else {
1758 // Condition has not been materialized, use its inputs as the
1759 // comparison and its condition as the branch condition.
1760 HCondition* condition = cond->AsCondition();
1761
1762 // If this is a long or FP comparison that has been folded into
1763 // the HCondition, generate the comparison directly.
1764 DataType::Type type = condition->InputAt(0)->GetType();
1765 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
1766 GenerateCompareTestAndBranch(condition, true_target, false_target);
1767 return;
1768 }
1769
1770 Location lhs = condition->GetLocations()->InAt(0);
1771 Location rhs = condition->GetLocations()->InAt(1);
1772 codegen_->GenerateIntCompare(lhs, rhs);
1773 if (true_target == nullptr) {
1774 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
1775 } else {
1776 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
1777 }
1778 }
1779
1780 // If neither branch falls through (case 3), the conditional branch to `true_target`
1781 // was already emitted (case 2) and we need to emit a jump to `false_target`.
1782 if (true_target != nullptr && false_target != nullptr) {
1783 __ jmp(false_target);
1784 }
1785 }
1786
1787 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
1788 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
1789 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
1790 locations->SetInAt(0, Location::Any());
1791 }
1792 }
1793
1794 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
1795 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
1796 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
1797 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
1798 nullptr : codegen_->GetLabelOf(true_successor);
1799 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
1800 nullptr : codegen_->GetLabelOf(false_successor);
1801 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
1802 }
1803
1804 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1805 LocationSummary* locations = new (GetGraph()->GetAllocator())
1806 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
1807 InvokeRuntimeCallingConvention calling_convention;
1808 RegisterSet caller_saves = RegisterSet::Empty();
1809 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1810 locations->SetCustomSlowPathCallerSaves(caller_saves);
1811 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
1812 locations->SetInAt(0, Location::Any());
1813 }
1814 }
1815
1816 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
1817 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
1818 GenerateTestAndBranch<Label>(deoptimize,
1819 /* condition_input_index= */ 0,
1820 slow_path->GetEntryLabel(),
1821 /* false_target= */ nullptr);
1822 }
1823
1824 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1825 LocationSummary* locations = new (GetGraph()->GetAllocator())
1826 LocationSummary(flag, LocationSummary::kNoCall);
1827 locations->SetOut(Location::RequiresRegister());
1828 }
1829
1830 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
1831 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
1832 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
1833 }
1834
1835 static bool SelectCanUseCMOV(HSelect* select) {
1836 // There are no conditional move instructions for XMMs.
1837 if (DataType::IsFloatingPointType(select->GetType())) {
1838 return false;
1839 }
1840
1841 // A FP condition doesn't generate the single CC that we need.
1842 HInstruction* condition = select->GetCondition();
1843 if (condition->IsCondition() &&
1844 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
1845 return false;
1846 }
1847
1848 // We can generate a CMOV for this Select.
1849 return true;
1850 }
1851
1852 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
1853 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
1854 if (DataType::IsFloatingPointType(select->GetType())) {
1855 locations->SetInAt(0, Location::RequiresFpuRegister());
1856 locations->SetInAt(1, Location::Any());
1857 } else {
1858 locations->SetInAt(0, Location::RequiresRegister());
1859 if (SelectCanUseCMOV(select)) {
1860 if (select->InputAt(1)->IsConstant()) {
1861 locations->SetInAt(1, Location::RequiresRegister());
1862 } else {
1863 locations->SetInAt(1, Location::Any());
1864 }
1865 } else {
1866 locations->SetInAt(1, Location::Any());
1867 }
1868 }
1869 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
1870 locations->SetInAt(2, Location::RequiresRegister());
1871 }
1872 locations->SetOut(Location::SameAsFirstInput());
1873 }
1874
1875 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
1876 LocationSummary* locations = select->GetLocations();
1877 if (SelectCanUseCMOV(select)) {
1878 // If both the condition and the source types are integer, we can generate
1879 // a CMOV to implement Select.
1880 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
1881 Location value_true_loc = locations->InAt(1);
1882 DCHECK(locations->InAt(0).Equals(locations->Out()));
1883
1884 HInstruction* select_condition = select->GetCondition();
1885 Condition cond = kNotEqual;
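// Default condition: when the condition is tested with testl against zero
// below, "not equal" (i.e. a non-zero value) selects the true input.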
1886
1887 // Figure out how to test the 'condition'.
1888 if (select_condition->IsCondition()) {
1889 HCondition* condition = select_condition->AsCondition();
1890 if (!condition->IsEmittedAtUseSite()) {
1891 // This was a previously materialized condition.
1892 // Can we use the existing condition code?
1893 if (AreEflagsSetFrom(condition, select)) {
1894 // Materialization was the previous instruction. Condition codes are right.
1895 cond = X86_64IntegerCondition(condition->GetCondition());
1896 } else {
1897 // No, we have to recreate the condition code.
1898 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1899 __ testl(cond_reg, cond_reg);
1900 }
1901 } else {
1902 GenerateCompareTest(condition);
1903 cond = X86_64IntegerCondition(condition->GetCondition());
1904 }
1905 } else {
1906 // Must be a Boolean condition, which needs to be compared to 0.
1907 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
1908 __ testl(cond_reg, cond_reg);
1909 }
1910
1911 // If the condition is true, overwrite the output, which already contains false.
1912 // Generate the correct sized CMOV.
1913 bool is_64_bit = DataType::Is64BitType(select->GetType());
1914 if (value_true_loc.IsRegister()) {
1915 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
1916 } else {
1917 __ cmov(cond,
1918 value_false,
1919 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
1920 }
1921 } else {
1922 NearLabel false_target;
1923 GenerateTestAndBranch<NearLabel>(select,
1924 /* condition_input_index= */ 2,
1925 /* true_target= */ nullptr,
1926 &false_target);
1927 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
1928 __ Bind(&false_target);
1929 }
1930 }
1931
1932 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
1933 new (GetGraph()->GetAllocator()) LocationSummary(info);
1934 }
1935
1936 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
1937 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
1938 }
1939
1940 void CodeGeneratorX86_64::GenerateNop() {
1941 __ nop();
1942 }
1943
1944 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
1945 LocationSummary* locations =
1946 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
1947 // Handle the long/FP comparisons made in instruction simplification.
1948 switch (cond->InputAt(0)->GetType()) {
1949 case DataType::Type::kInt64:
1950 locations->SetInAt(0, Location::RequiresRegister());
1951 locations->SetInAt(1, Location::Any());
1952 break;
1953 case DataType::Type::kFloat32:
1954 case DataType::Type::kFloat64:
1955 locations->SetInAt(0, Location::RequiresFpuRegister());
1956 locations->SetInAt(1, Location::Any());
1957 break;
1958 default:
1959 locations->SetInAt(0, Location::RequiresRegister());
1960 locations->SetInAt(1, Location::Any());
1961 break;
1962 }
1963 if (!cond->IsEmittedAtUseSite()) {
1964 locations->SetOut(Location::RequiresRegister());
1965 }
1966 }
1967
1968 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
1969 if (cond->IsEmittedAtUseSite()) {
1970 return;
1971 }
1972
1973 LocationSummary* locations = cond->GetLocations();
1974 Location lhs = locations->InAt(0);
1975 Location rhs = locations->InAt(1);
1976 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
1977 NearLabel true_label, false_label;
1978
1979 switch (cond->InputAt(0)->GetType()) {
1980 default:
1981 // Integer case.
1982
1983 // Clear output register: setcc only sets the low byte.
1984 __ xorl(reg, reg);
1985
1986 codegen_->GenerateIntCompare(lhs, rhs);
1987 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1988 return;
1989 case DataType::Type::kInt64:
1990 // Clear output register: setcc only sets the low byte.
1991 __ xorl(reg, reg);
1992
1993 codegen_->GenerateLongCompare(lhs, rhs);
1994 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
1995 return;
1996 case DataType::Type::kFloat32: {
1997 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
1998 if (rhs.IsConstant()) {
1999 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2000 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2001 } else if (rhs.IsStackSlot()) {
2002 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2003 } else {
2004 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2005 }
2006 GenerateFPJumps(cond, &true_label, &false_label);
2007 break;
2008 }
2009 case DataType::Type::kFloat64: {
2010 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2011 if (rhs.IsConstant()) {
2012 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2013 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2014 } else if (rhs.IsDoubleStackSlot()) {
2015 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2016 } else {
2017 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2018 }
2019 GenerateFPJumps(cond, &true_label, &false_label);
2020 break;
2021 }
2022 }
2023
2024 // Convert the jumps into the result.
2025 NearLabel done_label;
2026
2027 // False case: result = 0.
2028 __ Bind(&false_label);
2029 __ xorl(reg, reg);
2030 __ jmp(&done_label);
2031
2032 // True case: result = 1.
2033 __ Bind(&true_label);
2034 __ movl(reg, Immediate(1));
2035 __ Bind(&done_label);
2036 }
2037
2038 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2039 HandleCondition(comp);
2040 }
2041
2042 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2043 HandleCondition(comp);
2044 }
2045
2046 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2047 HandleCondition(comp);
2048 }
2049
2050 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2051 HandleCondition(comp);
2052 }
2053
2054 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2055 HandleCondition(comp);
2056 }
2057
2058 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2059 HandleCondition(comp);
2060 }
2061
2062 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2063 HandleCondition(comp);
2064 }
2065
2066 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2067 HandleCondition(comp);
2068 }
2069
2070 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2071 HandleCondition(comp);
2072 }
2073
2074 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2075 HandleCondition(comp);
2076 }
2077
2078 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2079 HandleCondition(comp);
2080 }
2081
2082 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2083 HandleCondition(comp);
2084 }
2085
2086 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2087 HandleCondition(comp);
2088 }
2089
2090 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2091 HandleCondition(comp);
2092 }
2093
2094 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2095 HandleCondition(comp);
2096 }
2097
2098 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2099 HandleCondition(comp);
2100 }
2101
2102 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2103 HandleCondition(comp);
2104 }
2105
2106 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2107 HandleCondition(comp);
2108 }
2109
2110 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2111 HandleCondition(comp);
2112 }
2113
2114 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2115 HandleCondition(comp);
2116 }
2117
2118 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2119 LocationSummary* locations =
2120 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2121 switch (compare->InputAt(0)->GetType()) {
2122 case DataType::Type::kBool:
2123 case DataType::Type::kUint8:
2124 case DataType::Type::kInt8:
2125 case DataType::Type::kUint16:
2126 case DataType::Type::kInt16:
2127 case DataType::Type::kInt32:
2128 case DataType::Type::kInt64: {
2129 locations->SetInAt(0, Location::RequiresRegister());
2130 locations->SetInAt(1, Location::Any());
2131 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2132 break;
2133 }
2134 case DataType::Type::kFloat32:
2135 case DataType::Type::kFloat64: {
2136 locations->SetInAt(0, Location::RequiresFpuRegister());
2137 locations->SetInAt(1, Location::Any());
2138 locations->SetOut(Location::RequiresRegister());
2139 break;
2140 }
2141 default:
2142 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2143 }
2144 }
2145
2146 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2147 LocationSummary* locations = compare->GetLocations();
2148 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2149 Location left = locations->InAt(0);
2150 Location right = locations->InAt(1);
2151
2152 NearLabel less, greater, done;
2153 DataType::Type type = compare->InputAt(0)->GetType();
2154 Condition less_cond = kLess;
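// Result protocol: 0 if equal, 1 if left > right, -1 if left < right; for
// floating point the gt/lt bias decides which side an unordered (NaN) compare takes.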
2155
2156 switch (type) {
2157 case DataType::Type::kBool:
2158 case DataType::Type::kUint8:
2159 case DataType::Type::kInt8:
2160 case DataType::Type::kUint16:
2161 case DataType::Type::kInt16:
2162 case DataType::Type::kInt32: {
2163 codegen_->GenerateIntCompare(left, right);
2164 break;
2165 }
2166 case DataType::Type::kInt64: {
2167 codegen_->GenerateLongCompare(left, right);
2168 break;
2169 }
2170 case DataType::Type::kFloat32: {
2171 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2172 if (right.IsConstant()) {
2173 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2174 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2175 } else if (right.IsStackSlot()) {
2176 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2177 } else {
2178 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2179 }
2180 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2181 less_cond = kBelow; // ucomis{s,d} sets CF
2182 break;
2183 }
2184 case DataType::Type::kFloat64: {
2185 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2186 if (right.IsConstant()) {
2187 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2188 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2189 } else if (right.IsDoubleStackSlot()) {
2190 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2191 } else {
2192 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2193 }
2194 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2195 less_cond = kBelow; // ucomis{s,d} sets CF
2196 break;
2197 }
2198 default:
2199 LOG(FATAL) << "Unexpected compare type " << type;
2200 }
2201
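// movl does not touch EFLAGS, so the flags produced by the compare above are
// still valid for the conditional jumps that follow.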
2202 __ movl(out, Immediate(0));
2203 __ j(kEqual, &done);
2204 __ j(less_cond, &less);
2205
2206 __ Bind(&greater);
2207 __ movl(out, Immediate(1));
2208 __ jmp(&done);
2209
2210 __ Bind(&less);
2211 __ movl(out, Immediate(-1));
2212
2213 __ Bind(&done);
2214 }
2215
2216 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2217 LocationSummary* locations =
2218 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2219 locations->SetOut(Location::ConstantLocation(constant));
2220 }
2221
2222 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2223 // Will be generated at use site.
2224 }
2225
2226 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2227 LocationSummary* locations =
2228 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2229 locations->SetOut(Location::ConstantLocation(constant));
2230 }
2231
2232 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2233 // Will be generated at use site.
2234 }
2235
2236 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2237 LocationSummary* locations =
2238 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2239 locations->SetOut(Location::ConstantLocation(constant));
2240 }
2241
2242 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2243 // Will be generated at use site.
2244 }
2245
2246 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2247 LocationSummary* locations =
2248 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2249 locations->SetOut(Location::ConstantLocation(constant));
2250 }
2251
2252 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2253 // Will be generated at use site.
2254 }
2255
2256 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2257 LocationSummary* locations =
2258 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2259 locations->SetOut(Location::ConstantLocation(constant));
2260 }
2261
2262 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2263 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2264 // Will be generated at use site.
2265 }
2266
2267 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2268 constructor_fence->SetLocations(nullptr);
2269 }
2270
2271 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2272 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2273 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2274 }
2275
2276 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2277 memory_barrier->SetLocations(nullptr);
2278 }
2279
2280 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2281 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2282 }
2283
2284 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2285 ret->SetLocations(nullptr);
2286 }
2287
2288 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2289 codegen_->GenerateFrameExit();
2290 }
2291
2292 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2293 LocationSummary* locations =
2294 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2295 switch (ret->InputAt(0)->GetType()) {
2296 case DataType::Type::kReference:
2297 case DataType::Type::kBool:
2298 case DataType::Type::kUint8:
2299 case DataType::Type::kInt8:
2300 case DataType::Type::kUint16:
2301 case DataType::Type::kInt16:
2302 case DataType::Type::kInt32:
2303 case DataType::Type::kInt64:
2304 locations->SetInAt(0, Location::RegisterLocation(RAX));
2305 break;
2306
2307 case DataType::Type::kFloat32:
2308 case DataType::Type::kFloat64:
2309 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
2310 break;
2311
2312 default:
2313 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2314 }
2315 }
2316
2317 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2318 if (kIsDebugBuild) {
2319 switch (ret->InputAt(0)->GetType()) {
2320 case DataType::Type::kReference:
2321 case DataType::Type::kBool:
2322 case DataType::Type::kUint8:
2323 case DataType::Type::kInt8:
2324 case DataType::Type::kUint16:
2325 case DataType::Type::kInt16:
2326 case DataType::Type::kInt32:
2327 case DataType::Type::kInt64:
2328 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2329 break;
2330
2331 case DataType::Type::kFloat32:
2332 case DataType::Type::kFloat64:
2333 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2334 XMM0);
2335 break;
2336
2337 default:
2338 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2339 }
2340 }
2341 codegen_->GenerateFrameExit();
2342 }
2343
2344 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2345 switch (type) {
2346 case DataType::Type::kReference:
2347 case DataType::Type::kBool:
2348 case DataType::Type::kUint8:
2349 case DataType::Type::kInt8:
2350 case DataType::Type::kUint16:
2351 case DataType::Type::kInt16:
2352 case DataType::Type::kUint32:
2353 case DataType::Type::kInt32:
2354 case DataType::Type::kUint64:
2355 case DataType::Type::kInt64:
2356 return Location::RegisterLocation(RAX);
2357
2358 case DataType::Type::kVoid:
2359 return Location::NoLocation();
2360
2361 case DataType::Type::kFloat64:
2362 case DataType::Type::kFloat32:
2363 return Location::FpuRegisterLocation(XMM0);
2364 }
2365
2366 UNREACHABLE();
2367 }
2368
2369 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2370 return Location::RegisterLocation(kMethodRegisterArgument);
2371 }
2372
2373 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2374 switch (type) {
2375 case DataType::Type::kReference:
2376 case DataType::Type::kBool:
2377 case DataType::Type::kUint8:
2378 case DataType::Type::kInt8:
2379 case DataType::Type::kUint16:
2380 case DataType::Type::kInt16:
2381 case DataType::Type::kInt32: {
2382 uint32_t index = gp_index_++;
2383 stack_index_++;
2384 if (index < calling_convention.GetNumberOfRegisters()) {
2385 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2386 } else {
2387 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2388 }
2389 }
2390
2391 case DataType::Type::kInt64: {
2392 uint32_t index = gp_index_;
2393 stack_index_ += 2;
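// The dex calling convention counts 32-bit vreg slots, so a long always
// advances the stack index by two even when it is passed in a single GPR.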
2394 if (index < calling_convention.GetNumberOfRegisters()) {
2395 gp_index_ += 1;
2396 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2397 } else {
2398 gp_index_ += 2;
2399 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2400 }
2401 }
2402
2403 case DataType::Type::kFloat32: {
2404 uint32_t index = float_index_++;
2405 stack_index_++;
2406 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2407 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2408 } else {
2409 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2410 }
2411 }
2412
2413 case DataType::Type::kFloat64: {
2414 uint32_t index = float_index_++;
2415 stack_index_ += 2;
2416 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2417 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2418 } else {
2419 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2420 }
2421 }
2422
2423 case DataType::Type::kUint32:
2424 case DataType::Type::kUint64:
2425 case DataType::Type::kVoid:
2426 LOG(FATAL) << "Unexpected parameter type " << type;
2427 UNREACHABLE();
2428 }
2429 return Location::NoLocation();
2430 }
2431
2432 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2433 // The trampoline uses the same calling convention as dex calling conventions,
2434 // except instead of loading arg0/r0 with the target Method*, arg0/r0 will contain
2435 // the method_idx.
2436 HandleInvoke(invoke);
2437 }
2438
2439 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2440 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2441 }
2442
2443 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2444 // Explicit clinit checks triggered by static invokes must have been pruned by
2445 // art::PrepareForRegisterAllocation.
2446 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2447
2448 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2449 if (intrinsic.TryDispatch(invoke)) {
2450 return;
2451 }
2452
2453 HandleInvoke(invoke);
2454 }
2455
2456 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2457 if (invoke->GetLocations()->Intrinsified()) {
2458 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2459 intrinsic.Dispatch(invoke);
2460 return true;
2461 }
2462 return false;
2463 }
2464
2465 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2466 // Explicit clinit checks triggered by static invokes must have been pruned by
2467 // art::PrepareForRegisterAllocation.
2468 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2469
2470 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2471 return;
2472 }
2473
2474 LocationSummary* locations = invoke->GetLocations();
2475 codegen_->GenerateStaticOrDirectCall(
2476 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2477 }
2478
2479 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2480 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2481 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2482 }
2483
2484 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2485 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2486 if (intrinsic.TryDispatch(invoke)) {
2487 return;
2488 }
2489
2490 HandleInvoke(invoke);
2491 }
2492
2493 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2494 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2495 return;
2496 }
2497
2498 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2499 DCHECK(!codegen_->IsLeafMethod());
2500 }
2501
2502 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2503 HandleInvoke(invoke);
2504 // Add the hidden argument.
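// The hidden argument temp is pinned to RAX; it is loaded with the dex method
// index right before the interface call is emitted (see the code generator below).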
2505 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2506 }
2507
2508 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2509 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2510 LocationSummary* locations = invoke->GetLocations();
2511 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2512 CpuRegister hidden_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
2513 Location receiver = locations->InAt(0);
2514 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2515
2516 // Set the hidden argument. This is safe to do here, as RAX
2517 // won't be modified thereafter, before the `call` instruction.
2518 DCHECK_EQ(RAX, hidden_reg.AsRegister());
2519 codegen_->Load64BitValue(hidden_reg, invoke->GetDexMethodIndex());
2520
2521 if (receiver.IsStackSlot()) {
2522 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2523 // /* HeapReference<Class> */ temp = temp->klass_
2524 __ movl(temp, Address(temp, class_offset));
2525 } else {
2526 // /* HeapReference<Class> */ temp = receiver->klass_
2527 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2528 }
2529 codegen_->MaybeRecordImplicitNullCheck(invoke);
2530 // Instead of simply (possibly) unpoisoning `temp` here, we should
2531 // emit a read barrier for the previous class reference load.
2532 // However this is not required in practice, as this is an
2533 // intermediate/temporary reference and because the current
2534 // concurrent copying collector keeps the from-space memory
2535 // intact/accessible until the end of the marking phase (the
2536 // concurrent copying collector may not in the future).
2537 __ MaybeUnpoisonHeapReference(temp);
2538 // temp = temp->GetAddressOfIMT()
2539 __ movq(temp,
2540 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2541 // temp = temp->GetImtEntryAt(method_offset);
2542 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2543 invoke->GetImtIndex(), kX86_64PointerSize));
2545 __ movq(temp, Address(temp, method_offset));
2546 // call temp->GetEntryPoint();
2547 __ call(Address(
2548 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2549
2550 DCHECK(!codegen_->IsLeafMethod());
2551 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2552 }
2553
2554 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2555 HandleInvoke(invoke);
2556 }
2557
2558 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2559 codegen_->GenerateInvokePolymorphicCall(invoke);
2560 }
2561
2562 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2563 HandleInvoke(invoke);
2564 }
2565
2566 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
2567 codegen_->GenerateInvokeCustomCall(invoke);
2568 }
2569
2570 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
2571 LocationSummary* locations =
2572 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
2573 switch (neg->GetResultType()) {
2574 case DataType::Type::kInt32:
2575 case DataType::Type::kInt64:
2576 locations->SetInAt(0, Location::RequiresRegister());
2577 locations->SetOut(Location::SameAsFirstInput());
2578 break;
2579
2580 case DataType::Type::kFloat32:
2581 case DataType::Type::kFloat64:
2582 locations->SetInAt(0, Location::RequiresFpuRegister());
2583 locations->SetOut(Location::SameAsFirstInput());
2584 locations->AddTemp(Location::RequiresFpuRegister());
2585 break;
2586
2587 default:
2588 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2589 }
2590 }
2591
2592 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
2593 LocationSummary* locations = neg->GetLocations();
2594 Location out = locations->Out();
2595 Location in = locations->InAt(0);
2596 switch (neg->GetResultType()) {
2597 case DataType::Type::kInt32:
2598 DCHECK(in.IsRegister());
2599 DCHECK(in.Equals(out));
2600 __ negl(out.AsRegister<CpuRegister>());
2601 break;
2602
2603 case DataType::Type::kInt64:
2604 DCHECK(in.IsRegister());
2605 DCHECK(in.Equals(out));
2606 __ negq(out.AsRegister<CpuRegister>());
2607 break;
2608
2609 case DataType::Type::kFloat32: {
2610 DCHECK(in.Equals(out));
2611 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2612 // Implement float negation with an exclusive or with value
2613 // 0x80000000 (mask for bit 31, representing the sign of a
2614 // single-precision floating-point number).
2615 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
2616 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
2617 break;
2618 }
2619
2620 case DataType::Type::kFloat64: {
2621 DCHECK(in.Equals(out));
2622 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2623 // Implement double negation with an exclusive or with value
2624 // 0x8000000000000000 (mask for bit 63, representing the sign of
2625 // a double-precision floating-point number).
2626 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
2627 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
2628 break;
2629 }
2630
2631 default:
2632 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
2633 }
2634 }
2635
2636 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2637 LocationSummary* locations =
2638 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
2639 DataType::Type result_type = conversion->GetResultType();
2640 DataType::Type input_type = conversion->GetInputType();
2641 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2642 << input_type << " -> " << result_type;
2643
2644 switch (result_type) {
2645 case DataType::Type::kUint8:
2646 case DataType::Type::kInt8:
2647 case DataType::Type::kUint16:
2648 case DataType::Type::kInt16:
2649 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
2650 locations->SetInAt(0, Location::Any());
2651 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2652 break;
2653
2654 case DataType::Type::kInt32:
2655 switch (input_type) {
2656 case DataType::Type::kInt64:
2657 locations->SetInAt(0, Location::Any());
2658 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2659 break;
2660
2661 case DataType::Type::kFloat32:
2662 locations->SetInAt(0, Location::RequiresFpuRegister());
2663 locations->SetOut(Location::RequiresRegister());
2664 break;
2665
2666 case DataType::Type::kFloat64:
2667 locations->SetInAt(0, Location::RequiresFpuRegister());
2668 locations->SetOut(Location::RequiresRegister());
2669 break;
2670
2671 default:
2672 LOG(FATAL) << "Unexpected type conversion from " << input_type
2673 << " to " << result_type;
2674 }
2675 break;
2676
2677 case DataType::Type::kInt64:
2678 switch (input_type) {
2679 case DataType::Type::kBool:
2680 case DataType::Type::kUint8:
2681 case DataType::Type::kInt8:
2682 case DataType::Type::kUint16:
2683 case DataType::Type::kInt16:
2684 case DataType::Type::kInt32:
2685 // TODO: We would benefit from a (to-be-implemented)
2686 // Location::RegisterOrStackSlot requirement for this input.
2687 locations->SetInAt(0, Location::RequiresRegister());
2688 locations->SetOut(Location::RequiresRegister());
2689 break;
2690
2691 case DataType::Type::kFloat32:
2692 locations->SetInAt(0, Location::RequiresFpuRegister());
2693 locations->SetOut(Location::RequiresRegister());
2694 break;
2695
2696 case DataType::Type::kFloat64:
2697 locations->SetInAt(0, Location::RequiresFpuRegister());
2698 locations->SetOut(Location::RequiresRegister());
2699 break;
2700
2701 default:
2702 LOG(FATAL) << "Unexpected type conversion from " << input_type
2703 << " to " << result_type;
2704 }
2705 break;
2706
2707 case DataType::Type::kFloat32:
2708 switch (input_type) {
2709 case DataType::Type::kBool:
2710 case DataType::Type::kUint8:
2711 case DataType::Type::kInt8:
2712 case DataType::Type::kUint16:
2713 case DataType::Type::kInt16:
2714 case DataType::Type::kInt32:
2715 locations->SetInAt(0, Location::Any());
2716 locations->SetOut(Location::RequiresFpuRegister());
2717 break;
2718
2719 case DataType::Type::kInt64:
2720 locations->SetInAt(0, Location::Any());
2721 locations->SetOut(Location::RequiresFpuRegister());
2722 break;
2723
2724 case DataType::Type::kFloat64:
2725 locations->SetInAt(0, Location::Any());
2726 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2727 break;
2728
2729 default:
2730 LOG(FATAL) << "Unexpected type conversion from " << input_type
2731 << " to " << result_type;
2732 }
2733 break;
2734
2735 case DataType::Type::kFloat64:
2736 switch (input_type) {
2737 case DataType::Type::kBool:
2738 case DataType::Type::kUint8:
2739 case DataType::Type::kInt8:
2740 case DataType::Type::kUint16:
2741 case DataType::Type::kInt16:
2742 case DataType::Type::kInt32:
2743 locations->SetInAt(0, Location::Any());
2744 locations->SetOut(Location::RequiresFpuRegister());
2745 break;
2746
2747 case DataType::Type::kInt64:
2748 locations->SetInAt(0, Location::Any());
2749 locations->SetOut(Location::RequiresFpuRegister());
2750 break;
2751
2752 case DataType::Type::kFloat32:
2753 locations->SetInAt(0, Location::Any());
2754 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
2755 break;
2756
2757 default:
2758 LOG(FATAL) << "Unexpected type conversion from " << input_type
2759 << " to " << result_type;
2760 }
2761 break;
2762
2763 default:
2764 LOG(FATAL) << "Unexpected type conversion from " << input_type
2765 << " to " << result_type;
2766 }
2767 }
2768
2769 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
2770 LocationSummary* locations = conversion->GetLocations();
2771 Location out = locations->Out();
2772 Location in = locations->InAt(0);
2773 DataType::Type result_type = conversion->GetResultType();
2774 DataType::Type input_type = conversion->GetInputType();
2775 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
2776 << input_type << " -> " << result_type;
2777 switch (result_type) {
2778 case DataType::Type::kUint8:
2779 switch (input_type) {
2780 case DataType::Type::kInt8:
2781 case DataType::Type::kUint16:
2782 case DataType::Type::kInt16:
2783 case DataType::Type::kInt32:
2784 case DataType::Type::kInt64:
2785 if (in.IsRegister()) {
2786 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2787 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2788 __ movzxb(out.AsRegister<CpuRegister>(),
2789 Address(CpuRegister(RSP), in.GetStackIndex()));
2790 } else {
2791 __ movl(out.AsRegister<CpuRegister>(),
2792 Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
2793 }
2794 break;
2795
2796 default:
2797 LOG(FATAL) << "Unexpected type conversion from " << input_type
2798 << " to " << result_type;
2799 }
2800 break;
2801
2802 case DataType::Type::kInt8:
2803 switch (input_type) {
2804 case DataType::Type::kUint8:
2805 case DataType::Type::kUint16:
2806 case DataType::Type::kInt16:
2807 case DataType::Type::kInt32:
2808 case DataType::Type::kInt64:
2809 if (in.IsRegister()) {
2810 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2811 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2812 __ movsxb(out.AsRegister<CpuRegister>(),
2813 Address(CpuRegister(RSP), in.GetStackIndex()));
2814 } else {
2815 __ movl(out.AsRegister<CpuRegister>(),
2816 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
2817 }
2818 break;
2819
2820 default:
2821 LOG(FATAL) << "Unexpected type conversion from " << input_type
2822 << " to " << result_type;
2823 }
2824 break;
2825
2826 case DataType::Type::kUint16:
2827 switch (input_type) {
2828 case DataType::Type::kInt8:
2829 case DataType::Type::kInt16:
2830 case DataType::Type::kInt32:
2831 case DataType::Type::kInt64:
2832 if (in.IsRegister()) {
2833 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2834 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2835 __ movzxw(out.AsRegister<CpuRegister>(),
2836 Address(CpuRegister(RSP), in.GetStackIndex()));
2837 } else {
2838 __ movl(out.AsRegister<CpuRegister>(),
2839 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
2840 }
2841 break;
2842
2843 default:
2844 LOG(FATAL) << "Unexpected type conversion from " << input_type
2845 << " to " << result_type;
2846 }
2847 break;
2848
2849 case DataType::Type::kInt16:
2850 switch (input_type) {
2851 case DataType::Type::kUint16:
2852 case DataType::Type::kInt32:
2853 case DataType::Type::kInt64:
2854 if (in.IsRegister()) {
2855 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2856 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
2857 __ movsxw(out.AsRegister<CpuRegister>(),
2858 Address(CpuRegister(RSP), in.GetStackIndex()));
2859 } else {
2860 __ movl(out.AsRegister<CpuRegister>(),
2861 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
2862 }
2863 break;
2864
2865 default:
2866 LOG(FATAL) << "Unexpected type conversion from " << input_type
2867 << " to " << result_type;
2868 }
2869 break;
2870
2871 case DataType::Type::kInt32:
2872 switch (input_type) {
2873 case DataType::Type::kInt64:
2874 if (in.IsRegister()) {
2875 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2876 } else if (in.IsDoubleStackSlot()) {
2877 __ movl(out.AsRegister<CpuRegister>(),
2878 Address(CpuRegister(RSP), in.GetStackIndex()));
2879 } else {
2880 DCHECK(in.IsConstant());
2881 DCHECK(in.GetConstant()->IsLongConstant());
2882 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
2883 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
2884 }
2885 break;
2886
2887 case DataType::Type::kFloat32: {
2888 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2889 CpuRegister output = out.AsRegister<CpuRegister>();
2890 NearLabel done, nan;
2891
2892 __ movl(output, Immediate(kPrimIntMax));
2893 // if input >= (float)INT_MAX goto done
2894 __ comiss(input, codegen_->LiteralFloatAddress(kPrimIntMax));
2895 __ j(kAboveEqual, &done);
2896 // if input == NaN goto nan
2897 __ j(kUnordered, &nan);
2898 // output = float-to-int-truncate(input)
2899 __ cvttss2si(output, input, false);
2900 __ jmp(&done);
2901 __ Bind(&nan);
2902 // output = 0
2903 __ xorl(output, output);
2904 __ Bind(&done);
2905 break;
2906 }
2907
2908 case DataType::Type::kFloat64: {
2909 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2910 CpuRegister output = out.AsRegister<CpuRegister>();
2911 NearLabel done, nan;
2912
2913 __ movl(output, Immediate(kPrimIntMax));
2914 // if input >= (double)INT_MAX goto done
2915 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
2916 __ j(kAboveEqual, &done);
2917 // if input == NaN goto nan
2918 __ j(kUnordered, &nan);
2919 // output = double-to-int-truncate(input)
2920 __ cvttsd2si(output, input);
2921 __ jmp(&done);
2922 __ Bind(&nan);
2923 // output = 0
2924 __ xorl(output, output);
2925 __ Bind(&done);
2926 break;
2927 }
2928
2929 default:
2930 LOG(FATAL) << "Unexpected type conversion from " << input_type
2931 << " to " << result_type;
2932 }
2933 break;
2934
2935 case DataType::Type::kInt64:
2936 DCHECK(out.IsRegister());
2937 switch (input_type) {
2938 case DataType::Type::kBool:
2939 case DataType::Type::kUint8:
2940 case DataType::Type::kInt8:
2941 case DataType::Type::kUint16:
2942 case DataType::Type::kInt16:
2943 case DataType::Type::kInt32:
2944 DCHECK(in.IsRegister());
2945 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
2946 break;
2947
2948 case DataType::Type::kFloat32: {
2949 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2950 CpuRegister output = out.AsRegister<CpuRegister>();
2951 NearLabel done, nan;
2952
2953 codegen_->Load64BitValue(output, kPrimLongMax);
2954 // if input >= (float)LONG_MAX goto done
2955 __ comiss(input, codegen_->LiteralFloatAddress(kPrimLongMax));
2956 __ j(kAboveEqual, &done);
2957 // if input == NaN goto nan
2958 __ j(kUnordered, &nan);
2959 // output = float-to-long-truncate(input)
2960 __ cvttss2si(output, input, true);
2961 __ jmp(&done);
2962 __ Bind(&nan);
2963 // output = 0
2964 __ xorl(output, output);
2965 __ Bind(&done);
2966 break;
2967 }
2968
2969 case DataType::Type::kFloat64: {
2970 XmmRegister input = in.AsFpuRegister<XmmRegister>();
2971 CpuRegister output = out.AsRegister<CpuRegister>();
2972 NearLabel done, nan;
2973
2974 codegen_->Load64BitValue(output, kPrimLongMax);
2975 // if input >= (double)LONG_MAX goto done
2976 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimLongMax));
2977 __ j(kAboveEqual, &done);
2978 // if input == NaN goto nan
2979 __ j(kUnordered, &nan);
2980 // output = double-to-long-truncate(input)
2981 __ cvttsd2si(output, input, true);
2982 __ jmp(&done);
2983 __ Bind(&nan);
2984 // output = 0
2985 __ xorl(output, output);
2986 __ Bind(&done);
2987 break;
2988 }
2989
2990 default:
2991 LOG(FATAL) << "Unexpected type conversion from " << input_type
2992 << " to " << result_type;
2993 }
2994 break;
2995
2996 case DataType::Type::kFloat32:
2997 switch (input_type) {
2998 case DataType::Type::kBool:
2999 case DataType::Type::kUint8:
3000 case DataType::Type::kInt8:
3001 case DataType::Type::kUint16:
3002 case DataType::Type::kInt16:
3003 case DataType::Type::kInt32:
3004 if (in.IsRegister()) {
3005 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3006 } else if (in.IsConstant()) {
3007 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3008 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3009 codegen_->Load32BitValue(dest, static_cast<float>(v));
3010 } else {
3011 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3012 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3013 }
3014 break;
3015
3016 case DataType::Type::kInt64:
3017 if (in.IsRegister()) {
3018 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3019 } else if (in.IsConstant()) {
3020 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3021 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3022 codegen_->Load32BitValue(dest, static_cast<float>(v));
3023 } else {
3024 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3025 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3026 }
3027 break;
3028
3029 case DataType::Type::kFloat64:
3030 if (in.IsFpuRegister()) {
3031 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3032 } else if (in.IsConstant()) {
3033 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3034 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3035 codegen_->Load32BitValue(dest, static_cast<float>(v));
3036 } else {
3037 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3038 Address(CpuRegister(RSP), in.GetStackIndex()));
3039 }
3040 break;
3041
3042 default:
3043 LOG(FATAL) << "Unexpected type conversion from " << input_type
3044 << " to " << result_type;
3045 }
3046 break;
3047
3048 case DataType::Type::kFloat64:
3049 switch (input_type) {
3050 case DataType::Type::kBool:
3051 case DataType::Type::kUint8:
3052 case DataType::Type::kInt8:
3053 case DataType::Type::kUint16:
3054 case DataType::Type::kInt16:
3055 case DataType::Type::kInt32:
3056 if (in.IsRegister()) {
3057 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3058 } else if (in.IsConstant()) {
3059 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3060 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3061 codegen_->Load64BitValue(dest, static_cast<double>(v));
3062 } else {
3063 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3064 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3065 }
3066 break;
3067
3068 case DataType::Type::kInt64:
3069 if (in.IsRegister()) {
3070 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3071 } else if (in.IsConstant()) {
3072 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3073 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3074 codegen_->Load64BitValue(dest, static_cast<double>(v));
3075 } else {
3076 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3077 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3078 }
3079 break;
3080
3081 case DataType::Type::kFloat32:
3082 if (in.IsFpuRegister()) {
3083 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3084 } else if (in.IsConstant()) {
3085 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3086 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3087 codegen_->Load64BitValue(dest, static_cast<double>(v));
3088 } else {
3089 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3090 Address(CpuRegister(RSP), in.GetStackIndex()));
3091 }
3092 break;
3093
3094 default:
3095 LOG(FATAL) << "Unexpected type conversion from " << input_type
3096 << " to " << result_type;
3097 }
3098 break;
3099
3100 default:
3101 LOG(FATAL) << "Unexpected type conversion from " << input_type
3102 << " to " << result_type;
3103 }
3104 }
3105
3106 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3107 LocationSummary* locations =
3108 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3109 switch (add->GetResultType()) {
3110 case DataType::Type::kInt32: {
3111 locations->SetInAt(0, Location::RequiresRegister());
3112 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3113 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3114 break;
3115 }
3116
3117 case DataType::Type::kInt64: {
3118 locations->SetInAt(0, Location::RequiresRegister());
3119 // We can use a leaq or addq if the constant can fit in an immediate.
3120 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3121 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3122 break;
3123 }
3124
3125 case DataType::Type::kFloat64:
3126 case DataType::Type::kFloat32: {
3127 locations->SetInAt(0, Location::RequiresFpuRegister());
3128 locations->SetInAt(1, Location::Any());
3129 locations->SetOut(Location::SameAsFirstInput());
3130 break;
3131 }
3132
3133 default:
3134 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3135 }
3136 }
3137
3138 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3139 LocationSummary* locations = add->GetLocations();
3140 Location first = locations->InAt(0);
3141 Location second = locations->InAt(1);
3142 Location out = locations->Out();
3143
3144 switch (add->GetResultType()) {
3145 case DataType::Type::kInt32: {
3146 if (second.IsRegister()) {
3147 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3148 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3149 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3150 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3151 } else {
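// out differs from both inputs: leal acts as a non-destructive three-operand add.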
3152 __ leal(out.AsRegister<CpuRegister>(), Address(
3153 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3154 }
3155 } else if (second.IsConstant()) {
3156 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3157 __ addl(out.AsRegister<CpuRegister>(),
3158 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3159 } else {
3160 __ leal(out.AsRegister<CpuRegister>(), Address(
3161 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3162 }
3163 } else {
3164 DCHECK(first.Equals(locations->Out()));
3165 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3166 }
3167 break;
3168 }
3169
3170 case DataType::Type::kInt64: {
3171 if (second.IsRegister()) {
3172 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3173 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3174 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3175 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3176 } else {
3177 __ leaq(out.AsRegister<CpuRegister>(), Address(
3178 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3179 }
3180 } else {
3181 DCHECK(second.IsConstant());
3182 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3183 int32_t int32_value = Low32Bits(value);
3184 DCHECK_EQ(int32_value, value);
3185 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3186 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3187 } else {
3188 __ leaq(out.AsRegister<CpuRegister>(), Address(
3189 first.AsRegister<CpuRegister>(), int32_value));
3190 }
3191 }
3192 break;
3193 }
3194
3195 case DataType::Type::kFloat32: {
3196 if (second.IsFpuRegister()) {
3197 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3198 } else if (second.IsConstant()) {
3199 __ addss(first.AsFpuRegister<XmmRegister>(),
3200 codegen_->LiteralFloatAddress(
3201 second.GetConstant()->AsFloatConstant()->GetValue()));
3202 } else {
3203 DCHECK(second.IsStackSlot());
3204 __ addss(first.AsFpuRegister<XmmRegister>(),
3205 Address(CpuRegister(RSP), second.GetStackIndex()));
3206 }
3207 break;
3208 }
3209
3210 case DataType::Type::kFloat64: {
3211 if (second.IsFpuRegister()) {
3212 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3213 } else if (second.IsConstant()) {
3214 __ addsd(first.AsFpuRegister<XmmRegister>(),
3215 codegen_->LiteralDoubleAddress(
3216 second.GetConstant()->AsDoubleConstant()->GetValue()));
3217 } else {
3218 DCHECK(second.IsDoubleStackSlot());
3219 __ addsd(first.AsFpuRegister<XmmRegister>(),
3220 Address(CpuRegister(RSP), second.GetStackIndex()));
3221 }
3222 break;
3223 }
3224
3225 default:
3226 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3227 }
3228 }
3229
3230 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3231 LocationSummary* locations =
3232 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3233 switch (sub->GetResultType()) {
3234 case DataType::Type::kInt32: {
3235 locations->SetInAt(0, Location::RequiresRegister());
3236 locations->SetInAt(1, Location::Any());
3237 locations->SetOut(Location::SameAsFirstInput());
3238 break;
3239 }
3240 case DataType::Type::kInt64: {
3241 locations->SetInAt(0, Location::RequiresRegister());
3242 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3243 locations->SetOut(Location::SameAsFirstInput());
3244 break;
3245 }
3246 case DataType::Type::kFloat32:
3247 case DataType::Type::kFloat64: {
3248 locations->SetInAt(0, Location::RequiresFpuRegister());
3249 locations->SetInAt(1, Location::Any());
3250 locations->SetOut(Location::SameAsFirstInput());
3251 break;
3252 }
3253 default:
3254 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3255 }
3256 }
3257
3258 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3259 LocationSummary* locations = sub->GetLocations();
3260 Location first = locations->InAt(0);
3261 Location second = locations->InAt(1);
3262 DCHECK(first.Equals(locations->Out()));
3263 switch (sub->GetResultType()) {
3264 case DataType::Type::kInt32: {
3265 if (second.IsRegister()) {
3266 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3267 } else if (second.IsConstant()) {
3268 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3269 __ subl(first.AsRegister<CpuRegister>(), imm);
3270 } else {
3271 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3272 }
3273 break;
3274 }
3275 case DataType::Type::kInt64: {
3276 if (second.IsConstant()) {
3277 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3278 DCHECK(IsInt<32>(value));
3279 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3280 } else {
3281 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3282 }
3283 break;
3284 }
3285
3286 case DataType::Type::kFloat32: {
3287 if (second.IsFpuRegister()) {
3288 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3289 } else if (second.IsConstant()) {
3290 __ subss(first.AsFpuRegister<XmmRegister>(),
3291 codegen_->LiteralFloatAddress(
3292 second.GetConstant()->AsFloatConstant()->GetValue()));
3293 } else {
3294 DCHECK(second.IsStackSlot());
3295 __ subss(first.AsFpuRegister<XmmRegister>(),
3296 Address(CpuRegister(RSP), second.GetStackIndex()));
3297 }
3298 break;
3299 }
3300
3301 case DataType::Type::kFloat64: {
3302 if (second.IsFpuRegister()) {
3303 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3304 } else if (second.IsConstant()) {
3305 __ subsd(first.AsFpuRegister<XmmRegister>(),
3306 codegen_->LiteralDoubleAddress(
3307 second.GetConstant()->AsDoubleConstant()->GetValue()));
3308 } else {
3309 DCHECK(second.IsDoubleStackSlot());
3310 __ subsd(first.AsFpuRegister<XmmRegister>(),
3311 Address(CpuRegister(RSP), second.GetStackIndex()));
3312 }
3313 break;
3314 }
3315
3316 default:
3317 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3318 }
3319 }
3320
3321 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3322 LocationSummary* locations =
3323 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3324 switch (mul->GetResultType()) {
3325 case DataType::Type::kInt32: {
3326 locations->SetInAt(0, Location::RequiresRegister());
3327 locations->SetInAt(1, Location::Any());
3328 if (mul->InputAt(1)->IsIntConstant()) {
3329 // Can use 3 operand multiply.
3330 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3331 } else {
3332 locations->SetOut(Location::SameAsFirstInput());
3333 }
3334 break;
3335 }
3336 case DataType::Type::kInt64: {
3337 locations->SetInAt(0, Location::RequiresRegister());
3338 locations->SetInAt(1, Location::Any());
3339 if (mul->InputAt(1)->IsLongConstant() &&
3340 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3341 // Can use 3 operand multiply.
3342 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3343 } else {
3344 locations->SetOut(Location::SameAsFirstInput());
3345 }
3346 break;
3347 }
3348 case DataType::Type::kFloat32:
3349 case DataType::Type::kFloat64: {
3350 locations->SetInAt(0, Location::RequiresFpuRegister());
3351 locations->SetInAt(1, Location::Any());
3352 locations->SetOut(Location::SameAsFirstInput());
3353 break;
3354 }
3355
3356 default:
3357 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3358 }
3359 }
3360
3361 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3362 LocationSummary* locations = mul->GetLocations();
3363 Location first = locations->InAt(0);
3364 Location second = locations->InAt(1);
3365 Location out = locations->Out();
3366 switch (mul->GetResultType()) {
3367 case DataType::Type::kInt32:
3368 // The constant may have ended up in a register, so test explicitly to avoid
3369 // problems where the output may not be the same as the first operand.
3370 if (mul->InputAt(1)->IsIntConstant()) {
3371 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3372 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3373 } else if (second.IsRegister()) {
3374 DCHECK(first.Equals(out));
3375 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3376 } else {
3377 DCHECK(first.Equals(out));
3378 DCHECK(second.IsStackSlot());
3379 __ imull(first.AsRegister<CpuRegister>(),
3380 Address(CpuRegister(RSP), second.GetStackIndex()));
3381 }
3382 break;
3383 case DataType::Type::kInt64: {
3384 // The constant may have ended up in a register, so test explicitly to avoid
3385 // problems where the output may not be the same as the first operand.
3386 if (mul->InputAt(1)->IsLongConstant()) {
3387 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3388 if (IsInt<32>(value)) {
3389 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3390 Immediate(static_cast<int32_t>(value)));
3391 } else {
3392 // Have to use the constant area.
3393 DCHECK(first.Equals(out));
3394 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3395 }
3396 } else if (second.IsRegister()) {
3397 DCHECK(first.Equals(out));
3398 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3399 } else {
3400 DCHECK(second.IsDoubleStackSlot());
3401 DCHECK(first.Equals(out));
3402 __ imulq(first.AsRegister<CpuRegister>(),
3403 Address(CpuRegister(RSP), second.GetStackIndex()));
3404 }
3405 break;
3406 }
3407
3408 case DataType::Type::kFloat32: {
3409 DCHECK(first.Equals(out));
3410 if (second.IsFpuRegister()) {
3411 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3412 } else if (second.IsConstant()) {
3413 __ mulss(first.AsFpuRegister<XmmRegister>(),
3414 codegen_->LiteralFloatAddress(
3415 second.GetConstant()->AsFloatConstant()->GetValue()));
3416 } else {
3417 DCHECK(second.IsStackSlot());
3418 __ mulss(first.AsFpuRegister<XmmRegister>(),
3419 Address(CpuRegister(RSP), second.GetStackIndex()));
3420 }
3421 break;
3422 }
3423
3424 case DataType::Type::kFloat64: {
3425 DCHECK(first.Equals(out));
3426 if (second.IsFpuRegister()) {
3427 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3428 } else if (second.IsConstant()) {
3429 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3430 codegen_->LiteralDoubleAddress(
3431 second.GetConstant()->AsDoubleConstant()->GetValue()));
3432 } else {
3433 DCHECK(second.IsDoubleStackSlot());
3434 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3435 Address(CpuRegister(RSP), second.GetStackIndex()));
3436 }
3437 break;
3438 }
3439
3440 default:
3441 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3442 }
3443 }
3444
3445 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3446 uint32_t stack_adjustment, bool is_float) {
3447 if (source.IsStackSlot()) {
3448 DCHECK(is_float);
3449 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3450 } else if (source.IsDoubleStackSlot()) {
3451 DCHECK(!is_float);
3452 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3453 } else {
3454 // Write the value to the temporary location on the stack and load to FP stack.
3455 if (is_float) {
3456 Location stack_temp = Location::StackSlot(temp_offset);
3457 codegen_->Move(stack_temp, source);
3458 __ flds(Address(CpuRegister(RSP), temp_offset));
3459 } else {
3460 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3461 codegen_->Move(stack_temp, source);
3462 __ fldl(Address(CpuRegister(RSP), temp_offset));
3463 }
3464 }
3465 }
3466
3467 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3468 DataType::Type type = rem->GetResultType();
3469 bool is_float = type == DataType::Type::kFloat32;
3470 size_t elem_size = DataType::Size(type);
3471 LocationSummary* locations = rem->GetLocations();
3472 Location first = locations->InAt(0);
3473 Location second = locations->InAt(1);
3474 Location out = locations->Out();
3475
3476 // Create stack space for 2 elements.
3477 // TODO: enhance register allocator to ask for stack temporaries.
3478 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3479
3480 // Load the values to the FP stack in reverse order, using temporaries if needed.
3481 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3482 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3483
3484 // Loop doing FPREM until we stabilize.
3485 NearLabel retry;
3486 __ Bind(&retry);
3487 __ fprem();
3488
3489 // Move FP status to AX.
3490 __ fstsw();
3491
3492 // And see if the argument reduction is complete. This is signaled by the
3493 // C2 FPU flag bit set to 0.
3494 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3495 __ j(kNotEqual, &retry);
3496
3497 // We have settled on the final value. Retrieve it into an XMM register.
3498 // Store FP top of stack to real stack.
3499 if (is_float) {
3500 __ fsts(Address(CpuRegister(RSP), 0));
3501 } else {
3502 __ fstl(Address(CpuRegister(RSP), 0));
3503 }
3504
3505 // Pop the 2 items from the FP stack.
3506 __ fucompp();
3507
3508 // Load the value from the stack into an XMM register.
3509 DCHECK(out.IsFpuRegister()) << out;
3510 if (is_float) {
3511 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3512 } else {
3513 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3514 }
3515
3516 // And remove the temporary stack space we allocated.
3517 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3518 }
3519
3520 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3521 DCHECK(instruction->IsDiv() || instruction->IsRem());
3522
3523 LocationSummary* locations = instruction->GetLocations();
3524 Location second = locations->InAt(1);
3525 DCHECK(second.IsConstant());
3526
3527 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3528 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3529 int64_t imm = Int64FromConstant(second.GetConstant());
3530
3531 DCHECK(imm == 1 || imm == -1);
3532
3533 switch (instruction->GetResultType()) {
3534 case DataType::Type::kInt32: {
3535 if (instruction->IsRem()) {
3536 __ xorl(output_register, output_register);
3537 } else {
3538 __ movl(output_register, input_register);
3539 if (imm == -1) {
3540 __ negl(output_register);
3541 }
3542 }
3543 break;
3544 }
3545
3546 case DataType::Type::kInt64: {
3547 if (instruction->IsRem()) {
3548 __ xorl(output_register, output_register);
3549 } else {
3550 __ movq(output_register, input_register);
3551 if (imm == -1) {
3552 __ negq(output_register);
3553 }
3554 }
3555 break;
3556 }
3557
3558 default:
3559 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
3560 }
3561 }
3562 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
3563 LocationSummary* locations = instruction->GetLocations();
3564 Location second = locations->InAt(1);
3565 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3566 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3567 int64_t imm = Int64FromConstant(second.GetConstant());
3568 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3569 uint64_t abs_imm = AbsOrMin(imm);
3570 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3571 if (instruction->GetResultType() == DataType::Type::kInt32) {
3572 NearLabel done;
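// out = numerator & (abs_imm - 1); if that is non-zero and the numerator is negative,
// pick tmp = out - abs_imm instead so the remainder takes the sign of the dividend.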
3573 __ movl(out, numerator);
3574 __ andl(out, Immediate(abs_imm-1));
3575 __ j(Condition::kZero, &done);
3576 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
3577 __ testl(numerator, numerator);
3578 __ cmov(Condition::kLess, out, tmp, false);
3579 __ Bind(&done);
3580
3581 } else {
3582 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3583 codegen_->Load64BitValue(tmp, abs_imm - 1);
3584 NearLabel done;
3585
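// Same idea for 64-bit: for a negative numerator with a non-zero remainder, OR in
// -abs_imm (the sign bits shifted left by log2(abs_imm)), which subtracts abs_imm.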
3586 __ movq(out, numerator);
3587 __ andq(out, tmp);
3588 __ j(Condition::kZero, &done);
3589 __ movq(tmp, numerator);
3590 __ sarq(tmp, Immediate(63));
3591 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
3592 __ orq(out, tmp);
3593 __ Bind(&done);
3594 }
3595 }
3596 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
3597 LocationSummary* locations = instruction->GetLocations();
3598 Location second = locations->InAt(1);
3599
3600 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3601 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
3602
3603 int64_t imm = Int64FromConstant(second.GetConstant());
3604 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
3605 uint64_t abs_imm = AbsOrMin(imm);
3606
3607 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3608
3609 if (instruction->GetResultType() == DataType::Type::kInt32) {
3610 // When the denominator is 2, we can simply add the sign bit to the numerator in tmp.
3611 // Below we use an addl instruction instead of cmov, which saves one cycle.
3612 if (abs_imm == 2) {
3613 __ leal(tmp, Address(numerator, 0));
3614 __ shrl(tmp, Immediate(31));
3615 __ addl(tmp, numerator);
3616 } else {
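// Bias negative numerators by (abs_imm - 1) so that the arithmetic shift below
// rounds the quotient toward zero.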
3617 __ leal(tmp, Address(numerator, abs_imm - 1));
3618 __ testl(numerator, numerator);
3619 __ cmov(kGreaterEqual, tmp, numerator);
3620 }
3621 int shift = CTZ(imm);
3622 __ sarl(tmp, Immediate(shift));
3623
3624 if (imm < 0) {
3625 __ negl(tmp);
3626 }
3627
3628 __ movl(output_register, tmp);
3629 } else {
3630 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3631 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3632 if (abs_imm == 2) {
3633 __ movq(rdx, numerator);
3634 __ shrq(rdx, Immediate(63));
3635 __ addq(rdx, numerator);
3636 } else {
3637 codegen_->Load64BitValue(rdx, abs_imm - 1);
3638 __ addq(rdx, numerator);
3639 __ testq(numerator, numerator);
3640 __ cmov(kGreaterEqual, rdx, numerator);
3641 }
3642 int shift = CTZ(imm);
3643 __ sarq(rdx, Immediate(shift));
3644
3645 if (imm < 0) {
3646 __ negq(rdx);
3647 }
3648
3649 __ movq(output_register, rdx);
3650 }
3651 }
3652
3653 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
3654 DCHECK(instruction->IsDiv() || instruction->IsRem());
3655
3656 LocationSummary* locations = instruction->GetLocations();
3657 Location second = locations->InAt(1);
3658
3659 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
3660 : locations->GetTemp(0).AsRegister<CpuRegister>();
3661 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
3662 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
3663 : locations->Out().AsRegister<CpuRegister>();
3664 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3665
3666 DCHECK_EQ(RAX, eax.AsRegister());
3667 DCHECK_EQ(RDX, edx.AsRegister());
3668 if (instruction->IsDiv()) {
3669 DCHECK_EQ(RAX, out.AsRegister());
3670 } else {
3671 DCHECK_EQ(RDX, out.AsRegister());
3672 }
3673
3674 int64_t magic;
3675 int shift;
3676
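// Both branches below implement signed division by a constant via a magic-number
// multiply (the classic technique from Hacker's Delight); CalculateMagicAndShiftForDivRem
// provides the multiplier and the shift amount.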
3677 // TODO: can these branches be written as one?
3678 if (instruction->GetResultType() == DataType::Type::kInt32) {
3679 int imm = second.GetConstant()->AsIntConstant()->GetValue();
3680
3681 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
3682
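// Save the numerator.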
3683 __ movl(numerator, eax);
3684
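// EAX = magic, then EDX:EAX = magic * numerator.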
3685 __ movl(eax, Immediate(magic));
3686 __ imull(numerator);
3687
3688 if (imm > 0 && magic < 0) {
3689 __ addl(edx, numerator);
3690 } else if (imm < 0 && magic > 0) {
3691 __ subl(edx, numerator);
3692 }
3693
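// Shift if needed.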
3694 if (shift != 0) {
3695 __ sarl(edx, Immediate(shift));
3696 }
3697
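// EDX += 1 if EDX < 0.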
3698 __ movl(eax, edx);
3699 __ shrl(edx, Immediate(31));
3700 __ addl(edx, eax);
3701
3702 if (instruction->IsRem()) {
3703 __ movl(eax, numerator);
3704 __ imull(edx, Immediate(imm));
3705 __ subl(eax, edx);
3706 __ movl(edx, eax);
3707 } else {
3708 __ movl(eax, edx);
3709 }
3710 } else {
3711 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
3712
3713 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
3714
3715 CpuRegister rax = eax;
3716 CpuRegister rdx = edx;
3717
3718 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
3719
3720 // Save the numerator.
3721 __ movq(numerator, rax);
3722
3723 // RAX = magic
3724 codegen_->Load64BitValue(rax, magic);
3725
3726 // RDX:RAX = magic * numerator
3727 __ imulq(numerator);
3728
3729 if (imm > 0 && magic < 0) {
3730 // RDX += numerator
3731 __ addq(rdx, numerator);
3732 } else if (imm < 0 && magic > 0) {
3733 // RDX -= numerator
3734 __ subq(rdx, numerator);
3735 }
3736
3737 // Shift if needed.
3738 if (shift != 0) {
3739 __ sarq(rdx, Immediate(shift));
3740 }
3741
3742 // RDX += 1 if RDX < 0
3743 __ movq(rax, rdx);
3744 __ shrq(rdx, Immediate(63));
3745 __ addq(rdx, rax);
3746
3747 if (instruction->IsRem()) {
3748 __ movq(rax, numerator);
3749
3750 if (IsInt<32>(imm)) {
3751 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
3752 } else {
3753 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
3754 }
3755
3756 __ subq(rax, rdx);
3757 __ movq(rdx, rax);
3758 } else {
3759 __ movq(rax, rdx);
3760 }
3761 }
3762 }
3763
3764 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
3765 DCHECK(instruction->IsDiv() || instruction->IsRem());
3766 DataType::Type type = instruction->GetResultType();
3767 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
3768
3769 bool is_div = instruction->IsDiv();
3770 LocationSummary* locations = instruction->GetLocations();
3771
3772 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3773 Location second = locations->InAt(1);
3774
3775 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
3776 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
3777
3778 if (second.IsConstant()) {
3779 int64_t imm = Int64FromConstant(second.GetConstant());
3780
3781 if (imm == 0) {
3782 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
3783 } else if (imm == 1 || imm == -1) {
3784 DivRemOneOrMinusOne(instruction);
3785 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
3786 if (is_div) {
3787 DivByPowerOfTwo(instruction->AsDiv());
3788 } else {
3789 RemByPowerOfTwo(instruction->AsRem());
3790 }
3791 } else {
3792 DCHECK(imm <= -2 || imm >= 2);
3793 GenerateDivRemWithAnyConstant(instruction);
3794 }
3795 } else {
3796 SlowPathCode* slow_path =
3797 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
3798 instruction, out.AsRegister(), type, is_div);
3799 codegen_->AddSlowPath(slow_path);
3800
3801 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3802 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
3803 // Dividing by -1 is actually negation and -0x80000000(00000000) = 0x80000000(00000000)
3804 // so it's safe to just use negl instead of more complex comparisons.
3805 if (type == DataType::Type::kInt32) {
3806 __ cmpl(second_reg, Immediate(-1));
3807 __ j(kEqual, slow_path->GetEntryLabel());
3808 // edx:eax <- sign-extended of eax
3809 __ cdq();
3810 // eax = quotient, edx = remainder
3811 __ idivl(second_reg);
3812 } else {
3813 __ cmpq(second_reg, Immediate(-1));
3814 __ j(kEqual, slow_path->GetEntryLabel());
3815 // rdx:rax <- sign-extended of rax
3816 __ cqo();
3817 // rax = quotient, rdx = remainder
3818 __ idivq(second_reg);
3819 }
3820 __ Bind(slow_path->GetExitLabel());
3821 }
3822 }
3823
3824 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
3825 LocationSummary* locations =
3826 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
3827 switch (div->GetResultType()) {
3828 case DataType::Type::kInt32:
3829 case DataType::Type::kInt64: {
3830 locations->SetInAt(0, Location::RegisterLocation(RAX));
3831 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
3832 locations->SetOut(Location::SameAsFirstInput());
3833 // Intel uses edx:eax as the dividend.
3834 locations->AddTemp(Location::RegisterLocation(RDX));
3835 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
3836 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
3837 // output and request another temp.
3838 if (div->InputAt(1)->IsConstant()) {
3839 locations->AddTemp(Location::RequiresRegister());
3840 }
3841 break;
3842 }
3843
3844 case DataType::Type::kFloat32:
3845 case DataType::Type::kFloat64: {
3846 locations->SetInAt(0, Location::RequiresFpuRegister());
3847 locations->SetInAt(1, Location::Any());
3848 locations->SetOut(Location::SameAsFirstInput());
3849 break;
3850 }
3851
3852 default:
3853 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3854 }
3855 }
3856
3857 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
3858 LocationSummary* locations = div->GetLocations();
3859 Location first = locations->InAt(0);
3860 Location second = locations->InAt(1);
3861 DCHECK(first.Equals(locations->Out()));
3862
3863 DataType::Type type = div->GetResultType();
3864 switch (type) {
3865 case DataType::Type::kInt32:
3866 case DataType::Type::kInt64: {
3867 GenerateDivRemIntegral(div);
3868 break;
3869 }
3870
3871 case DataType::Type::kFloat32: {
3872 if (second.IsFpuRegister()) {
3873 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3874 } else if (second.IsConstant()) {
3875 __ divss(first.AsFpuRegister<XmmRegister>(),
3876 codegen_->LiteralFloatAddress(
3877 second.GetConstant()->AsFloatConstant()->GetValue()));
3878 } else {
3879 DCHECK(second.IsStackSlot());
3880 __ divss(first.AsFpuRegister<XmmRegister>(),
3881 Address(CpuRegister(RSP), second.GetStackIndex()));
3882 }
3883 break;
3884 }
3885
3886 case DataType::Type::kFloat64: {
3887 if (second.IsFpuRegister()) {
3888 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3889 } else if (second.IsConstant()) {
3890 __ divsd(first.AsFpuRegister<XmmRegister>(),
3891 codegen_->LiteralDoubleAddress(
3892 second.GetConstant()->AsDoubleConstant()->GetValue()));
3893 } else {
3894 DCHECK(second.IsDoubleStackSlot());
3895 __ divsd(first.AsFpuRegister<XmmRegister>(),
3896 Address(CpuRegister(RSP), second.GetStackIndex()));
3897 }
3898 break;
3899 }
3900
3901 default:
3902 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
3903 }
3904 }
3905
3906 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
3907 DataType::Type type = rem->GetResultType();
3908 LocationSummary* locations =
3909 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
3910
3911 switch (type) {
3912 case DataType::Type::kInt32:
3913 case DataType::Type::kInt64: {
3914 locations->SetInAt(0, Location::RegisterLocation(RAX));
3915 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
3916 // Intel uses rdx:rax as the dividend and puts the remainder in rdx.
3917 locations->SetOut(Location::RegisterLocation(RDX));
3918 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
3919 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
3920 // output and request another temp.
3921 if (rem->InputAt(1)->IsConstant()) {
3922 locations->AddTemp(Location::RequiresRegister());
3923 }
3924 break;
3925 }
3926
3927 case DataType::Type::kFloat32:
3928 case DataType::Type::kFloat64: {
3929 locations->SetInAt(0, Location::Any());
3930 locations->SetInAt(1, Location::Any());
3931 locations->SetOut(Location::RequiresFpuRegister());
3932 locations->AddTemp(Location::RegisterLocation(RAX));
3933 break;
3934 }
3935
3936 default:
3937 LOG(FATAL) << "Unexpected rem type " << type;
3938 }
3939 }
3940
3941 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
3942 DataType::Type type = rem->GetResultType();
3943 switch (type) {
3944 case DataType::Type::kInt32:
3945 case DataType::Type::kInt64: {
3946 GenerateDivRemIntegral(rem);
3947 break;
3948 }
3949 case DataType::Type::kFloat32:
3950 case DataType::Type::kFloat64: {
3951 GenerateRemFP(rem);
3952 break;
3953 }
3954 default:
3955 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
3956 }
3957 }
3958
3959 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
3960 LocationSummary* locations = new (allocator) LocationSummary(minmax);
3961 switch (minmax->GetResultType()) {
3962 case DataType::Type::kInt32:
3963 case DataType::Type::kInt64:
3964 locations->SetInAt(0, Location::RequiresRegister());
3965 locations->SetInAt(1, Location::RequiresRegister());
3966 locations->SetOut(Location::SameAsFirstInput());
3967 break;
3968 case DataType::Type::kFloat32:
3969 case DataType::Type::kFloat64:
3970 locations->SetInAt(0, Location::RequiresFpuRegister());
3971 locations->SetInAt(1, Location::RequiresFpuRegister());
3972 // The following is sub-optimal, but all we can do for now. It would be fine to also accept
3973 // the second input to be the output (we can simply swap inputs).
3974 locations->SetOut(Location::SameAsFirstInput());
3975 break;
3976 default:
3977 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
3978 }
3979 }
3980
3981 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
3982 bool is_min,
3983 DataType::Type type) {
3984 Location op1_loc = locations->InAt(0);
3985 Location op2_loc = locations->InAt(1);
3986
3987 // Shortcut for same input locations.
3988 if (op1_loc.Equals(op2_loc)) {
3989 // Can return immediately, as op1_loc == out_loc.
3990 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
3991 // a copy here.
3992 DCHECK(locations->Out().Equals(op1_loc));
3993 return;
3994 }
3995
3996 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3997 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
3998
3999 // (out := op1)
4000 // out <=? op2
4001 // if out is min jmp done
4002 // out := op2
4003 // done:
4004
4005 if (type == DataType::Type::kInt64) {
4006 __ cmpq(out, op2);
4007 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4008 } else {
4009 DCHECK_EQ(type, DataType::Type::kInt32);
4010 __ cmpl(out, op2);
4011 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4012 }
4013 }
4014
4015 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4016 bool is_min,
4017 DataType::Type type) {
4018 Location op1_loc = locations->InAt(0);
4019 Location op2_loc = locations->InAt(1);
4020 Location out_loc = locations->Out();
4021 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4022
4023 // Shortcut for same input locations.
4024 if (op1_loc.Equals(op2_loc)) {
4025 DCHECK(out_loc.Equals(op1_loc));
4026 return;
4027 }
4028
4029 // (out := op1)
4030 // out <=? op2
4031 // if Nan jmp Nan_label
4032 // if out is min jmp done
4033 // if op2 is min jmp op2_label
4034 // handle -0/+0
4035 // jmp done
4036 // Nan_label:
4037 // out := NaN
4038 // op2_label:
4039 // out := op2
4040 // done:
4041 //
4042 // This removes one jmp, but needs to copy one input (op1) to out.
4043 //
4044 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4045
4046 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4047
4048 NearLabel nan, done, op2_label;
4049 if (type == DataType::Type::kFloat64) {
4050 __ ucomisd(out, op2);
4051 } else {
4052 DCHECK_EQ(type, DataType::Type::kFloat32);
4053 __ ucomiss(out, op2);
4054 }
4055
4056 __ j(Condition::kParityEven, &nan);
4057
4058 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4059 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4060
4061 // Handle 0.0/-0.0.
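// The operands compare equal here; the interesting case is +/-0.0: ORing keeps a negative
// sign bit (min(+0.0, -0.0) == -0.0), while ANDing clears it (max(+0.0, -0.0) == +0.0).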
4062 if (is_min) {
4063 if (type == DataType::Type::kFloat64) {
4064 __ orpd(out, op2);
4065 } else {
4066 __ orps(out, op2);
4067 }
4068 } else {
4069 if (type == DataType::Type::kFloat64) {
4070 __ andpd(out, op2);
4071 } else {
4072 __ andps(out, op2);
4073 }
4074 }
4075 __ jmp(&done);
4076
4077 // NaN handling.
4078 __ Bind(&nan);
4079 if (type == DataType::Type::kFloat64) {
4080 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4081 } else {
4082 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4083 }
4084 __ jmp(&done);
4085
4086 // out := op2;
4087 __ Bind(&op2_label);
4088 if (type == DataType::Type::kFloat64) {
4089 __ movsd(out, op2);
4090 } else {
4091 __ movss(out, op2);
4092 }
4093
4094 // Done.
4095 __ Bind(&done);
4096 }
4097
4098 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4099 DataType::Type type = minmax->GetResultType();
4100 switch (type) {
4101 case DataType::Type::kInt32:
4102 case DataType::Type::kInt64:
4103 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4104 break;
4105 case DataType::Type::kFloat32:
4106 case DataType::Type::kFloat64:
4107 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4108 break;
4109 default:
4110 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4111 }
4112 }
4113
4114 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4115 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4116 }
4117
4118 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4119 GenerateMinMax(min, /*is_min*/ true);
4120 }
4121
4122 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4123 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4124 }
4125
4126 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4127 GenerateMinMax(max, /*is_min*/ false);
4128 }
4129
4130 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4131 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4132 switch (abs->GetResultType()) {
4133 case DataType::Type::kInt32:
4134 case DataType::Type::kInt64:
4135 locations->SetInAt(0, Location::RequiresRegister());
4136 locations->SetOut(Location::SameAsFirstInput());
4137 locations->AddTemp(Location::RequiresRegister());
4138 break;
4139 case DataType::Type::kFloat32:
4140 case DataType::Type::kFloat64:
4141 locations->SetInAt(0, Location::RequiresFpuRegister());
4142 locations->SetOut(Location::SameAsFirstInput());
4143 locations->AddTemp(Location::RequiresFpuRegister());
4144 break;
4145 default:
4146 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4147 }
4148 }
4149
4150 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4151 LocationSummary* locations = abs->GetLocations();
4152 switch (abs->GetResultType()) {
4153 case DataType::Type::kInt32: {
4154 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4155 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
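// Branchless abs: mask = out >> 31 (0 or -1), abs = (out + mask) ^ mask.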
4156 // Create mask.
4157 __ movl(mask, out);
4158 __ sarl(mask, Immediate(31));
4159 // Add mask.
4160 __ addl(out, mask);
4161 __ xorl(out, mask);
4162 break;
4163 }
4164 case DataType::Type::kInt64: {
4165 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4166 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4167 // Create mask.
4168 __ movq(mask, out);
4169 __ sarq(mask, Immediate(63));
4170 // Add mask.
4171 __ addq(out, mask);
4172 __ xorq(out, mask);
4173 break;
4174 }
4175 case DataType::Type::kFloat32: {
4176 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4177 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4178 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4179 __ andps(out, mask);
4180 break;
4181 }
4182 case DataType::Type::kFloat64: {
4183 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4184 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4185 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4186 __ andpd(out, mask);
4187 break;
4188 }
4189 default:
4190 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4191 }
4192 }
4193
4194 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4195 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4196 locations->SetInAt(0, Location::Any());
4197 }
4198
4199 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4200 SlowPathCode* slow_path =
4201 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4202 codegen_->AddSlowPath(slow_path);
4203
4204 LocationSummary* locations = instruction->GetLocations();
4205 Location value = locations->InAt(0);
4206
4207 switch (instruction->GetType()) {
4208 case DataType::Type::kBool:
4209 case DataType::Type::kUint8:
4210 case DataType::Type::kInt8:
4211 case DataType::Type::kUint16:
4212 case DataType::Type::kInt16:
4213 case DataType::Type::kInt32: {
4214 if (value.IsRegister()) {
4215 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4216 __ j(kEqual, slow_path->GetEntryLabel());
4217 } else if (value.IsStackSlot()) {
4218 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4219 __ j(kEqual, slow_path->GetEntryLabel());
4220 } else {
4221 DCHECK(value.IsConstant()) << value;
4222 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4223 __ jmp(slow_path->GetEntryLabel());
4224 }
4225 }
4226 break;
4227 }
4228 case DataType::Type::kInt64: {
4229 if (value.IsRegister()) {
4230 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4231 __ j(kEqual, slow_path->GetEntryLabel());
4232 } else if (value.IsDoubleStackSlot()) {
4233 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4234 __ j(kEqual, slow_path->GetEntryLabel());
4235 } else {
4236 DCHECK(value.IsConstant()) << value;
4237 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4238 __ jmp(slow_path->GetEntryLabel());
4239 }
4240 }
4241 break;
4242 }
4243 default:
4244 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4245 }
4246 }
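// Illustrative emitted sequence for a register divisor (the common case):
//   test reg, reg
//   jz   <DivZeroCheckSlowPath entry>   // slow path asks the runtime to throw
// A stack-slot divisor is compared against an immediate 0 instead; a
// constant-zero divisor degenerates into an unconditional jmp to the slow
// path, and a provably non-zero constant emits no check at all.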
4247
4248 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4249 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4250
4251 LocationSummary* locations =
4252 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4253
4254 switch (op->GetResultType()) {
4255 case DataType::Type::kInt32:
4256 case DataType::Type::kInt64: {
4257 locations->SetInAt(0, Location::RequiresRegister());
4258 // The shift count needs to be in CL (unless it is a constant).
4259 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4260 locations->SetOut(Location::SameAsFirstInput());
4261 break;
4262 }
4263 default:
4264 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4265 }
4266 }
4267
4268 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4269 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4270
4271 LocationSummary* locations = op->GetLocations();
4272 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4273 Location second = locations->InAt(1);
4274
4275 switch (op->GetResultType()) {
4276 case DataType::Type::kInt32: {
4277 if (second.IsRegister()) {
4278 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4279 if (op->IsShl()) {
4280 __ shll(first_reg, second_reg);
4281 } else if (op->IsShr()) {
4282 __ sarl(first_reg, second_reg);
4283 } else {
4284 __ shrl(first_reg, second_reg);
4285 }
4286 } else {
4287 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4288 if (op->IsShl()) {
4289 __ shll(first_reg, imm);
4290 } else if (op->IsShr()) {
4291 __ sarl(first_reg, imm);
4292 } else {
4293 __ shrl(first_reg, imm);
4294 }
4295 }
4296 break;
4297 }
4298 case DataType::Type::kInt64: {
4299 if (second.IsRegister()) {
4300 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4301 if (op->IsShl()) {
4302 __ shlq(first_reg, second_reg);
4303 } else if (op->IsShr()) {
4304 __ sarq(first_reg, second_reg);
4305 } else {
4306 __ shrq(first_reg, second_reg);
4307 }
4308 } else {
4309 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4310 if (op->IsShl()) {
4311 __ shlq(first_reg, imm);
4312 } else if (op->IsShr()) {
4313 __ sarq(first_reg, imm);
4314 } else {
4315 __ shrq(first_reg, imm);
4316 }
4317 }
4318 break;
4319 }
4320 default:
4321 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4322 UNREACHABLE();
4323 }
4324 }
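// A note on the masking above: the hardware SHL/SAR/SHR forms that take the
// count in CL already reduce the count modulo 32 (or modulo 64 for the
// REX.W-prefixed 64-bit forms), which matches the Java shift semantics, so no
// extra masking is emitted for register counts. Constant counts are masked at
// compile time with kMaxIntShiftDistance / kMaxLongShiftDistance before being
// encoded as immediates, e.g. `x << n` for kInt32 becomes `shl reg, cl` or
// `shl reg, imm` (illustrative).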
4325
4326 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4327 LocationSummary* locations =
4328 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4329
4330 switch (ror->GetResultType()) {
4331 case DataType::Type::kInt32:
4332 case DataType::Type::kInt64: {
4333 locations->SetInAt(0, Location::RequiresRegister());
4334 // The shift count needs to be in CL (unless it is a constant).
4335 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4336 locations->SetOut(Location::SameAsFirstInput());
4337 break;
4338 }
4339 default:
4340 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4341 UNREACHABLE();
4342 }
4343 }
4344
4345 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4346 LocationSummary* locations = ror->GetLocations();
4347 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4348 Location second = locations->InAt(1);
4349
4350 switch (ror->GetResultType()) {
4351 case DataType::Type::kInt32:
4352 if (second.IsRegister()) {
4353 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4354 __ rorl(first_reg, second_reg);
4355 } else {
4356 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4357 __ rorl(first_reg, imm);
4358 }
4359 break;
4360 case DataType::Type::kInt64:
4361 if (second.IsRegister()) {
4362 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4363 __ rorq(first_reg, second_reg);
4364 } else {
4365 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4366 __ rorq(first_reg, imm);
4367 }
4368 break;
4369 default:
4370 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4371 UNREACHABLE();
4372 }
4373 }
4374
4375 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4376 HandleShift(shl);
4377 }
4378
4379 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4380 HandleShift(shl);
4381 }
4382
4383 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4384 HandleShift(shr);
4385 }
4386
4387 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4388 HandleShift(shr);
4389 }
4390
4391 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4392 HandleShift(ushr);
4393 }
4394
4395 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4396 HandleShift(ushr);
4397 }
4398
4399 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4400 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4401 instruction, LocationSummary::kCallOnMainOnly);
4402 InvokeRuntimeCallingConvention calling_convention;
4403 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4404 locations->SetOut(Location::RegisterLocation(RAX));
4405 }
4406
4407 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4408 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4409 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4410 DCHECK(!codegen_->IsLeafMethod());
4411 }
4412
4413 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4414 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4415 instruction, LocationSummary::kCallOnMainOnly);
4416 InvokeRuntimeCallingConvention calling_convention;
4417 locations->SetOut(Location::RegisterLocation(RAX));
4418 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4419 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4420 }
4421
4422 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4423 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4424 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4425 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4426 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4427 DCHECK(!codegen_->IsLeafMethod());
4428 }
4429
4430 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4431 LocationSummary* locations =
4432 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4433 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4434 if (location.IsStackSlot()) {
4435 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4436 } else if (location.IsDoubleStackSlot()) {
4437 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4438 }
4439 locations->SetOut(location);
4440 }
4441
4442 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4443 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4444 // Nothing to do, the parameter is already at its location.
4445 }
4446
4447 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4448 LocationSummary* locations =
4449 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4450 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4451 }
4452
4453 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4454 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4455 // Nothing to do, the method is already at its location.
4456 }
4457
4458 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4459 LocationSummary* locations =
4460 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4461 locations->SetInAt(0, Location::RequiresRegister());
4462 locations->SetOut(Location::RequiresRegister());
4463 }
4464
4465 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4466 LocationSummary* locations = instruction->GetLocations();
4467 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4468 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4469 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4470 __ movq(locations->Out().AsRegister<CpuRegister>(),
4471 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4472 } else {
4473 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4474 instruction->GetIndex(), kX86_64PointerSize));
4475 __ movq(locations->Out().AsRegister<CpuRegister>(),
4476 Address(locations->InAt(0).AsRegister<CpuRegister>(),
4477 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4478 __ movq(locations->Out().AsRegister<CpuRegister>(),
4479 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4480 }
4481 }
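// Illustrative lowering, assuming `in` holds the Class pointer and `out` is
// the destination register:
//   vtable case:  mov out, [in + EmbeddedVTableEntryOffset(index)]
//   IMT case:     mov out, [in + ImtPtrOffset]             // load ImTable*
//                 mov out, [out + OffsetOfElement(index)]  // load ArtMethod*
// i.e. a vtable slot is embedded directly in the class object, while an IMT
// slot requires following one extra pointer.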
4482
4483 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4484 LocationSummary* locations =
4485 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4486 locations->SetInAt(0, Location::RequiresRegister());
4487 locations->SetOut(Location::SameAsFirstInput());
4488 }
4489
4490 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4491 LocationSummary* locations = not_->GetLocations();
4492 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4493 locations->Out().AsRegister<CpuRegister>().AsRegister());
4494 Location out = locations->Out();
4495 switch (not_->GetResultType()) {
4496 case DataType::Type::kInt32:
4497 __ notl(out.AsRegister<CpuRegister>());
4498 break;
4499
4500 case DataType::Type::kInt64:
4501 __ notq(out.AsRegister<CpuRegister>());
4502 break;
4503
4504 default:
4505 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4506 }
4507 }
4508
4509 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4510 LocationSummary* locations =
4511 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4512 locations->SetInAt(0, Location::RequiresRegister());
4513 locations->SetOut(Location::SameAsFirstInput());
4514 }
4515
4516 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4517 LocationSummary* locations = bool_not->GetLocations();
4518 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4519 locations->Out().AsRegister<CpuRegister>().AsRegister());
4520 Location out = locations->Out();
4521 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4522 }
4523
4524 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4525 LocationSummary* locations =
4526 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4527 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4528 locations->SetInAt(i, Location::Any());
4529 }
4530 locations->SetOut(Location::Any());
4531 }
4532
4533 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4534 LOG(FATAL) << "Unimplemented";
4535 }
4536
4537 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4538 /*
4539 * According to the JSR-133 Cookbook, on x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4540 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4541 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4542 */
4543 switch (kind) {
4544 case MemBarrierKind::kAnyAny: {
4545 MemoryFence();
4546 break;
4547 }
4548 case MemBarrierKind::kAnyStore:
4549 case MemBarrierKind::kLoadAny:
4550 case MemBarrierKind::kStoreStore: {
4551 // nop
4552 break;
4553 }
4554 case MemBarrierKind::kNTStoreStore:
4555 // Non-Temporal Store/Store needs an explicit fence.
4556 MemoryFence(/* non-temporal= */ true);
4557 break;
4558 }
4559 }
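// A minimal sketch of the resulting code (the exact instruction chosen is an
// internal detail of MemoryFence() and only assumed here):
//   kAnyAny        -> a full fence, e.g. MFENCE or an equivalent locked RMW
//   kNTStoreStore  -> an explicit fence suitable for non-temporal stores
//                     (MemoryFence(/* non-temporal= */ true))
//   others         -> no instruction; x86-64's TSO model already provides the
//                     ordering, only compiler reordering must be prevented.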
4560
4561 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
4562 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4563
4564 bool object_field_get_with_read_barrier =
4565 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4566 LocationSummary* locations =
4567 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
4568 object_field_get_with_read_barrier
4569 ? LocationSummary::kCallOnSlowPath
4570 : LocationSummary::kNoCall);
4571 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
4572 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
4573 }
4574 locations->SetInAt(0, Location::RequiresRegister());
4575 if (DataType::IsFloatingPointType(instruction->GetType())) {
4576 locations->SetOut(Location::RequiresFpuRegister());
4577 } else {
4578 // The output overlaps for an object field get when read barriers
4579 // are enabled: we do not want the move to overwrite the object's
4580 // location, as we need it to emit the read barrier.
4581 locations->SetOut(
4582 Location::RequiresRegister(),
4583 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
4584 }
4585 }
4586
4587 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
4588 const FieldInfo& field_info) {
4589 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
4590
4591 LocationSummary* locations = instruction->GetLocations();
4592 Location base_loc = locations->InAt(0);
4593 CpuRegister base = base_loc.AsRegister<CpuRegister>();
4594 Location out = locations->Out();
4595 bool is_volatile = field_info.IsVolatile();
4596 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
4597 DataType::Type load_type = instruction->GetType();
4598 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4599
4600 switch (load_type) {
4601 case DataType::Type::kBool:
4602 case DataType::Type::kUint8: {
4603 __ movzxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4604 break;
4605 }
4606
4607 case DataType::Type::kInt8: {
4608 __ movsxb(out.AsRegister<CpuRegister>(), Address(base, offset));
4609 break;
4610 }
4611
4612 case DataType::Type::kUint16: {
4613 __ movzxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4614 break;
4615 }
4616
4617 case DataType::Type::kInt16: {
4618 __ movsxw(out.AsRegister<CpuRegister>(), Address(base, offset));
4619 break;
4620 }
4621
4622 case DataType::Type::kInt32: {
4623 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4624 break;
4625 }
4626
4627 case DataType::Type::kReference: {
4628 // /* HeapReference<Object> */ out = *(base + offset)
4629 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
4630 // Note that a potential implicit null check is handled in this
4631 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
4632 codegen_->GenerateFieldLoadWithBakerReadBarrier(
4633 instruction, out, base, offset, /* needs_null_check= */ true);
4634 if (is_volatile) {
4635 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4636 }
4637 } else {
4638 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
4639 codegen_->MaybeRecordImplicitNullCheck(instruction);
4640 if (is_volatile) {
4641 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4642 }
4643 // If read barriers are enabled, emit read barriers other than
4644 // Baker's using a slow path (and also unpoison the loaded
4645 // reference, if heap poisoning is enabled).
4646 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
4647 }
4648 break;
4649 }
4650
4651 case DataType::Type::kInt64: {
4652 __ movq(out.AsRegister<CpuRegister>(), Address(base, offset));
4653 break;
4654 }
4655
4656 case DataType::Type::kFloat32: {
4657 __ movss(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4658 break;
4659 }
4660
4661 case DataType::Type::kFloat64: {
4662 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(base, offset));
4663 break;
4664 }
4665
4666 case DataType::Type::kUint32:
4667 case DataType::Type::kUint64:
4668 case DataType::Type::kVoid:
4669 LOG(FATAL) << "Unreachable type " << load_type;
4670 UNREACHABLE();
4671 }
4672
4673 if (load_type == DataType::Type::kReference) {
4674 // Potential implicit null checks, in the case of reference
4675 // fields, are handled in the previous switch statement.
4676 } else {
4677 codegen_->MaybeRecordImplicitNullCheck(instruction);
4678 }
4679
4680 if (is_volatile) {
4681 if (load_type == DataType::Type::kReference) {
4682 // Memory barriers, in the case of references, are also handled
4683 // in the previous switch statement.
4684 } else {
4685 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4686 }
4687 }
4688 }
4689
4690 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
4691 const FieldInfo& field_info) {
4692 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4693
4694 LocationSummary* locations =
4695 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4696 DataType::Type field_type = field_info.GetFieldType();
4697 bool is_volatile = field_info.IsVolatile();
4698 bool needs_write_barrier =
4699 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
4700
4701 locations->SetInAt(0, Location::RequiresRegister());
4702 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
4703 if (is_volatile) {
4704 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4705 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
4706 } else {
4707 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
4708 }
4709 } else {
4710 if (is_volatile) {
4711 // In order to satisfy the semantics of volatile, this must be a single instruction store.
4712 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
4713 } else {
4714 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
4715 }
4716 }
4717 if (needs_write_barrier) {
4718 // Temporary registers for the write barrier.
4719 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
4720 locations->AddTemp(Location::RequiresRegister());
4721 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4722 // Temporary register for the reference poisoning.
4723 locations->AddTemp(Location::RequiresRegister());
4724 }
4725 }
4726
4727 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
4728 const FieldInfo& field_info,
4729 bool value_can_be_null) {
4730 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
4731
4732 LocationSummary* locations = instruction->GetLocations();
4733 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
4734 Location value = locations->InAt(1);
4735 bool is_volatile = field_info.IsVolatile();
4736 DataType::Type field_type = field_info.GetFieldType();
4737 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
4738
4739 if (is_volatile) {
4740 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4741 }
4742
4743 bool maybe_record_implicit_null_check_done = false;
4744
4745 switch (field_type) {
4746 case DataType::Type::kBool:
4747 case DataType::Type::kUint8:
4748 case DataType::Type::kInt8: {
4749 if (value.IsConstant()) {
4750 __ movb(Address(base, offset),
4751 Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
4752 } else {
4753 __ movb(Address(base, offset), value.AsRegister<CpuRegister>());
4754 }
4755 break;
4756 }
4757
4758 case DataType::Type::kUint16:
4759 case DataType::Type::kInt16: {
4760 if (value.IsConstant()) {
4761 __ movw(Address(base, offset),
4762 Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
4763 } else {
4764 __ movw(Address(base, offset), value.AsRegister<CpuRegister>());
4765 }
4766 break;
4767 }
4768
4769 case DataType::Type::kInt32:
4770 case DataType::Type::kReference: {
4771 if (value.IsConstant()) {
4772 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
4773 // `field_type == DataType::Type::kReference` implies `v == 0`.
4774 DCHECK((field_type != DataType::Type::kReference) || (v == 0));
4775 // Note: if heap poisoning is enabled, no need to poison
4776 // (negate) `v` if it is a reference, as it would be null.
4777 __ movl(Address(base, offset), Immediate(v));
4778 } else {
4779 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
4780 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4781 __ movl(temp, value.AsRegister<CpuRegister>());
4782 __ PoisonHeapReference(temp);
4783 __ movl(Address(base, offset), temp);
4784 } else {
4785 __ movl(Address(base, offset), value.AsRegister<CpuRegister>());
4786 }
4787 }
4788 break;
4789 }
4790
4791 case DataType::Type::kInt64: {
4792 if (value.IsConstant()) {
4793 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
4794 codegen_->MoveInt64ToAddress(Address(base, offset),
4795 Address(base, offset + sizeof(int32_t)),
4796 v,
4797 instruction);
4798 maybe_record_implicit_null_check_done = true;
4799 } else {
4800 __ movq(Address(base, offset), value.AsRegister<CpuRegister>());
4801 }
4802 break;
4803 }
4804
4805 case DataType::Type::kFloat32: {
4806 if (value.IsConstant()) {
4807 int32_t v =
4808 bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
4809 __ movl(Address(base, offset), Immediate(v));
4810 } else {
4811 __ movss(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4812 }
4813 break;
4814 }
4815
4816 case DataType::Type::kFloat64: {
4817 if (value.IsConstant()) {
4818 int64_t v =
4819 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
4820 codegen_->MoveInt64ToAddress(Address(base, offset),
4821 Address(base, offset + sizeof(int32_t)),
4822 v,
4823 instruction);
4824 maybe_record_implicit_null_check_done = true;
4825 } else {
4826 __ movsd(Address(base, offset), value.AsFpuRegister<XmmRegister>());
4827 }
4828 break;
4829 }
4830
4831 case DataType::Type::kUint32:
4832 case DataType::Type::kUint64:
4833 case DataType::Type::kVoid:
4834 LOG(FATAL) << "Unreachable type " << field_type;
4835 UNREACHABLE();
4836 }
4837
4838 if (!maybe_record_implicit_null_check_done) {
4839 codegen_->MaybeRecordImplicitNullCheck(instruction);
4840 }
4841
4842 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1))) {
4843 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4844 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
4845 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
4846 }
4847
4848 if (is_volatile) {
4849 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
4850 }
4851 }
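// For a volatile field the overall shape is therefore, schematically:
//   <kAnyStore barrier>          // no instruction on x86-64
//   mov [base + offset], value   // a single store; this is why the locations
//                                // above force *OrInt32Constant inputs
//   <kAnyAny barrier>            // full fence, providing the StoreLoad
//                                // ordering that volatile writes require
// followed, for reference values that need it, by the card marking done in
// MarkGCCard.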
4852
4853 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4854 HandleFieldSet(instruction, instruction->GetFieldInfo());
4855 }
4856
4857 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
4858 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4859 }
4860
4861 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4862 HandleFieldGet(instruction);
4863 }
4864
4865 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
4866 HandleFieldGet(instruction, instruction->GetFieldInfo());
4867 }
4868
4869 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4870 HandleFieldGet(instruction);
4871 }
4872
4873 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
4874 HandleFieldGet(instruction, instruction->GetFieldInfo());
4875 }
4876
4877 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4878 HandleFieldSet(instruction, instruction->GetFieldInfo());
4879 }
4880
4881 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
4882 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
4883 }
4884
4885 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
4886 HUnresolvedInstanceFieldGet* instruction) {
4887 FieldAccessCallingConventionX86_64 calling_convention;
4888 codegen_->CreateUnresolvedFieldLocationSummary(
4889 instruction, instruction->GetFieldType(), calling_convention);
4890 }
4891
4892 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
4893 HUnresolvedInstanceFieldGet* instruction) {
4894 FieldAccessCallingConventionX86_64 calling_convention;
4895 codegen_->GenerateUnresolvedFieldAccess(instruction,
4896 instruction->GetFieldType(),
4897 instruction->GetFieldIndex(),
4898 instruction->GetDexPc(),
4899 calling_convention);
4900 }
4901
4902 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
4903 HUnresolvedInstanceFieldSet* instruction) {
4904 FieldAccessCallingConventionX86_64 calling_convention;
4905 codegen_->CreateUnresolvedFieldLocationSummary(
4906 instruction, instruction->GetFieldType(), calling_convention);
4907 }
4908
4909 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
4910 HUnresolvedInstanceFieldSet* instruction) {
4911 FieldAccessCallingConventionX86_64 calling_convention;
4912 codegen_->GenerateUnresolvedFieldAccess(instruction,
4913 instruction->GetFieldType(),
4914 instruction->GetFieldIndex(),
4915 instruction->GetDexPc(),
4916 calling_convention);
4917 }
4918
4919 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
4920 HUnresolvedStaticFieldGet* instruction) {
4921 FieldAccessCallingConventionX86_64 calling_convention;
4922 codegen_->CreateUnresolvedFieldLocationSummary(
4923 instruction, instruction->GetFieldType(), calling_convention);
4924 }
4925
4926 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
4927 HUnresolvedStaticFieldGet* instruction) {
4928 FieldAccessCallingConventionX86_64 calling_convention;
4929 codegen_->GenerateUnresolvedFieldAccess(instruction,
4930 instruction->GetFieldType(),
4931 instruction->GetFieldIndex(),
4932 instruction->GetDexPc(),
4933 calling_convention);
4934 }
4935
4936 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
4937 HUnresolvedStaticFieldSet* instruction) {
4938 FieldAccessCallingConventionX86_64 calling_convention;
4939 codegen_->CreateUnresolvedFieldLocationSummary(
4940 instruction, instruction->GetFieldType(), calling_convention);
4941 }
4942
4943 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
4944 HUnresolvedStaticFieldSet* instruction) {
4945 FieldAccessCallingConventionX86_64 calling_convention;
4946 codegen_->GenerateUnresolvedFieldAccess(instruction,
4947 instruction->GetFieldType(),
4948 instruction->GetFieldIndex(),
4949 instruction->GetDexPc(),
4950 calling_convention);
4951 }
4952
4953 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
4954 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4955 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
4956 ? Location::RequiresRegister()
4957 : Location::Any();
4958 locations->SetInAt(0, loc);
4959 }
4960
4961 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
4962 if (CanMoveNullCheckToUser(instruction)) {
4963 return;
4964 }
4965 LocationSummary* locations = instruction->GetLocations();
4966 Location obj = locations->InAt(0);
4967
4968 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
4969 RecordPcInfo(instruction, instruction->GetDexPc());
4970 }
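// The TEST above is a load used purely for its side effect: it reads the
// first four bytes of the object and only updates the flags (RAX itself is
// not written). If `obj` is null, the access faults, and the runtime's fault
// handler maps the faulting address back to the PC recorded by RecordPcInfo
// and raises the NullPointerException there, so the fast path needs no
// explicit branch at all.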
4971
4972 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
4973 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
4974 AddSlowPath(slow_path);
4975
4976 LocationSummary* locations = instruction->GetLocations();
4977 Location obj = locations->InAt(0);
4978
4979 if (obj.IsRegister()) {
4980 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
4981 } else if (obj.IsStackSlot()) {
4982 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
4983 } else {
4984 DCHECK(obj.IsConstant()) << obj;
4985 DCHECK(obj.GetConstant()->IsNullConstant());
4986 __ jmp(slow_path->GetEntryLabel());
4987 return;
4988 }
4989 __ j(kEqual, slow_path->GetEntryLabel());
4990 }
4991
4992 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
4993 codegen_->GenerateNullCheck(instruction);
4994 }
4995
4996 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
4997 bool object_array_get_with_read_barrier =
4998 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
4999 LocationSummary* locations =
5000 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5001 object_array_get_with_read_barrier
5002 ? LocationSummary::kCallOnSlowPath
5003 : LocationSummary::kNoCall);
5004 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5005 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5006 }
5007 locations->SetInAt(0, Location::RequiresRegister());
5008 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5009 if (DataType::IsFloatingPointType(instruction->GetType())) {
5010 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5011 } else {
5012 // The output overlaps for an object array get when read barriers
5013 // are enabled: we do not want the move to overwrite the array's
5014 // location, as we need it to emit the read barrier.
5015 locations->SetOut(
5016 Location::RequiresRegister(),
5017 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5018 }
5019 }
5020
5021 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5022 LocationSummary* locations = instruction->GetLocations();
5023 Location obj_loc = locations->InAt(0);
5024 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5025 Location index = locations->InAt(1);
5026 Location out_loc = locations->Out();
5027 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5028
5029 DataType::Type type = instruction->GetType();
5030 switch (type) {
5031 case DataType::Type::kBool:
5032 case DataType::Type::kUint8: {
5033 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5034 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5035 break;
5036 }
5037
5038 case DataType::Type::kInt8: {
5039 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5040 __ movsxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5041 break;
5042 }
5043
5044 case DataType::Type::kUint16: {
5045 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5046 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5047 // Branch into the compressed and uncompressed cases and load with the matching element width.
5048 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5049 NearLabel done, not_compressed;
5050 __ testb(Address(obj, count_offset), Immediate(1));
5051 codegen_->MaybeRecordImplicitNullCheck(instruction);
5052 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5053 "Expecting 0=compressed, 1=uncompressed");
5054 __ j(kNotZero, &not_compressed);
5055 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5056 __ jmp(&done);
5057 __ Bind(&not_compressed);
5058 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5059 __ Bind(&done);
5060 } else {
5061 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5062 }
5063 break;
5064 }
5065
5066 case DataType::Type::kInt16: {
5067 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5068 __ movsxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5069 break;
5070 }
5071
5072 case DataType::Type::kInt32: {
5073 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5074 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5075 break;
5076 }
5077
5078 case DataType::Type::kReference: {
5079 static_assert(
5080 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5081 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5082 // /* HeapReference<Object> */ out =
5083 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5084 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5085 // Note that a potential implicit null check is handled in this
5086 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5087 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5088 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5089 } else {
5090 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5091 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5092 codegen_->MaybeRecordImplicitNullCheck(instruction);
5093 // If read barriers are enabled, emit read barriers other than
5094 // Baker's using a slow path (and also unpoison the loaded
5095 // reference, if heap poisoning is enabled).
5096 if (index.IsConstant()) {
5097 uint32_t offset =
5098 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5099 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5100 } else {
5101 codegen_->MaybeGenerateReadBarrierSlow(
5102 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5103 }
5104 }
5105 break;
5106 }
5107
5108 case DataType::Type::kInt64: {
5109 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5110 __ movq(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5111 break;
5112 }
5113
5114 case DataType::Type::kFloat32: {
5115 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5116 __ movss(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5117 break;
5118 }
5119
5120 case DataType::Type::kFloat64: {
5121 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
5122 __ movsd(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_8, data_offset));
5123 break;
5124 }
5125
5126 case DataType::Type::kUint32:
5127 case DataType::Type::kUint64:
5128 case DataType::Type::kVoid:
5129 LOG(FATAL) << "Unreachable type " << type;
5130 UNREACHABLE();
5131 }
5132
5133 if (type == DataType::Type::kReference) {
5134 // Potential implicit null checks, in the case of reference
5135 // arrays, are handled in the previous switch statement.
5136 } else {
5137 codegen_->MaybeRecordImplicitNullCheck(instruction);
5138 }
5139 }
5140
5141 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5142 DataType::Type value_type = instruction->GetComponentType();
5143
5144 bool needs_write_barrier =
5145 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5146 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5147
5148 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5149 instruction,
5150 may_need_runtime_call_for_type_check ?
5151 LocationSummary::kCallOnSlowPath :
5152 LocationSummary::kNoCall);
5153
5154 locations->SetInAt(0, Location::RequiresRegister());
5155 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5156 if (DataType::IsFloatingPointType(value_type)) {
5157 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5158 } else {
5159 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5160 }
5161
5162 if (needs_write_barrier) {
5163 // Temporary registers for the write barrier.
5164 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
5165 locations->AddTemp(Location::RequiresRegister());
5166 }
5167 }
5168
5169 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5170 LocationSummary* locations = instruction->GetLocations();
5171 Location array_loc = locations->InAt(0);
5172 CpuRegister array = array_loc.AsRegister<CpuRegister>();
5173 Location index = locations->InAt(1);
5174 Location value = locations->InAt(2);
5175 DataType::Type value_type = instruction->GetComponentType();
5176 bool may_need_runtime_call_for_type_check = instruction->NeedsTypeCheck();
5177 bool needs_write_barrier =
5178 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5179 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5180 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5181 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5182
5183 switch (value_type) {
5184 case DataType::Type::kBool:
5185 case DataType::Type::kUint8:
5186 case DataType::Type::kInt8: {
5187 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5188 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5189 if (value.IsRegister()) {
5190 __ movb(address, value.AsRegister<CpuRegister>());
5191 } else {
5192 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5193 }
5194 codegen_->MaybeRecordImplicitNullCheck(instruction);
5195 break;
5196 }
5197
5198 case DataType::Type::kUint16:
5199 case DataType::Type::kInt16: {
5200 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5201 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5202 if (value.IsRegister()) {
5203 __ movw(address, value.AsRegister<CpuRegister>());
5204 } else {
5205 DCHECK(value.IsConstant()) << value;
5206 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5207 }
5208 codegen_->MaybeRecordImplicitNullCheck(instruction);
5209 break;
5210 }
5211
5212 case DataType::Type::kReference: {
5213 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5214 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5215
5216 if (!value.IsRegister()) {
5217 // Just setting null.
5218 DCHECK(instruction->InputAt(2)->IsNullConstant());
5219 DCHECK(value.IsConstant()) << value;
5220 __ movl(address, Immediate(0));
5221 codegen_->MaybeRecordImplicitNullCheck(instruction);
5222 DCHECK(!needs_write_barrier);
5223 DCHECK(!may_need_runtime_call_for_type_check);
5224 break;
5225 }
5226
5227 DCHECK(needs_write_barrier);
5228 CpuRegister register_value = value.AsRegister<CpuRegister>();
5229 // We cannot use a NearLabel for `done`, as its range may be too
5230 // short when Baker read barriers are enabled.
5231 Label done;
5232 NearLabel not_null, do_put;
5233 SlowPathCode* slow_path = nullptr;
5234 Location temp_loc = locations->GetTemp(0);
5235 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5236 if (may_need_runtime_call_for_type_check) {
5237 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5238 codegen_->AddSlowPath(slow_path);
5239 if (instruction->GetValueCanBeNull()) {
5240 __ testl(register_value, register_value);
5241 __ j(kNotEqual, &not_null);
5242 __ movl(address, Immediate(0));
5243 codegen_->MaybeRecordImplicitNullCheck(instruction);
5244 __ jmp(&done);
5245 __ Bind(&not_null);
5246 }
5247
5248 // Note that when Baker read barriers are enabled, the type
5249 // checks are performed without read barriers. This is fine,
5250 // even in the case where a class object is in the from-space
5251 // after the flip, as a comparison involving such a type would
5252 // not produce a false positive; it may of course produce a
5253 // false negative, in which case we would take the ArraySet
5254 // slow path.
5255
5256 // /* HeapReference<Class> */ temp = array->klass_
5257 __ movl(temp, Address(array, class_offset));
5258 codegen_->MaybeRecordImplicitNullCheck(instruction);
5259 __ MaybeUnpoisonHeapReference(temp);
5260
5261 // /* HeapReference<Class> */ temp = temp->component_type_
5262 __ movl(temp, Address(temp, component_offset));
5263 // If heap poisoning is enabled, no need to unpoison `temp`
5264 // nor the object reference in `register_value->klass`, as
5265 // we are comparing two poisoned references.
5266 __ cmpl(temp, Address(register_value, class_offset));
5267
5268 if (instruction->StaticTypeOfArrayIsObjectArray()) {
5269 __ j(kEqual, &do_put);
5270 // If heap poisoning is enabled, the `temp` reference has
5271 // not been unpoisoned yet; unpoison it now.
5272 __ MaybeUnpoisonHeapReference(temp);
5273
5274 // If heap poisoning is enabled, no need to unpoison the
5275 // heap reference loaded below, as it is only used for a
5276 // comparison with null.
5277 __ cmpl(Address(temp, super_offset), Immediate(0));
5278 __ j(kNotEqual, slow_path->GetEntryLabel());
5279 __ Bind(&do_put);
5280 } else {
5281 __ j(kNotEqual, slow_path->GetEntryLabel());
5282 }
5283 }
5284
5285 if (kPoisonHeapReferences) {
5286 __ movl(temp, register_value);
5287 __ PoisonHeapReference(temp);
5288 __ movl(address, temp);
5289 } else {
5290 __ movl(address, register_value);
5291 }
5292 if (!may_need_runtime_call_for_type_check) {
5293 codegen_->MaybeRecordImplicitNullCheck(instruction);
5294 }
5295
5296 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5297 codegen_->MarkGCCard(
5298 temp, card, array, value.AsRegister<CpuRegister>(), instruction->GetValueCanBeNull());
5299 __ Bind(&done);
5300
5301 if (slow_path != nullptr) {
5302 __ Bind(slow_path->GetExitLabel());
5303 }
5304
5305 break;
5306 }
5307
5308 case DataType::Type::kInt32: {
5309 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5310 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5311 if (value.IsRegister()) {
5312 __ movl(address, value.AsRegister<CpuRegister>());
5313 } else {
5314 DCHECK(value.IsConstant()) << value;
5315 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5316 __ movl(address, Immediate(v));
5317 }
5318 codegen_->MaybeRecordImplicitNullCheck(instruction);
5319 break;
5320 }
5321
5322 case DataType::Type::kInt64: {
5323 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5324 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5325 if (value.IsRegister()) {
5326 __ movq(address, value.AsRegister<CpuRegister>());
5327 codegen_->MaybeRecordImplicitNullCheck(instruction);
5328 } else {
5329 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5330 Address address_high =
5331 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5332 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5333 }
5334 break;
5335 }
5336
5337 case DataType::Type::kFloat32: {
5338 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5339 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5340 if (value.IsFpuRegister()) {
5341 __ movss(address, value.AsFpuRegister<XmmRegister>());
5342 } else {
5343 DCHECK(value.IsConstant());
5344 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5345 __ movl(address, Immediate(v));
5346 }
5347 codegen_->MaybeRecordImplicitNullCheck(instruction);
5348 break;
5349 }
5350
5351 case DataType::Type::kFloat64: {
5352 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5353 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5354 if (value.IsFpuRegister()) {
5355 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5356 codegen_->MaybeRecordImplicitNullCheck(instruction);
5357 } else {
5358 int64_t v =
5359 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5360 Address address_high =
5361 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5362 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5363 }
5364 break;
5365 }
5366
5367 case DataType::Type::kUint32:
5368 case DataType::Type::kUint64:
5369 case DataType::Type::kVoid:
5370 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5371 UNREACHABLE();
5372 }
5373 }
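// For reference stores that may need a runtime type check, the fast path
// above reads, schematically:
//   if (value == null) { store null; done; }                     // optional
//   temp = array->klass_; temp = temp->component_type_;
//   if (temp != value->klass_) {
//     if (temp->super_class_ != null) -> ArraySetSlowPath        // not Object[]
//   }
//   store (possibly poisoned) value; mark GC card;
// The poisoned-vs-poisoned class comparison is deliberate: equality is
// preserved under poisoning, so nothing needs to be unpoisoned just to
// compare the two references.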
5374
5375 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5376 LocationSummary* locations =
5377 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5378 locations->SetInAt(0, Location::RequiresRegister());
5379 if (!instruction->IsEmittedAtUseSite()) {
5380 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5381 }
5382 }
5383
5384 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5385 if (instruction->IsEmittedAtUseSite()) {
5386 return;
5387 }
5388
5389 LocationSummary* locations = instruction->GetLocations();
5390 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5391 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5392 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5393 __ movl(out, Address(obj, offset));
5394 codegen_->MaybeRecordImplicitNullCheck(instruction);
5395 // Mask out the compression flag (least significant bit) in case the array is String's char array.
5396 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5397 __ shrl(out, Immediate(1));
5398 }
5399 }
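// With string compression enabled, String::count holds (length << 1) | flag,
// where flag bit 0 is 0 for a compressed (8-bit) string and 1 for an
// uncompressed one (see the static_assert in VisitArrayGet above), so a
// single logical shift right by one recovers the character count.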
5400
5401 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5402 RegisterSet caller_saves = RegisterSet::Empty();
5403 InvokeRuntimeCallingConvention calling_convention;
5404 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5405 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5406 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5407 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5408 HInstruction* length = instruction->InputAt(1);
5409 if (!length->IsEmittedAtUseSite()) {
5410 locations->SetInAt(1, Location::RegisterOrConstant(length));
5411 }
5412 }
5413
5414 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5415 LocationSummary* locations = instruction->GetLocations();
5416 Location index_loc = locations->InAt(0);
5417 Location length_loc = locations->InAt(1);
5418 SlowPathCode* slow_path =
5419 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5420
5421 if (length_loc.IsConstant()) {
5422 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5423 if (index_loc.IsConstant()) {
5424 // BCE will remove the bounds check if we are guaranteed to pass.
5425 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5426 if (index < 0 || index >= length) {
5427 codegen_->AddSlowPath(slow_path);
5428 __ jmp(slow_path->GetEntryLabel());
5429 } else {
5430 // Some optimization after BCE may have generated this, and we should not
5431 // generate a bounds check if it is a valid range.
5432 }
5433 return;
5434 }
5435
5436 // The operands are swapped here (index compared against the constant length), so the
5436 // jump condition is reversed relative to the general case below.
5437 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5438 __ cmpl(index_reg, Immediate(length));
5439 codegen_->AddSlowPath(slow_path);
5440 __ j(kAboveEqual, slow_path->GetEntryLabel());
5441 } else {
5442 HInstruction* array_length = instruction->InputAt(1);
5443 if (array_length->IsEmittedAtUseSite()) {
5444 // Address the length field in the array.
5445 DCHECK(array_length->IsArrayLength());
5446 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5447 Location array_loc = array_length->GetLocations()->InAt(0);
5448 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5449 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5450 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5451 // the string compression flag) with the in-memory length and avoid the temporary.
5452 CpuRegister length_reg = CpuRegister(TMP);
5453 __ movl(length_reg, array_len);
5454 codegen_->MaybeRecordImplicitNullCheck(array_length);
5455 __ shrl(length_reg, Immediate(1));
5456 codegen_->GenerateIntCompare(length_reg, index_loc);
5457 } else {
5458 // Checking the bound for the general case:
5459 // a char array, or a String's backing array when compression is disabled.
5460 if (index_loc.IsConstant()) {
5461 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5462 __ cmpl(array_len, Immediate(value));
5463 } else {
5464 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5465 }
5466 codegen_->MaybeRecordImplicitNullCheck(array_length);
5467 }
5468 } else {
5469 codegen_->GenerateIntCompare(length_loc, index_loc);
5470 }
5471 codegen_->AddSlowPath(slow_path);
5472 __ j(kBelowEqual, slow_path->GetEntryLabel());
5473 }
5474 }
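// The unsigned conditions (kAboveEqual / kBelowEqual) intentionally fold both
// failure cases into one branch: a negative index, reinterpreted as unsigned,
// is larger than any valid length, so "index >= length (unsigned)" covers
// both index < 0 and index out of range. kBelowEqual is used where the
// comparison operands are swapped (length compared against index).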
5475
5476 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5477 CpuRegister card,
5478 CpuRegister object,
5479 CpuRegister value,
5480 bool value_can_be_null) {
5481 NearLabel is_null;
5482 if (value_can_be_null) {
5483 __ testl(value, value);
5484 __ j(kEqual, &is_null);
5485 }
5486 // Load the address of the card table into `card`.
5487 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5488 /* no_rip= */ true));
5489 // Calculate the offset (in the card table) of the card corresponding to
5490 // `object`.
5491 __ movq(temp, object);
5492 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5493 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5494 // `object`'s card.
5495 //
5496 // Register `card` contains the address of the card table. Note that the card
5497 // table's base is biased during its creation so that it always starts at an
5498 // address whose least-significant byte is equal to `kCardDirty` (see
5499 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5500 // below writes the `kCardDirty` (byte) value into the `object`'s card
5501 // (located at `card + object >> kCardShift`).
5502 //
5503 // This dual use of the value in register `card` (1. to calculate the location
5504 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5505 // (no need to explicitly load `kCardDirty` as an immediate value).
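// In C-like pseudocode, the MOVB below amounts to:
//   ((uint8_t*)card)[object >> kCardShift] = (uint8_t)card;
// relying on the bias described above to make the low byte of `card` equal
// kCardDirty.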
5506 __ movb(Address(temp, card, TIMES_1, 0), card);
5507 if (value_can_be_null) {
5508 __ Bind(&is_null);
5509 }
5510 }
5511
5512 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5513 LOG(FATAL) << "Unimplemented";
5514 }
5515
5516 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5517 if (instruction->GetNext()->IsSuspendCheck() &&
5518 instruction->GetBlock()->GetLoopInformation() != nullptr) {
5519 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5520 // The back edge will generate the suspend check.
5521 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5522 }
5523
5524 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5525 }
5526
5527 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5528 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5529 instruction, LocationSummary::kCallOnSlowPath);
5530 // In the suspend check slow path, there are usually no caller-save registers at all.
5531 // If SIMD instructions are present, however, we force spilling all live SIMD
5532 // registers in full width (since the runtime only saves/restores lower part).
5533 locations->SetCustomSlowPathCallerSaves(
5534 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5535 }
5536
5537 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5538 HBasicBlock* block = instruction->GetBlock();
5539 if (block->GetLoopInformation() != nullptr) {
5540 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
5541 // The back edge will generate the suspend check.
5542 return;
5543 }
5544 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
5545 // The goto will generate the suspend check.
5546 return;
5547 }
5548 GenerateSuspendCheck(instruction, nullptr);
5549 }
5550
5551 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
5552 HBasicBlock* successor) {
5553 SuspendCheckSlowPathX86_64* slow_path =
5554 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
5555 if (slow_path == nullptr) {
5556 slow_path =
5557 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
5558 instruction->SetSlowPath(slow_path);
5559 codegen_->AddSlowPath(slow_path);
5560 if (successor != nullptr) {
5561 DCHECK(successor->IsLoopHeader());
5562 }
5563 } else {
5564 DCHECK_EQ(slow_path->GetSuccessor(), successor);
5565 }
5566
5567 __ gs()->cmpw(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
5568 /* no_rip= */ true),
5569 Immediate(0));
5570 if (successor == nullptr) {
5571 __ j(kNotEqual, slow_path->GetEntryLabel());
5572 __ Bind(slow_path->GetReturnLabel());
5573 } else {
5574 __ j(kEqual, codegen_->GetLabelOf(successor));
5575 __ jmp(slow_path->GetEntryLabel());
5576 }
5577 }
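// The gs-prefixed absolute address works because ART on x86-64 keeps the
// current Thread reachable through the GS segment base, so
// gs:[ThreadFlagsOffset] reads the 16-bit thread-flags word of
// Thread::Current() without materializing the Thread* in a register. A
// non-zero value means a suspension/checkpoint request is pending: with no
// successor we branch to the slow path and return right after it; on a back
// edge we jump straight to the successor when nothing is pending and fall
// into the slow path otherwise.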
5578
5579 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
5580 return codegen_->GetAssembler();
5581 }
5582
5583 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
5584 MoveOperands* move = moves_[index];
5585 Location source = move->GetSource();
5586 Location destination = move->GetDestination();
5587
5588 if (source.IsRegister()) {
5589 if (destination.IsRegister()) {
5590 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
5591 } else if (destination.IsStackSlot()) {
5592 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
5593 source.AsRegister<CpuRegister>());
5594 } else {
5595 DCHECK(destination.IsDoubleStackSlot());
5596 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
5597 source.AsRegister<CpuRegister>());
5598 }
5599 } else if (source.IsStackSlot()) {
5600 if (destination.IsRegister()) {
5601 __ movl(destination.AsRegister<CpuRegister>(),
5602 Address(CpuRegister(RSP), source.GetStackIndex()));
5603 } else if (destination.IsFpuRegister()) {
5604 __ movss(destination.AsFpuRegister<XmmRegister>(),
5605 Address(CpuRegister(RSP), source.GetStackIndex()));
5606 } else {
5607 DCHECK(destination.IsStackSlot());
5608 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5609 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5610 }
5611 } else if (source.IsDoubleStackSlot()) {
5612 if (destination.IsRegister()) {
5613 __ movq(destination.AsRegister<CpuRegister>(),
5614 Address(CpuRegister(RSP), source.GetStackIndex()));
5615 } else if (destination.IsFpuRegister()) {
5616 __ movsd(destination.AsFpuRegister<XmmRegister>(),
5617 Address(CpuRegister(RSP), source.GetStackIndex()));
5618 } else {
5619 DCHECK(destination.IsDoubleStackSlot()) << destination;
5620 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5621 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5622 }
5623 } else if (source.IsSIMDStackSlot()) {
5624 if (destination.IsFpuRegister()) {
5625 __ movups(destination.AsFpuRegister<XmmRegister>(),
5626 Address(CpuRegister(RSP), source.GetStackIndex()));
5627 } else {
5628 DCHECK(destination.IsSIMDStackSlot());
5629 size_t high = kX86_64WordSize;
5630 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
5631 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
5632 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
5633 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
5634 }
5635 } else if (source.IsConstant()) {
5636 HConstant* constant = source.GetConstant();
5637 if (constant->IsIntConstant() || constant->IsNullConstant()) {
5638 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
5639 if (destination.IsRegister()) {
5640 if (value == 0) {
5641 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5642 } else {
5643 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
5644 }
5645 } else {
5646 DCHECK(destination.IsStackSlot()) << destination;
5647 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
5648 }
5649 } else if (constant->IsLongConstant()) {
5650 int64_t value = constant->AsLongConstant()->GetValue();
5651 if (destination.IsRegister()) {
5652 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
5653 } else {
5654 DCHECK(destination.IsDoubleStackSlot()) << destination;
5655 codegen_->Store64BitValueToStack(destination, value);
5656 }
5657 } else if (constant->IsFloatConstant()) {
5658 float fp_value = constant->AsFloatConstant()->GetValue();
5659 if (destination.IsFpuRegister()) {
5660 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5661 codegen_->Load32BitValue(dest, fp_value);
5662 } else {
5663 DCHECK(destination.IsStackSlot()) << destination;
5664 Immediate imm(bit_cast<int32_t, float>(fp_value));
5665 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
5666 }
5667 } else {
5668 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
5669 double fp_value = constant->AsDoubleConstant()->GetValue();
5670 int64_t value = bit_cast<int64_t, double>(fp_value);
5671 if (destination.IsFpuRegister()) {
5672 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
5673 codegen_->Load64BitValue(dest, fp_value);
5674 } else {
5675 DCHECK(destination.IsDoubleStackSlot()) << destination;
5676 codegen_->Store64BitValueToStack(destination, value);
5677 }
5678 }
5679 } else if (source.IsFpuRegister()) {
5680 if (destination.IsFpuRegister()) {
5681 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
5682 } else if (destination.IsStackSlot()) {
5683 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
5684 source.AsFpuRegister<XmmRegister>());
5685 } else if (destination.IsDoubleStackSlot()) {
5686 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
5687 source.AsFpuRegister<XmmRegister>());
5688 } else {
5689 DCHECK(destination.IsSIMDStackSlot());
5690 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
5691 source.AsFpuRegister<XmmRegister>());
5692 }
5693 }
5694 }
5695
5696 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
5697 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5698 __ movl(Address(CpuRegister(RSP), mem), reg);
5699 __ movl(reg, CpuRegister(TMP));
5700 }
5701
5702 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
5703 __ movq(CpuRegister(TMP), reg1);
5704 __ movq(reg1, reg2);
5705 __ movq(reg2, CpuRegister(TMP));
5706 }
5707
5708 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
5709 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5710 __ movq(Address(CpuRegister(RSP), mem), reg);
5711 __ movq(reg, CpuRegister(TMP));
5712 }
5713
5714 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
5715 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5716 __ movss(Address(CpuRegister(RSP), mem), reg);
5717 __ movd(reg, CpuRegister(TMP));
5718 }
5719
5720 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
5721 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
5722 __ movsd(Address(CpuRegister(RSP), mem), reg);
5723 __ movd(reg, CpuRegister(TMP));
5724 }
5725
5726 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
5727 size_t extra_slot = 2 * kX86_64WordSize;
5728 __ subq(CpuRegister(RSP), Immediate(extra_slot));
5729 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
5730 ExchangeMemory64(0, mem + extra_slot, 2);
5731 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
5732 __ addq(CpuRegister(RSP), Immediate(extra_slot));
5733 }
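// Sketch of the 128-bit exchange above: the XMM register is parked in a freshly reserved 16-byte
// stack area, the two quadwords of the memory operand (now at `mem + extra_slot` because RSP was
// adjusted) are swapped with that area via ExchangeMemory64, and the register is then reloaded
// from the area (which at that point holds the original memory contents) before the stack
// adjustment is undone.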
5734
5735 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
5736 ScratchRegisterScope ensure_scratch(
5737 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5738
5739 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5740 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
5741 __ movl(CpuRegister(ensure_scratch.GetRegister()),
5742 Address(CpuRegister(RSP), mem2 + stack_offset));
5743 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
5744 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
5745 CpuRegister(ensure_scratch.GetRegister()));
5746 }
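// Pseudo-C sketch of the 32-bit memory<->memory swap above, using TMP plus one (possibly spilled)
// scratch register:
//   int32_t a = *(rsp + mem1);  int32_t b = *(rsp + mem2);
//   *(rsp + mem2) = a;          *(rsp + mem1) = b;
// `stack_offset` compensates for the extra push performed when the scratch register had to be
// spilled.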
5747
5748 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
5749 ScratchRegisterScope ensure_scratch(
5750 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
5751
5752 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
5753
5754 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
5755 for (int i = 0; i < num_of_qwords; i++) {
5756 __ movq(CpuRegister(TMP),
5757 Address(CpuRegister(RSP), mem1 + stack_offset));
5758 __ movq(CpuRegister(ensure_scratch.GetRegister()),
5759 Address(CpuRegister(RSP), mem2 + stack_offset));
5760 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
5761 CpuRegister(TMP));
5762 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
5763 CpuRegister(ensure_scratch.GetRegister()));
5764 stack_offset += kX86_64WordSize;
5765 }
5766 }
5767
5768 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
5769 MoveOperands* move = moves_[index];
5770 Location source = move->GetSource();
5771 Location destination = move->GetDestination();
5772
5773 if (source.IsRegister() && destination.IsRegister()) {
5774 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
5775 } else if (source.IsRegister() && destination.IsStackSlot()) {
5776 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5777 } else if (source.IsStackSlot() && destination.IsRegister()) {
5778 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5779 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
5780 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
5781 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
5782 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
5783 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
5784 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
5785 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
5786 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
5787 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
5788 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
5789 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
5790 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
5791 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
5792 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5793 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
5794 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5795 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
5796 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5797 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
5798 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5799 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
5800 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
5801 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
5802 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
5803 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
5804 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
5805 } else {
5806 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
5807 }
5808 }
5809
5810
5811 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
5812 __ pushq(CpuRegister(reg));
5813 }
5814
5815
5816 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
5817 __ popq(CpuRegister(reg));
5818 }
5819
5820 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
5821 SlowPathCode* slow_path, CpuRegister class_reg) {
5822 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
5823 const size_t status_byte_offset =
5824 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
5825 constexpr uint32_t shifted_initialized_value =
5826 enum_cast<uint32_t>(ClassStatus::kInitialized) << (status_lsb_position % kBitsPerByte);
5827
5828 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_initialized_value));
5829 __ j(kBelow, slow_path->GetEntryLabel());
5830 __ Bind(slow_path->GetExitLabel());
5831 // No need for memory fence, thanks to the x86-64 memory model.
5832 }
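// Conceptually (sketch): if (class->status_ < ClassStatus::kInitialized) take the slow path.
// Only the byte that holds the status bits is compared, so no extra shift or mask instruction is
// needed, and, as noted above, the x86-64 memory model lets us skip an explicit fence.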
5833
5834 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
5835 CpuRegister temp) {
5836 uint32_t path_to_root = check->GetBitstringPathToRoot();
5837 uint32_t mask = check->GetBitstringMask();
5838 DCHECK(IsPowerOfTwo(mask + 1));
5839 size_t mask_bits = WhichPowerOf2(mask + 1);
5840
5841 if (mask_bits == 16u) {
5842 // Compare the bitstring in memory.
5843 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
5844 } else {
5845 // /* uint32_t */ temp = temp->status_
5846 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
5847 // Compare the bitstring bits using SUB.
5848 __ subl(temp, Immediate(path_to_root));
5849 // Shift out bits that do not contribute to the comparison.
5850 __ shll(temp, Immediate(32u - mask_bits));
5851 }
5852 }
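// Both paths above leave the flags as if computing (sketch)
//   (class->status_ & mask) - path_to_root
// either through a single 16-bit memory compare (mask_bits == 16) or through SUB followed by a
// left shift by (32 - mask_bits) that discards the bits outside the mask, so callers can simply
// branch on kEqual / kNotEqual.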
5853
5854 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
5855 HLoadClass::LoadKind desired_class_load_kind) {
5856 switch (desired_class_load_kind) {
5857 case HLoadClass::LoadKind::kInvalid:
5858 LOG(FATAL) << "UNREACHABLE";
5859 UNREACHABLE();
5860 case HLoadClass::LoadKind::kReferrersClass:
5861 break;
5862 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5863 case HLoadClass::LoadKind::kBootImageRelRo:
5864 case HLoadClass::LoadKind::kBssEntry:
5865 DCHECK(!Runtime::Current()->UseJitCompilation());
5866 break;
5867 case HLoadClass::LoadKind::kJitBootImageAddress:
5868 case HLoadClass::LoadKind::kJitTableAddress:
5869 DCHECK(Runtime::Current()->UseJitCompilation());
5870 break;
5871 case HLoadClass::LoadKind::kRuntimeCall:
5872 break;
5873 }
5874 return desired_class_load_kind;
5875 }
5876
5877 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
5878 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5879 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5880 // Custom calling convention: RAX serves as both input and output.
5881 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
5882 cls,
5883 Location::RegisterLocation(RAX),
5884 Location::RegisterLocation(RAX));
5885 return;
5886 }
5887 DCHECK(!cls->NeedsAccessCheck());
5888
5889 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
5890 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
5891 ? LocationSummary::kCallOnSlowPath
5892 : LocationSummary::kNoCall;
5893 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
5894 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
5895 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5896 }
5897
5898 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
5899 locations->SetInAt(0, Location::RequiresRegister());
5900 }
5901 locations->SetOut(Location::RequiresRegister());
5902 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
5903 if (!kUseReadBarrier || kUseBakerReadBarrier) {
5904 // Rely on the type resolution and/or initialization to save everything.
5905 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
5906 } else {
5907 // For non-Baker read barrier we have a temp-clobbering call.
5908 }
5909 }
5910 }
5911
5912 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
5913 dex::TypeIndex type_index,
5914 Handle<mirror::Class> handle) {
5915 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
5916 // Add a patch entry and return the label.
5917 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
5918 PatchInfo<Label>* info = &jit_class_patches_.back();
5919 return &info->label;
5920 }
5921
5922 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
5923 // move.
5924 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
5925 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
5926 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
5927 codegen_->GenerateLoadClassRuntimeCall(cls);
5928 return;
5929 }
5930 DCHECK(!cls->NeedsAccessCheck());
5931
5932 LocationSummary* locations = cls->GetLocations();
5933 Location out_loc = locations->Out();
5934 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5935
5936 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
5937 ? kWithoutReadBarrier
5938 : kCompilerReadBarrierOption;
5939 bool generate_null_check = false;
5940 switch (load_kind) {
5941 case HLoadClass::LoadKind::kReferrersClass: {
5942 DCHECK(!cls->CanCallRuntime());
5943 DCHECK(!cls->MustGenerateClinitCheck());
5944 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
5945 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
5946 GenerateGcRootFieldLoad(
5947 cls,
5948 out_loc,
5949 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
5950 /* fixup_label= */ nullptr,
5951 read_barrier_option);
5952 break;
5953 }
5954 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
5955 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
5956 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5957 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
5958 codegen_->RecordBootImageTypePatch(cls);
5959 break;
5960 case HLoadClass::LoadKind::kBootImageRelRo: {
5961 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
5962 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
5963 codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(cls));
5964 break;
5965 }
5966 case HLoadClass::LoadKind::kBssEntry: {
5967 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5968 /* no_rip= */ false);
5969 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
5970 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
5971 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5972 generate_null_check = true;
5973 break;
5974 }
5975 case HLoadClass::LoadKind::kJitBootImageAddress: {
5976 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
5977 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
5978 DCHECK_NE(address, 0u);
5979 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
5980 break;
5981 }
5982 case HLoadClass::LoadKind::kJitTableAddress: {
5983 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
5984 /* no_rip= */ true);
5985 Label* fixup_label =
5986 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
5987 // /* GcRoot<mirror::Class> */ out = *address
5988 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
5989 break;
5990 }
5991 default:
5992 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
5993 UNREACHABLE();
5994 }
5995
5996 if (generate_null_check || cls->MustGenerateClinitCheck()) {
5997 DCHECK(cls->CanCallRuntime());
5998 SlowPathCode* slow_path =
5999 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6000 codegen_->AddSlowPath(slow_path);
6001 if (generate_null_check) {
6002 __ testl(out, out);
6003 __ j(kEqual, slow_path->GetEntryLabel());
6004 }
6005 if (cls->MustGenerateClinitCheck()) {
6006 GenerateClassInitializationCheck(slow_path, out);
6007 } else {
6008 __ Bind(slow_path->GetExitLabel());
6009 }
6010 }
6011 }
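// Descriptive note on the slow-path wiring above: for kBssEntry the .bss slot starts out null, so
// a null result falls back to LoadClassSlowPathX86_64, which resolves the type; when the class may
// also need initialization, MustGenerateClinitCheck() routes through the same slow path via
// GenerateClassInitializationCheck().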
6012
6013 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6014 LocationSummary* locations =
6015 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6016 locations->SetInAt(0, Location::RequiresRegister());
6017 if (check->HasUses()) {
6018 locations->SetOut(Location::SameAsFirstInput());
6019 }
6020 // Rely on the type initialization to save everything we need.
6021 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6022 }
6023
6024 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6025 // Custom calling convention: RAX serves as both input and output.
6026 Location location = Location::RegisterLocation(RAX);
6027 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6028 }
6029
6030 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6031 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6032 }
6033
6034 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6035 // Custom calling convention: RAX serves as both input and output.
6036 Location location = Location::RegisterLocation(RAX);
6037 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6038 }
6039
6040 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6041 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6042 }
6043
6044 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6045 // We assume the class is not null.
6046 SlowPathCode* slow_path =
6047 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6048 codegen_->AddSlowPath(slow_path);
6049 GenerateClassInitializationCheck(slow_path,
6050 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6051 }
6052
6053 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6054 HLoadString::LoadKind desired_string_load_kind) {
6055 switch (desired_string_load_kind) {
6056 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6057 case HLoadString::LoadKind::kBootImageRelRo:
6058 case HLoadString::LoadKind::kBssEntry:
6059 DCHECK(!Runtime::Current()->UseJitCompilation());
6060 break;
6061 case HLoadString::LoadKind::kJitBootImageAddress:
6062 case HLoadString::LoadKind::kJitTableAddress:
6063 DCHECK(Runtime::Current()->UseJitCompilation());
6064 break;
6065 case HLoadString::LoadKind::kRuntimeCall:
6066 break;
6067 }
6068 return desired_string_load_kind;
6069 }
6070
6071 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6072 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6073 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6074 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6075 locations->SetOut(Location::RegisterLocation(RAX));
6076 } else {
6077 locations->SetOut(Location::RequiresRegister());
6078 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6079 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6080 // Rely on the pResolveString entrypoint to save everything.
6081 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6082 } else {
6083 // For non-Baker read barrier we have a temp-clobbering call.
6084 }
6085 }
6086 }
6087 }
6088
6089 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6090 dex::StringIndex string_index,
6091 Handle<mirror::String> handle) {
6092 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6093 // Add a patch entry and return the label.
6094 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6095 PatchInfo<Label>* info = &jit_string_patches_.back();
6096 return &info->label;
6097 }
6098
6099 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6100 // move.
6101 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6102 LocationSummary* locations = load->GetLocations();
6103 Location out_loc = locations->Out();
6104 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6105
6106 switch (load->GetLoadKind()) {
6107 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6108 DCHECK(codegen_->GetCompilerOptions().IsBootImage());
6109 __ leal(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6110 codegen_->RecordBootImageStringPatch(load);
6111 return;
6112 }
6113 case HLoadString::LoadKind::kBootImageRelRo: {
6114 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6115 __ movl(out, Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset, /* no_rip= */ false));
6116 codegen_->RecordBootImageRelRoPatch(codegen_->GetBootImageOffset(load));
6117 return;
6118 }
6119 case HLoadString::LoadKind::kBssEntry: {
6120 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6121 /* no_rip= */ false);
6122 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6123 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
6124 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6125 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6126 codegen_->AddSlowPath(slow_path);
6127 __ testl(out, out);
6128 __ j(kEqual, slow_path->GetEntryLabel());
6129 __ Bind(slow_path->GetExitLabel());
6130 return;
6131 }
6132 case HLoadString::LoadKind::kJitBootImageAddress: {
6133 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6134 DCHECK_NE(address, 0u);
6135 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6136 return;
6137 }
6138 case HLoadString::LoadKind::kJitTableAddress: {
6139 Address address = Address::Absolute(CodeGeneratorX86_64::kDummy32BitOffset,
6140 /* no_rip= */ true);
6141 Label* fixup_label = codegen_->NewJitRootStringPatch(
6142 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6143 // /* GcRoot<mirror::String> */ out = *address
6144 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6145 return;
6146 }
6147 default:
6148 break;
6149 }
6150
6151 // TODO: Re-add the compiler code to do the string dex cache lookup.
6152 // Custom calling convention: RAX serves as both input and output.
6153 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6154 codegen_->InvokeRuntime(kQuickResolveString,
6155 load,
6156 load->GetDexPc());
6157 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6158 }
6159
6160 static Address GetExceptionTlsAddress() {
6161 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6162 /* no_rip= */ true);
6163 }
6164
6165 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6166 LocationSummary* locations =
6167 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6168 locations->SetOut(Location::RequiresRegister());
6169 }
6170
6171 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6172 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6173 }
6174
6175 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6176 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6177 }
6178
6179 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6180 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6181 }
6182
6183 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6184 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6185 instruction, LocationSummary::kCallOnMainOnly);
6186 InvokeRuntimeCallingConvention calling_convention;
6187 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6188 }
6189
6190 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6191 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6192 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6193 }
6194
6195 // Temp is used for read barrier.
6196 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6197 if (kEmitCompilerReadBarrier &&
6198 !kUseBakerReadBarrier &&
6199 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6200 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6201 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6202 return 1;
6203 }
6204 return 0;
6205 }
6206
6207 // The interface check case has 2 temps: one to hold the number of interfaces and one for the
6208 // current interface pointer; the current interface is compared in memory.
6209 // The other checks have one temp for loading the object's class.
6210 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6211 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6212 return 2;
6213 }
6214 return 1 + NumberOfInstanceOfTemps(type_check_kind);
6215 }
6216
6217 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6218 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6219 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6220 bool baker_read_barrier_slow_path = false;
6221 switch (type_check_kind) {
6222 case TypeCheckKind::kExactCheck:
6223 case TypeCheckKind::kAbstractClassCheck:
6224 case TypeCheckKind::kClassHierarchyCheck:
6225 case TypeCheckKind::kArrayObjectCheck: {
6226 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6227 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6228 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6229 break;
6230 }
6231 case TypeCheckKind::kArrayCheck:
6232 case TypeCheckKind::kUnresolvedCheck:
6233 case TypeCheckKind::kInterfaceCheck:
6234 call_kind = LocationSummary::kCallOnSlowPath;
6235 break;
6236 case TypeCheckKind::kBitstringCheck:
6237 break;
6238 }
6239
6240 LocationSummary* locations =
6241 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6242 if (baker_read_barrier_slow_path) {
6243 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6244 }
6245 locations->SetInAt(0, Location::RequiresRegister());
6246 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6247 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6248 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6249 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6250 } else {
6251 locations->SetInAt(1, Location::Any());
6252 }
6253 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6254 locations->SetOut(Location::RequiresRegister());
6255 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6256 }
6257
6258 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6259 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6260 LocationSummary* locations = instruction->GetLocations();
6261 Location obj_loc = locations->InAt(0);
6262 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6263 Location cls = locations->InAt(1);
6264 Location out_loc = locations->Out();
6265 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6266 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6267 DCHECK_LE(num_temps, 1u);
6268 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6269 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6270 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6271 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6272 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6273 SlowPathCode* slow_path = nullptr;
6274 NearLabel done, zero;
6275
6276 // Return 0 if `obj` is null.
6277 // Avoid null check if we know obj is not null.
6278 if (instruction->MustDoNullCheck()) {
6279 __ testl(obj, obj);
6280 __ j(kEqual, &zero);
6281 }
6282
6283 switch (type_check_kind) {
6284 case TypeCheckKind::kExactCheck: {
6285 ReadBarrierOption read_barrier_option =
6286 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6287 // /* HeapReference<Class> */ out = obj->klass_
6288 GenerateReferenceLoadTwoRegisters(instruction,
6289 out_loc,
6290 obj_loc,
6291 class_offset,
6292 read_barrier_option);
6293 if (cls.IsRegister()) {
6294 __ cmpl(out, cls.AsRegister<CpuRegister>());
6295 } else {
6296 DCHECK(cls.IsStackSlot()) << cls;
6297 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6298 }
6299 if (zero.IsLinked()) {
6300 // Classes must be equal for the instanceof to succeed.
6301 __ j(kNotEqual, &zero);
6302 __ movl(out, Immediate(1));
6303 __ jmp(&done);
6304 } else {
6305 __ setcc(kEqual, out);
6306 // setcc only sets the low byte.
6307 __ andl(out, Immediate(1));
6308 }
6309 break;
6310 }
6311
6312 case TypeCheckKind::kAbstractClassCheck: {
6313 ReadBarrierOption read_barrier_option =
6314 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6315 // /* HeapReference<Class> */ out = obj->klass_
6316 GenerateReferenceLoadTwoRegisters(instruction,
6317 out_loc,
6318 obj_loc,
6319 class_offset,
6320 read_barrier_option);
6321 // If the class is abstract, we eagerly fetch the super class of the
6322 // object to avoid doing a comparison we know will fail.
6323 NearLabel loop, success;
6324 __ Bind(&loop);
6325 // /* HeapReference<Class> */ out = out->super_class_
6326 GenerateReferenceLoadOneRegister(instruction,
6327 out_loc,
6328 super_offset,
6329 maybe_temp_loc,
6330 read_barrier_option);
6331 __ testl(out, out);
6332 // If `out` is null, we use it for the result, and jump to `done`.
6333 __ j(kEqual, &done);
6334 if (cls.IsRegister()) {
6335 __ cmpl(out, cls.AsRegister<CpuRegister>());
6336 } else {
6337 DCHECK(cls.IsStackSlot()) << cls;
6338 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6339 }
6340 __ j(kNotEqual, &loop);
6341 __ movl(out, Immediate(1));
6342 if (zero.IsLinked()) {
6343 __ jmp(&done);
6344 }
6345 break;
6346 }
6347
6348 case TypeCheckKind::kClassHierarchyCheck: {
6349 ReadBarrierOption read_barrier_option =
6350 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6351 // /* HeapReference<Class> */ out = obj->klass_
6352 GenerateReferenceLoadTwoRegisters(instruction,
6353 out_loc,
6354 obj_loc,
6355 class_offset,
6356 read_barrier_option);
6357 // Walk over the class hierarchy to find a match.
6358 NearLabel loop, success;
6359 __ Bind(&loop);
6360 if (cls.IsRegister()) {
6361 __ cmpl(out, cls.AsRegister<CpuRegister>());
6362 } else {
6363 DCHECK(cls.IsStackSlot()) << cls;
6364 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6365 }
6366 __ j(kEqual, &success);
6367 // /* HeapReference<Class> */ out = out->super_class_
6368 GenerateReferenceLoadOneRegister(instruction,
6369 out_loc,
6370 super_offset,
6371 maybe_temp_loc,
6372 read_barrier_option);
6373 __ testl(out, out);
6374 __ j(kNotEqual, &loop);
6375 // If `out` is null, we use it for the result, and jump to `done`.
6376 __ jmp(&done);
6377 __ Bind(&success);
6378 __ movl(out, Immediate(1));
6379 if (zero.IsLinked()) {
6380 __ jmp(&done);
6381 }
6382 break;
6383 }
6384
6385 case TypeCheckKind::kArrayObjectCheck: {
6386 ReadBarrierOption read_barrier_option =
6387 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6388 // /* HeapReference<Class> */ out = obj->klass_
6389 GenerateReferenceLoadTwoRegisters(instruction,
6390 out_loc,
6391 obj_loc,
6392 class_offset,
6393 read_barrier_option);
6394 // Do an exact check.
6395 NearLabel exact_check;
6396 if (cls.IsRegister()) {
6397 __ cmpl(out, cls.AsRegister<CpuRegister>());
6398 } else {
6399 DCHECK(cls.IsStackSlot()) << cls;
6400 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6401 }
6402 __ j(kEqual, &exact_check);
6403 // Otherwise, we need to check that the object's class is a non-primitive array.
6404 // /* HeapReference<Class> */ out = out->component_type_
6405 GenerateReferenceLoadOneRegister(instruction,
6406 out_loc,
6407 component_offset,
6408 maybe_temp_loc,
6409 read_barrier_option);
6410 __ testl(out, out);
6411 // If `out` is null, we use it for the result, and jump to `done`.
6412 __ j(kEqual, &done);
6413 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6414 __ j(kNotEqual, &zero);
6415 __ Bind(&exact_check);
6416 __ movl(out, Immediate(1));
6417 __ jmp(&done);
6418 break;
6419 }
6420
6421 case TypeCheckKind::kArrayCheck: {
6422 // No read barrier since the slow path will retry upon failure.
6423 // /* HeapReference<Class> */ out = obj->klass_
6424 GenerateReferenceLoadTwoRegisters(instruction,
6425 out_loc,
6426 obj_loc,
6427 class_offset,
6428 kWithoutReadBarrier);
6429 if (cls.IsRegister()) {
6430 __ cmpl(out, cls.AsRegister<CpuRegister>());
6431 } else {
6432 DCHECK(cls.IsStackSlot()) << cls;
6433 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6434 }
6435 DCHECK(locations->OnlyCallsOnSlowPath());
6436 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6437 instruction, /* is_fatal= */ false);
6438 codegen_->AddSlowPath(slow_path);
6439 __ j(kNotEqual, slow_path->GetEntryLabel());
6440 __ movl(out, Immediate(1));
6441 if (zero.IsLinked()) {
6442 __ jmp(&done);
6443 }
6444 break;
6445 }
6446
6447 case TypeCheckKind::kUnresolvedCheck:
6448 case TypeCheckKind::kInterfaceCheck: {
6449 // Note that we indeed only call on slow path, but we always go
6450 // into the slow path for the unresolved and interface check
6451 // cases.
6452 //
6453 // We cannot directly call the InstanceofNonTrivial runtime
6454 // entry point without resorting to a type checking slow path
6455 // here (i.e. by calling InvokeRuntime directly), as it would
6456 // require to assign fixed registers for the inputs of this
6457 // HInstanceOf instruction (following the runtime calling
6458 // convention), which might be cluttered by the potential first
6459 // read barrier emission at the beginning of this method.
6460 //
6461 // TODO: Introduce a new runtime entry point taking the object
6462 // to test (instead of its class) as argument, and let it deal
6463 // with the read barrier issues. This will let us refactor this
6464 // case of the `switch` code as it was previously (with a direct
6465 // call to the runtime not using a type checking slow path).
6466 // This should also be beneficial for the other cases above.
6467 DCHECK(locations->OnlyCallsOnSlowPath());
6468 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6469 instruction, /* is_fatal= */ false);
6470 codegen_->AddSlowPath(slow_path);
6471 __ jmp(slow_path->GetEntryLabel());
6472 if (zero.IsLinked()) {
6473 __ jmp(&done);
6474 }
6475 break;
6476 }
6477
6478 case TypeCheckKind::kBitstringCheck: {
6479 // /* HeapReference<Class> */ out = obj->klass_
6480 GenerateReferenceLoadTwoRegisters(instruction,
6481 out_loc,
6482 obj_loc,
6483 class_offset,
6484 kWithoutReadBarrier);
6485
6486 GenerateBitstringTypeCheckCompare(instruction, out);
6487 if (zero.IsLinked()) {
6488 __ j(kNotEqual, &zero);
6489 __ movl(out, Immediate(1));
6490 __ jmp(&done);
6491 } else {
6492 __ setcc(kEqual, out);
6493 // setcc only sets the low byte.
6494 __ andl(out, Immediate(1));
6495 }
6496 break;
6497 }
6498 }
6499
6500 if (zero.IsLinked()) {
6501 __ Bind(&zero);
6502 __ xorl(out, out);
6503 }
6504
6505 if (done.IsLinked()) {
6506 __ Bind(&done);
6507 }
6508
6509 if (slow_path != nullptr) {
6510 __ Bind(slow_path->GetExitLabel());
6511 }
6512 }
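// For kAbstractClassCheck and kClassHierarchyCheck the fast path emitted above is roughly
// (pseudo-C sketch):
//   k = obj->klass_;
//   while (k != nullptr && k != cls) { k = k->super_class_; }
//   out = (k != nullptr);
// kArrayObjectCheck instead accepts an exact match or a non-primitive array component type, and
// the unresolved/interface cases always defer to the slow path.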
6513
6514 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6515 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6516 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6517 LocationSummary* locations =
6518 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6519 locations->SetInAt(0, Location::RequiresRegister());
6520 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6521 // Require a register for the interface check since there is a loop that compares the class to
6522 // a memory address.
6523 locations->SetInAt(1, Location::RequiresRegister());
6524 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6525 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6526 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6527 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6528 } else {
6529 locations->SetInAt(1, Location::Any());
6530 }
6531 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
6532 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
6533 }
6534
6535 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
6536 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6537 LocationSummary* locations = instruction->GetLocations();
6538 Location obj_loc = locations->InAt(0);
6539 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6540 Location cls = locations->InAt(1);
6541 Location temp_loc = locations->GetTemp(0);
6542 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6543 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
6544 DCHECK_GE(num_temps, 1u);
6545 DCHECK_LE(num_temps, 2u);
6546 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
6547 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6548 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6549 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6550 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6551 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
6552 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
6553 const uint32_t object_array_data_offset =
6554 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
6555
6556 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
6557 SlowPathCode* type_check_slow_path =
6558 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6559 instruction, is_type_check_slow_path_fatal);
6560 codegen_->AddSlowPath(type_check_slow_path);
6561
6562
6563 NearLabel done;
6564 // Avoid null check if we know obj is not null.
6565 if (instruction->MustDoNullCheck()) {
6566 __ testl(obj, obj);
6567 __ j(kEqual, &done);
6568 }
6569
6570 switch (type_check_kind) {
6571 case TypeCheckKind::kExactCheck:
6572 case TypeCheckKind::kArrayCheck: {
6573 // /* HeapReference<Class> */ temp = obj->klass_
6574 GenerateReferenceLoadTwoRegisters(instruction,
6575 temp_loc,
6576 obj_loc,
6577 class_offset,
6578 kWithoutReadBarrier);
6579 if (cls.IsRegister()) {
6580 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6581 } else {
6582 DCHECK(cls.IsStackSlot()) << cls;
6583 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6584 }
6585 // Jump to slow path for throwing the exception or doing a
6586 // more involved array check.
6587 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6588 break;
6589 }
6590
6591 case TypeCheckKind::kAbstractClassCheck: {
6592 // /* HeapReference<Class> */ temp = obj->klass_
6593 GenerateReferenceLoadTwoRegisters(instruction,
6594 temp_loc,
6595 obj_loc,
6596 class_offset,
6597 kWithoutReadBarrier);
6598 // If the class is abstract, we eagerly fetch the super class of the
6599 // object to avoid doing a comparison we know will fail.
6600 NearLabel loop;
6601 __ Bind(&loop);
6602 // /* HeapReference<Class> */ temp = temp->super_class_
6603 GenerateReferenceLoadOneRegister(instruction,
6604 temp_loc,
6605 super_offset,
6606 maybe_temp2_loc,
6607 kWithoutReadBarrier);
6608
6609 // If the class reference currently in `temp` is null, jump to the slow path to throw the
6610 // exception.
6611 __ testl(temp, temp);
6612 // Otherwise, compare the classes.
6613 __ j(kZero, type_check_slow_path->GetEntryLabel());
6614 if (cls.IsRegister()) {
6615 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6616 } else {
6617 DCHECK(cls.IsStackSlot()) << cls;
6618 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6619 }
6620 __ j(kNotEqual, &loop);
6621 break;
6622 }
6623
6624 case TypeCheckKind::kClassHierarchyCheck: {
6625 // /* HeapReference<Class> */ temp = obj->klass_
6626 GenerateReferenceLoadTwoRegisters(instruction,
6627 temp_loc,
6628 obj_loc,
6629 class_offset,
6630 kWithoutReadBarrier);
6631 // Walk over the class hierarchy to find a match.
6632 NearLabel loop;
6633 __ Bind(&loop);
6634 if (cls.IsRegister()) {
6635 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6636 } else {
6637 DCHECK(cls.IsStackSlot()) << cls;
6638 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6639 }
6640 __ j(kEqual, &done);
6641
6642 // /* HeapReference<Class> */ temp = temp->super_class_
6643 GenerateReferenceLoadOneRegister(instruction,
6644 temp_loc,
6645 super_offset,
6646 maybe_temp2_loc,
6647 kWithoutReadBarrier);
6648
6649 // If the class reference currently in `temp` is not null, jump
6650 // back at the beginning of the loop.
6651 __ testl(temp, temp);
6652 __ j(kNotZero, &loop);
6653 // Otherwise, jump to the slow path to throw the exception.
6654 __ jmp(type_check_slow_path->GetEntryLabel());
6655 break;
6656 }
6657
6658 case TypeCheckKind::kArrayObjectCheck: {
6659 // /* HeapReference<Class> */ temp = obj->klass_
6660 GenerateReferenceLoadTwoRegisters(instruction,
6661 temp_loc,
6662 obj_loc,
6663 class_offset,
6664 kWithoutReadBarrier);
6665 // Do an exact check.
6666 NearLabel check_non_primitive_component_type;
6667 if (cls.IsRegister()) {
6668 __ cmpl(temp, cls.AsRegister<CpuRegister>());
6669 } else {
6670 DCHECK(cls.IsStackSlot()) << cls;
6671 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
6672 }
6673 __ j(kEqual, &done);
6674
6675 // Otherwise, we need to check that the object's class is a non-primitive array.
6676 // /* HeapReference<Class> */ temp = temp->component_type_
6677 GenerateReferenceLoadOneRegister(instruction,
6678 temp_loc,
6679 component_offset,
6680 maybe_temp2_loc,
6681 kWithoutReadBarrier);
6682
6683 // If the component type is not null (i.e. the object is indeed
6684 // an array), jump to label `check_non_primitive_component_type`
6685 // to further check that this component type is not a primitive
6686 // type.
6687 __ testl(temp, temp);
6688 // Otherwise, jump to the slow path to throw the exception.
6689 __ j(kZero, type_check_slow_path->GetEntryLabel());
6690 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
6691 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6692 break;
6693 }
6694
6695 case TypeCheckKind::kUnresolvedCheck: {
6696 // We always go into the type check slow path for the unresolved case.
6697 //
6698 // We cannot directly call the CheckCast runtime entry point
6699 // without resorting to a type checking slow path here (i.e. by
6700 // calling InvokeRuntime directly), as it would require to
6701 // assign fixed registers for the inputs of this HInstanceOf
6702 // instruction (following the runtime calling convention), which
6703 // might be cluttered by the potential first read barrier
6704 // emission at the beginning of this method.
6705 __ jmp(type_check_slow_path->GetEntryLabel());
6706 break;
6707 }
6708
6709 case TypeCheckKind::kInterfaceCheck: {
6710 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
6711 // We cannot get false positives by doing this.
6712 // /* HeapReference<Class> */ temp = obj->klass_
6713 GenerateReferenceLoadTwoRegisters(instruction,
6714 temp_loc,
6715 obj_loc,
6716 class_offset,
6717 kWithoutReadBarrier);
6718
6719 // /* HeapReference<Class> */ temp = temp->iftable_
6720 GenerateReferenceLoadTwoRegisters(instruction,
6721 temp_loc,
6722 temp_loc,
6723 iftable_offset,
6724 kWithoutReadBarrier);
6725 // Iftable is never null.
6726 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
6727 // Maybe poison the `cls` for direct comparison with memory.
6728 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
6729 // Loop through the iftable and check if any class matches.
6730 NearLabel start_loop;
6731 __ Bind(&start_loop);
6732 // Need to subtract first to handle the empty array case.
6733 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
6734 __ j(kNegative, type_check_slow_path->GetEntryLabel());
6735 // Go to next interface if the classes do not match.
6736 __ cmpl(cls.AsRegister<CpuRegister>(),
6737 CodeGeneratorX86_64::ArrayAddress(temp,
6738 maybe_temp2_loc,
6739 TIMES_4,
6740 object_array_data_offset));
6741 __ j(kNotEqual, &start_loop); // Keep searching; fall through once the classes match.
6742 // If `cls` was poisoned above, unpoison it.
6743 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
6744 break;
6745 }
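// The loop above is roughly (sketch): walk the iftable's (interface class, method array) pairs
// from the last one backwards in steps of two and fall through as soon as an entry equals `cls`;
// running past the start without a match jumps to the type check slow path.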
6746
6747 case TypeCheckKind::kBitstringCheck: {
6748 // /* HeapReference<Class> */ temp = obj->klass_
6749 GenerateReferenceLoadTwoRegisters(instruction,
6750 temp_loc,
6751 obj_loc,
6752 class_offset,
6753 kWithoutReadBarrier);
6754
6755 GenerateBitstringTypeCheckCompare(instruction, temp);
6756 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
6757 break;
6758 }
6759 }
6760
6761 if (done.IsLinked()) {
6762 __ Bind(&done);
6763 }
6764
6765 __ Bind(type_check_slow_path->GetExitLabel());
6766 }
6767
6768 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6769 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6770 instruction, LocationSummary::kCallOnMainOnly);
6771 InvokeRuntimeCallingConvention calling_convention;
6772 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6773 }
6774
6775 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
6776 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
6777 instruction,
6778 instruction->GetDexPc());
6779 if (instruction->IsEnter()) {
6780 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
6781 } else {
6782 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
6783 }
6784 }
6785
6786 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
6787 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
6788 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
6789 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6790 locations->SetInAt(0, Location::RequiresRegister());
6791 // There is no immediate variant of negated bitwise and in X86.
6792 locations->SetInAt(1, Location::RequiresRegister());
6793 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6794 }
6795
6796 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
6797 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
6798 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
6799 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
6800 locations->SetInAt(0, Location::RequiresRegister());
6801 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6802 }
6803
6804 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
6805 LocationSummary* locations = instruction->GetLocations();
6806 Location first = locations->InAt(0);
6807 Location second = locations->InAt(1);
6808 Location dest = locations->Out();
6809 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6810 }
6811
6812 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
6813 LocationSummary* locations = instruction->GetLocations();
6814 Location src = locations->InAt(0);
6815 Location dest = locations->Out();
6816 switch (instruction->GetOpKind()) {
6817 case HInstruction::kAnd:
6818 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
6819 break;
6820 case HInstruction::kXor:
6821 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
6822 break;
6823 default:
6824 LOG(FATAL) << "Unreachable";
6825 }
6826 }
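// For reference (BMI1 semantics, sketch): blsr computes dest = src & (src - 1), clearing the
// lowest set bit, while blsmsk computes dest = src ^ (src - 1), producing a mask up to and
// including the lowest set bit; hence kAnd maps to blsr and kXor to blsmsk above.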
6827
6828 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
6829 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
6830 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
6831
6832 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6833 LocationSummary* locations =
6834 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6835 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
6836 || instruction->GetResultType() == DataType::Type::kInt64);
6837 locations->SetInAt(0, Location::RequiresRegister());
6838 locations->SetInAt(1, Location::Any());
6839 locations->SetOut(Location::SameAsFirstInput());
6840 }
6841
6842 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
6843 HandleBitwiseOperation(instruction);
6844 }
6845
6846 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
6847 HandleBitwiseOperation(instruction);
6848 }
6849
6850 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
6851 HandleBitwiseOperation(instruction);
6852 }
6853
6854 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
6855 LocationSummary* locations = instruction->GetLocations();
6856 Location first = locations->InAt(0);
6857 Location second = locations->InAt(1);
6858 DCHECK(first.Equals(locations->Out()));
6859
6860 if (instruction->GetResultType() == DataType::Type::kInt32) {
6861 if (second.IsRegister()) {
6862 if (instruction->IsAnd()) {
6863 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6864 } else if (instruction->IsOr()) {
6865 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6866 } else {
6867 DCHECK(instruction->IsXor());
6868 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
6869 }
6870 } else if (second.IsConstant()) {
6871 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
6872 if (instruction->IsAnd()) {
6873 __ andl(first.AsRegister<CpuRegister>(), imm);
6874 } else if (instruction->IsOr()) {
6875 __ orl(first.AsRegister<CpuRegister>(), imm);
6876 } else {
6877 DCHECK(instruction->IsXor());
6878 __ xorl(first.AsRegister<CpuRegister>(), imm);
6879 }
6880 } else {
6881 Address address(CpuRegister(RSP), second.GetStackIndex());
6882 if (instruction->IsAnd()) {
6883 __ andl(first.AsRegister<CpuRegister>(), address);
6884 } else if (instruction->IsOr()) {
6885 __ orl(first.AsRegister<CpuRegister>(), address);
6886 } else {
6887 DCHECK(instruction->IsXor());
6888 __ xorl(first.AsRegister<CpuRegister>(), address);
6889 }
6890 }
6891 } else {
6892 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
6893 CpuRegister first_reg = first.AsRegister<CpuRegister>();
6894 bool second_is_constant = false;
6895 int64_t value = 0;
6896 if (second.IsConstant()) {
6897 second_is_constant = true;
6898 value = second.GetConstant()->AsLongConstant()->GetValue();
6899 }
6900 bool is_int32_value = IsInt<32>(value);
6901
6902 if (instruction->IsAnd()) {
6903 if (second_is_constant) {
6904 if (is_int32_value) {
6905 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
6906 } else {
6907 __ andq(first_reg, codegen_->LiteralInt64Address(value));
6908 }
6909 } else if (second.IsDoubleStackSlot()) {
6910 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6911 } else {
6912 __ andq(first_reg, second.AsRegister<CpuRegister>());
6913 }
6914 } else if (instruction->IsOr()) {
6915 if (second_is_constant) {
6916 if (is_int32_value) {
6917 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
6918 } else {
6919 __ orq(first_reg, codegen_->LiteralInt64Address(value));
6920 }
6921 } else if (second.IsDoubleStackSlot()) {
6922 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6923 } else {
6924 __ orq(first_reg, second.AsRegister<CpuRegister>());
6925 }
6926 } else {
6927 DCHECK(instruction->IsXor());
6928 if (second_is_constant) {
6929 if (is_int32_value) {
6930 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
6931 } else {
6932 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
6933 }
6934 } else if (second.IsDoubleStackSlot()) {
6935 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
6936 } else {
6937 __ xorq(first_reg, second.AsRegister<CpuRegister>());
6938 }
6939 }
6940 }
6941 }
6942
void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
    HInstruction* instruction,
    Location out,
    uint32_t offset,
    Location maybe_temp,
    ReadBarrierOption read_barrier_option) {
  CpuRegister out_reg = out.AsRegister<CpuRegister>();
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(out + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, out_reg, offset, /* needs_null_check= */ false);
    } else {
      // Load with slow path based read barrier.
      // Save the value of `out` into `maybe_temp` before overwriting it
      // in the following move operation, as we will need it for the
      // read barrier below.
      DCHECK(maybe_temp.IsRegister()) << maybe_temp;
      __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
      // /* HeapReference<Object> */ out = *(out + offset)
      __ movl(out_reg, Address(out_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(out + offset)
    __ movl(out_reg, Address(out_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
    HInstruction* instruction,
    Location out,
    Location obj,
    uint32_t offset,
    ReadBarrierOption read_barrier_option) {
  CpuRegister out_reg = out.AsRegister<CpuRegister>();
  CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
  if (read_barrier_option == kWithReadBarrier) {
    CHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Load with fast path based Baker's read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      codegen_->GenerateFieldLoadWithBakerReadBarrier(
          instruction, out, obj_reg, offset, /* needs_null_check= */ false);
    } else {
      // Load with slow path based read barrier.
      // /* HeapReference<Object> */ out = *(obj + offset)
      __ movl(out_reg, Address(obj_reg, offset));
      codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
    }
  } else {
    // Plain load with no read barrier.
    // /* HeapReference<Object> */ out = *(obj + offset)
    __ movl(out_reg, Address(obj_reg, offset));
    __ MaybeUnpoisonHeapReference(out_reg);
  }
}

void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
    HInstruction* instruction,
    Location root,
    const Address& address,
    Label* fixup_label,
    ReadBarrierOption read_barrier_option) {
  CpuRegister root_reg = root.AsRegister<CpuRegister>();
  if (read_barrier_option == kWithReadBarrier) {
    DCHECK(kEmitCompilerReadBarrier);
    if (kUseBakerReadBarrier) {
      // Fast path implementation of art::ReadBarrier::BarrierForRoot when
      // Baker's read barriers are used:
      //
      //   root = obj.field;
      //   temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
      //   if (temp != null) {
      //     root = temp(root)
      //   }

      // /* GcRoot<mirror::Object> */ root = *address
      __ movl(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      static_assert(
          sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
          "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
          "have different sizes.");
      static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
                    "art::mirror::CompressedReference<mirror::Object> and int32_t "
                    "have different sizes.");

      // Slow path marking the GC root `root`.
      SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
          instruction, root, /* unpoison_ref_before_marking= */ false);
      codegen_->AddSlowPath(slow_path);

      // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
      const int32_t entry_point_offset =
          Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
      __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
      // The entrypoint is null when the GC is not marking.
      __ j(kNotEqual, slow_path->GetEntryLabel());
      __ Bind(slow_path->GetExitLabel());
    } else {
      // GC root loaded through a slow path for read barriers other
      // than Baker's.
      // /* GcRoot<mirror::Object>* */ root = address
      __ leaq(root_reg, address);
      if (fixup_label != nullptr) {
        __ Bind(fixup_label);
      }
      // /* mirror::Object* */ root = root->Read()
      codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
    }
  } else {
    // Plain GC root load with no read barrier.
    // /* GcRoot<mirror::Object> */ root = *address
    __ movl(root_reg, address);
    if (fixup_label != nullptr) {
      __ Bind(fixup_label);
    }
    // Note that GC roots are not affected by heap poisoning, thus we
    // do not have to unpoison `root_reg` here.
  }
}

void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                Location ref,
                                                                CpuRegister obj,
                                                                uint32_t offset,
                                                                bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // /* HeapReference<Object> */ ref = *(obj + offset)
  Address src(obj, offset);
  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}

void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                Location ref,
                                                                CpuRegister obj,
                                                                uint32_t data_offset,
                                                                Location index,
                                                                bool needs_null_check) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  static_assert(
      sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
      "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
  // /* HeapReference<Object> */ ref =
  //     *(obj + data_offset + index * sizeof(HeapReference<Object>))
  Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
  GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
}

void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
                                                                    Location ref,
                                                                    CpuRegister obj,
                                                                    const Address& src,
                                                                    bool needs_null_check,
                                                                    bool always_update_field,
                                                                    CpuRegister* temp1,
                                                                    CpuRegister* temp2) {
  DCHECK(kEmitCompilerReadBarrier);
  DCHECK(kUseBakerReadBarrier);

  // In slow path based read barriers, the read barrier call is
  // inserted after the original load. However, in fast path based
  // Baker's read barriers, we need to perform the load of
  // mirror::Object::monitor_ *before* the original reference load.
  // This load-load ordering is required by the read barrier.
  // The fast path/slow path (for Baker's algorithm) should look like:
  //
  //   uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
  //   lfence;  // Load fence or artificial data dependency to prevent load-load reordering
  //   HeapReference<Object> ref = *src;  // Original reference load.
  //   bool is_gray = (rb_state == ReadBarrier::GrayState());
  //   if (is_gray) {
  //     ref = ReadBarrier::Mark(ref);  // Performed by runtime entrypoint slow path.
  //   }
  //
  // Note: the original implementation in ReadBarrier::Barrier is
  // slightly more complex as:
  // - it implements the load-load fence using a data dependency on
  //   the high-bits of rb_state, which are expected to be all zeroes
  //   (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
  //   here, which is a no-op thanks to the x86-64 memory model);
  // - it performs additional checks that we do not do here for
  //   performance reasons.

  CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
  uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();

  // Given the numeric representation, it's enough to check the low bit of the rb_state.
  static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
  static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
  constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
  constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
  constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
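  // For illustration only (hypothetical value): if kReadBarrierStateShift were 28, the gray
  // bit would live in byte 3, bit 4 of the lock word, and test_value would be 0x10.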

  // if (rb_state == ReadBarrier::GrayState())
  //   ref = ReadBarrier::Mark(ref);
  // At this point, just do the "if" and make sure that flags are preserved until the branch.
  __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
  if (needs_null_check) {
    MaybeRecordImplicitNullCheck(instruction);
  }

  // Load fence to prevent load-load reordering.
  // Note that this is a no-op, thanks to the x86-64 memory model.
  GenerateMemoryBarrier(MemBarrierKind::kLoadAny);

  // The actual reference load.
  // /* HeapReference<Object> */ ref = *src
  __ movl(ref_reg, src);  // Flags are unaffected.

  // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
  // Slow path marking the object `ref` when it is gray.
  SlowPathCode* slow_path;
  if (always_update_field) {
    DCHECK(temp1 != nullptr);
    DCHECK(temp2 != nullptr);
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
        instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
  } else {
    slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
        instruction, ref, /* unpoison_ref_before_marking= */ true);
  }
  AddSlowPath(slow_path);

  // We have done the "if" of the gray bit check above, now branch based on the flags.
  __ j(kNotZero, slow_path->GetEntryLabel());

  // Object* ref = ref_addr->AsMirrorPtr()
  __ MaybeUnpoisonHeapReference(ref_reg);

  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
                                                  Location out,
                                                  Location ref,
                                                  Location obj,
                                                  uint32_t offset,
                                                  Location index) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the reference load.
  //
  // If heap poisoning is enabled, the unpoisoning of the loaded
  // reference will be carried out by the runtime within the slow
  // path.
  //
  // Note that `ref` currently does not get unpoisoned (when heap
  // poisoning is enabled), which is alright as the `ref` argument is
  // not used by the artReadBarrierSlow entry point.
  //
  // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
  SlowPathCode* slow_path = new (GetScopedAllocator())
      ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
                                                       Location out,
                                                       Location ref,
                                                       Location obj,
                                                       uint32_t offset,
                                                       Location index) {
  if (kEmitCompilerReadBarrier) {
    // Baker's read barriers shall be handled by the fast path
    // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
    DCHECK(!kUseBakerReadBarrier);
    // If heap poisoning is enabled, unpoisoning will be taken care of
    // by the runtime within the slow path.
    GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
  } else if (kPoisonHeapReferences) {
    __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
  }
}

void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
                                                         Location out,
                                                         Location root) {
  DCHECK(kEmitCompilerReadBarrier);

  // Insert a slow path based read barrier *after* the GC root load.
  //
  // Note that GC roots are not affected by heap poisoning, so we do
  // not need to do anything special for this here.
  SlowPathCode* slow_path =
      new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
  AddSlowPath(slow_path);

  __ jmp(slow_path->GetEntryLabel());
  __ Bind(slow_path->GetExitLabel());
}

void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
  // Nothing to do, this should be removed during prepare for register allocator.
  LOG(FATAL) << "Unreachable";
}

// Simple implementation of packed switch - generate cascaded compare/jumps.
void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  LocationSummary* locations =
      new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
  locations->AddTemp(Location::RequiresRegister());
}

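// For illustration only (schematic, not emitted verbatim): with lower_bound == 0 and three
// entries, the inline form below is roughly
//   cmpl value, 1 ; jb case0 ; je case1
//   cmpl value, 2 ; je case2
//   jmp default
// Switches with more than kPackedSwitchJumpTableThreshold entries use the jump table path.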
void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
  int32_t lower_bound = switch_instr->GetStartValue();
  uint32_t num_entries = switch_instr->GetNumEntries();
  LocationSummary* locations = switch_instr->GetLocations();
  CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
  CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
  CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
  HBasicBlock* default_block = switch_instr->GetDefaultBlock();

  // Should we generate smaller inline compare/jumps?
  if (num_entries <= kPackedSwitchJumpTableThreshold) {
    // Figure out the correct compare values and jump conditions.
    // Handle the first compare/branch as a special case because it might
    // jump to the default case.
    DCHECK_GT(num_entries, 2u);
    Condition first_condition;
    uint32_t index;
    const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
    if (lower_bound != 0) {
      first_condition = kLess;
      __ cmpl(value_reg_in, Immediate(lower_bound));
      __ j(first_condition, codegen_->GetLabelOf(default_block));
      __ j(kEqual, codegen_->GetLabelOf(successors[0]));

      index = 1;
    } else {
      // Handle all the compare/jumps below.
      first_condition = kBelow;
      index = 0;
    }

    // Handle the rest of the compare/jumps.
    for (; index + 1 < num_entries; index += 2) {
      int32_t compare_to_value = lower_bound + index + 1;
      __ cmpl(value_reg_in, Immediate(compare_to_value));
      // Jump to successors[index] if value < case_value[index].
      __ j(first_condition, codegen_->GetLabelOf(successors[index]));
      // Jump to successors[index + 1] if value == case_value[index + 1].
      __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
    }

    if (index != num_entries) {
      // There are an odd number of entries. Handle the last one.
      DCHECK_EQ(index + 1, num_entries);
      __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
      __ j(kEqual, codegen_->GetLabelOf(successors[index]));
    }

    // And the default for any other value.
    if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
      __ jmp(codegen_->GetLabelOf(default_block));
    }
    return;
  }

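  // Otherwise, use a jump table in the constant area: normalize the value, bounds-check it,
  // then jump indirectly to table_base + table[value].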
  // Remove the bias, if needed.
  Register value_reg_out = value_reg_in.AsRegister();
  if (lower_bound != 0) {
    __ leal(temp_reg, Address(value_reg_in, -lower_bound));
    value_reg_out = temp_reg.AsRegister();
  }
  CpuRegister value_reg(value_reg_out);

  // Is the value in range?
  __ cmpl(value_reg, Immediate(num_entries - 1));
  __ j(kAbove, codegen_->GetLabelOf(default_block));

  // We are in the range of the table.
  // Load the address of the jump table in the constant area.
  __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));

  // Load the (signed) offset from the jump table.
  __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));

  // Add the offset to the address of the table base.
  __ addq(temp_reg, base_reg);

  // And jump.
  __ jmp(temp_reg);
}

void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                      ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
                                                              ATTRIBUTE_UNUSED) {
  LOG(FATAL) << "Unreachable";
}

void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
  if (value == 0) {
    __ xorl(dest, dest);
  } else {
    __ movl(dest, Immediate(value));
  }
}

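// Note: on x86-64, writing a 32-bit register zero-extends into the upper 32 bits, which is
// why the xorl/movl forms below suffice (and are shorter) for zero and unsigned 32-bit values.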
void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
  if (value == 0) {
    // Clears upper bits too.
    __ xorl(dest, dest);
  } else if (IsUint<32>(value)) {
    // We can use a 32 bit move, as it will zero-extend and is shorter.
    __ movl(dest, Immediate(static_cast<int32_t>(value)));
  } else {
    __ movq(dest, Immediate(value));
  }
}

void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
  if (value == 0) {
    __ xorps(dest, dest);
  } else {
    __ movss(dest, LiteralInt32Address(value));
  }
}

void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
  if (value == 0) {
    __ xorpd(dest, dest);
  } else {
    __ movsd(dest, LiteralInt64Address(value));
  }
}

void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
  Load32BitValue(dest, bit_cast<int32_t, float>(value));
}

void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
  Load64BitValue(dest, bit_cast<int64_t, double>(value));
}

void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
  if (value == 0) {
    __ testl(dest, dest);
  } else {
    __ cmpl(dest, Immediate(value));
  }
}

void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
  if (IsInt<32>(value)) {
    if (value == 0) {
      __ testq(dest, dest);
    } else {
      __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
    }
  } else {
    // Value won't fit in a 32-bit immediate, so compare against a literal in the constant area.
    __ cmpq(dest, LiteralInt64Address(value));
  }
}

void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  GenerateIntCompare(lhs_reg, rhs);
}

void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
  if (rhs.IsConstant()) {
    int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
    Compare32BitValue(lhs, value);
  } else if (rhs.IsStackSlot()) {
    __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
  }
}

void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
  CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
  if (rhs.IsConstant()) {
    int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
    Compare64BitValue(lhs_reg, value);
  } else if (rhs.IsDoubleStackSlot()) {
    __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
  } else {
    __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
  }
}

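// For illustration only (hypothetical values): a constant index of 3 with TIMES_4 scale and a
// data_offset of 12 folds to Address(obj, 3 * 4 + 12) = Address(obj, 24); a non-constant index
// uses the scaled-index addressing form instead.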
Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
                                          Location index,
                                          ScaleFactor scale,
                                          uint32_t data_offset) {
  return index.IsConstant() ?
      Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
      Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
}

void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
  DCHECK(dest.IsDoubleStackSlot());
  if (IsInt<32>(value)) {
    // Can move directly as an int32 constant.
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
            Immediate(static_cast<int32_t>(value)));
  } else {
    Load64BitValue(CpuRegister(TMP), value);
    __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
  }
}

/**
 * Class to handle late fixup of offsets into constant area.
 */
class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
 public:
  RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
      : codegen_(&codegen), offset_into_constant_area_(offset) {}

 protected:
  void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }

  CodeGeneratorX86_64* codegen_;

 private:
  void Process(const MemoryRegion& region, int pos) override {
    // Patch the correct offset for the instruction. We use the address of the
    // 'next' instruction, which is 'pos' (patch the 4 bytes before).
    int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
    int32_t relative_position = constant_offset - pos;
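    // For illustration only (hypothetical offsets): if the constant lands at code offset
    // 0x200 and the 4-byte displacement field ends at pos == 0x100, the stored RIP-relative
    // displacement is 0x100.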

    // Patch in the right value.
    region.StoreUnaligned<int32_t>(pos - 4, relative_position);
  }

  // Location in constant area that the fixup refers to.
  size_t offset_into_constant_area_;
};

/**
 * Class to handle late fixup of offsets to a jump table that will be created in the
 * constant area.
 */
class JumpTableRIPFixup : public RIPFixup {
 public:
  JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
      : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}

  void CreateJumpTable() {
    X86_64Assembler* assembler = codegen_->GetAssembler();

    // Ensure that the reference to the jump table has the correct offset.
    const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
    SetOffset(offset_in_constant_table);

    // Compute the offset from the start of the function to this jump table.
    const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;

    // Populate the jump table with the target block offsets.
    int32_t num_entries = switch_instr_->GetNumEntries();
    HBasicBlock* block = switch_instr_->GetBlock();
    const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
    // The value that we want is the target offset - the position of the table.
    for (int32_t i = 0; i < num_entries; i++) {
      HBasicBlock* b = successors[i];
      Label* l = codegen_->GetLabelOf(b);
      DCHECK(l->IsBound());
      int32_t offset_to_block = l->Position() - current_table_offset;
      assembler->AppendInt32(offset_to_block);
    }
  }

 private:
  const HPackedSwitch* switch_instr_;
};

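// The constant area (literal values and jump tables) is emitted after the method's code,
// 4-byte aligned; the RIPFixup displacements recorded earlier are resolved against it.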
void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
  // Generate the constant area if needed.
  X86_64Assembler* assembler = GetAssembler();
  if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
    // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
    assembler->Align(4, 0);
    constant_area_start_ = assembler->CodeSize();

    // Populate any jump tables.
    for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
      jump_table->CreateJumpTable();
    }

    // And now add the constant area to the generated code.
    assembler->AddConstantArea();
  }

  // And finish up.
  CodeGenerator::Finalize(allocator);
}

Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
  return Address::RIP(fixup);
}

Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
  AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
  return Address::RIP(fixup);
}

// TODO: trg as memory.
void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
  if (!trg.IsValid()) {
    DCHECK_EQ(type, DataType::Type::kVoid);
    return;
  }

  DCHECK_NE(type, DataType::Type::kVoid);

  Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
  if (trg.Equals(return_loc)) {
    return;
  }

  // Let the parallel move resolver take care of all of this.
  HParallelMove parallel_move(GetGraph()->GetAllocator());
  parallel_move.AddMove(return_loc, trg, type, nullptr);
  GetMoveResolver()->EmitNativeCode(&parallel_move);
}

Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
  // Create a fixup to be used to create and address the jump table.
  JumpTableRIPFixup* table_fixup =
      new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);

  // Remember the fixup so that the jump table gets populated during Finalize().
  fixups_to_jump_tables_.push_back(table_fixup);
  return Address::RIP(table_fixup);
}

void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
                                             const Address& addr_high,
                                             int64_t v,
                                             HInstruction* instruction) {
  if (IsInt<32>(v)) {
    int32_t v_32 = v;
    __ movq(addr_low, Immediate(v_32));
    MaybeRecordImplicitNullCheck(instruction);
  } else {
    // Doesn't fit in a 32-bit immediate. Store it in two 32-bit pieces.
    int32_t low_v = Low32Bits(v);
    int32_t high_v = High32Bits(v);
    __ movl(addr_low, Immediate(low_v));
    MaybeRecordImplicitNullCheck(instruction);
    __ movl(addr_high, Immediate(high_v));
  }
}

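// Patches a previously emitted 32-bit immediate in `code` so that it holds the address of the
// JIT GC root table entry at `index_in_table` within `roots_data`.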
void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
                                          const uint8_t* roots_data,
                                          const PatchInfo<Label>& info,
                                          uint64_t index_in_table) const {
  uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
  uintptr_t address =
      reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
  using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
  reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
      dchecked_integral_cast<uint32_t>(address);
}

void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
  for (const PatchInfo<Label>& info : jit_string_patches_) {
    StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitStringRootIndex(string_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }

  for (const PatchInfo<Label>& info : jit_class_patches_) {
    TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
    uint64_t index_in_table = GetJitClassRootIndex(type_reference);
    PatchJitRootUse(code, roots_data, info, index_in_table);
  }
}

#undef __

}  // namespace x86_64
}  // namespace art