/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "arch/x86_64/jni_frame_x86_64.h"
#include "art_method-inl.h"
#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "compiled_method.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "intrinsics_x86_64.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/object_reference.h"
#include "mirror/var_handle.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"

namespace art {

template<class MirrorType>
class GcRoot;

namespace x86_64 {

static constexpr int kCurrentMethodStackOffset = 0;
static constexpr Register kMethodRegisterArgument = RDI;
// The compare/jump sequence will generate about (1.5 * num_entries) instructions. A jump
// table version generates 7 instructions and num_entries literals. The compare/jump sequence
// generates less code/data with a small num_entries.
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };

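// The C2 condition flag (bit 10) of the x87 FPU status word; FPREM sets it while the
// partial remainder computation is still incomplete.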
static constexpr int kC2ConditionMask = 0x400;

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  // Custom calling convention: RAX serves as both input and output.
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(RAX));
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()

class NullCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
};

class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
};

class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    __ Bind(GetEntryLabel());
    if (type_ == DataType::Type::kInt32) {
      if (is_div_) {
        __ negl(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }

    } else {
      DCHECK_EQ(DataType::Type::kInt64, type_);
      if (is_div_) {
        __ negq(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }

 private:
  const CpuRegister cpu_reg_;
  const DataType::Type type_;
  const bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};

class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_64_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
};

class BoundsCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, locations);
    }

    Location index_loc = locations->InAt(0);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
    Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));

    // Are we using an array length from memory?
    if (!length_loc.IsValid()) {
      DCHECK(instruction_->InputAt(1)->IsArrayLength());
      HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
      DCHECK(array_length->IsEmittedAtUseSite());
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
      if (!index_loc.Equals(length_arg)) {
        // The index is not clobbered by loading the length directly to `length_arg`.
        __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
        x86_64_codegen->Move(index_arg, index_loc);
      } else if (!array_loc.Equals(index_arg)) {
        // The array reference is not clobbered by the index move.
        x86_64_codegen->Move(index_arg, index_loc);
        __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
      } else {
        // Load the array length into `TMP`.
        DCHECK(codegen->IsBlockedCoreRegister(TMP));
        __ movl(CpuRegister(TMP), array_len);
        // Single move to CPU register does not clobber `TMP`.
        x86_64_codegen->Move(index_arg, index_loc);
        __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
      }
      if (mirror::kUseStringCompression && array_length->IsStringLength()) {
        __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
      }
    } else {
      // We're moving two locations to locations that could overlap,
      // so we need a parallel move resolver.
      codegen->EmitParallelMoves(
          index_loc,
          index_arg,
          DataType::Type::kInt32,
          length_loc,
          length_arg,
          DataType::Type::kInt32);
    }

    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_64_codegen->InvokeRuntime(entrypoint, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};

class LoadClassSlowPathX86_64 : public SlowPathCode {
 public:
  LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
      : SlowPathCode(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    const uint32_t dex_pc = instruction_->GetDexPc();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // Custom calling convention: RAX serves as both input and output.
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()) ||
             x86_64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
             ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
                             &cls_->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ movl(CpuRegister(RAX), Immediate(type_index.index_));
      if (cls_->NeedsAccessCheck()) {
        CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
        x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, dex_pc, this);
      } else {
        CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
        x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, dex_pc, this);
      }
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
    }
    if (must_do_clinit) {
      x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    }

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};

class LoadStringSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    // Custom calling convention: RAX serves as both input and output.
    __ movl(CpuRegister(RAX), Immediate(string_index.index_));
    x86_64_codegen->InvokeRuntime(kQuickResolveString,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
};

class TypeCheckSlowPathX86_64 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    uint32_t dex_pc = instruction_->GetDexPc();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    if (kPoisonHeapReferences &&
        instruction_->IsCheckCast() &&
        instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
      // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
      __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
    }

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, dex_pc, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
      }

      RestoreLiveRegisters(codegen, locations);
      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};

class DeoptimizationSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_64_codegen->Load32BitValue(
        CpuRegister(calling_convention.GetRegisterAt(0)),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};

class ArraySetSlowPathX86_64 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, instruction_->GetDexPc(), this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
                                Location ref,
                                bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsPredicatedInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in R0):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
                                              Location ref,
                                              CpuRegister obj,
                                              const Address& field_addr,
                                              bool unpoison_ref_before_marking,
                                              CpuRegister temp1,
                                              CpuRegister temp2)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp1_(temp1),
        temp2_(temp2) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  const char* GetDescription() const override {
    return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    HInvoke* invoke = instruction_->AsInvoke();
    DCHECK(IsUnsafeCASObject(invoke) || IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp1_, ref_cpu_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in R0):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp1_, ref_cpu_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the
    // mutator updates the field before us, but that's OK. This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
    // This operation is implemented with a 32-bit LOCK CMPXCHG
    // instruction, which requires the expected value (the old
    // reference) to be in EAX. Save RAX beforehand, and move the
    // expected value (stored in `temp1_`) into EAX.
    __ movq(temp2_, CpuRegister(RAX));
    __ movl(CpuRegister(RAX), temp1_);

    // Convenience aliases.
    CpuRegister base = obj_;
    CpuRegister expected = CpuRegister(RAX);
    CpuRegister value = ref_cpu_reg;

    bool base_equals_value = (base.AsRegister() == value.AsRegister());
    Register value_reg = ref_reg;
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value_reg` to a temporary register. This way, poisoning
        // `value_reg` won't invalidate `base`.
        value_reg = temp1_.AsRegister();
        __ movl(CpuRegister(value_reg), base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (RAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value_reg, expected.AsRegister());
      DCHECK_NE(base.AsRegister(), expected.AsRegister());

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(CpuRegister(value_reg));
    }

    __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value_reg` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(CpuRegister(value_reg));
      }
      // No need to unpoison `expected` (RAX), as it will be overwritten below.
    }

    // Restore RAX.
    __ movq(CpuRegister(RAX), temp2_);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const CpuRegister obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const CpuRegister temp1_;
  const CpuRegister temp2_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
                                            Location out,
                                            Location ref,
                                            Location obj,
                                            uint32_t offset,
                                            Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    DCHECK(kEmitCompilerReadBarrier);
    // If `obj` is equal to `out` or `ref`, it means the initial
    // object has been overwritten by (or after) the heap object
    // reference load to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsPredicatedInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute real offset and store it in index_.
        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86_64::X86_64Assembler::shll and
          // art::x86_64::X86_64Assembler::AddImmediate below), but it
          // has not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the
        // scale factor (2) cannot overflow in practice, as the
        // runtime is unable to allocate object arrays with a size
        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetObjectAcquire))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    }
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathX86_64";
  }

 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<CpuRegister>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86-64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
    DCHECK(kEmitCompilerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow,
                                  instruction_,
                                  instruction_->GetDexPc(),
                                  this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
};

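// Slow path calling the runtime method entry or exit hook, used when instrumentation
// (e.g. method tracing) is enabled.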
class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
 public:
  explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    QuickEntrypointEnum entry_point =
        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);
    x86_64_codegen->InvokeRuntime(entry_point, instruction_, instruction_->GetDexPc(), this);
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "MethodEntryExitHooksSlowPath";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
};

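// Slow path calling the kQuickCompileOptimized entrypoint, which asks the JIT to compile the
// current method with the optimizing compiler once it becomes hot.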
class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
 public:
  CompileOptimizedSlowPathX86_64() : SlowPathCode(/* instruction= */ nullptr) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    x86_64_codegen->GenerateInvokeRuntime(
        GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "CompileOptimizedSlowPath";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT

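// Maps an HIR integer condition to the corresponding x86_64 condition code; signed conditions
// use kLess/kGreater(Equal), unsigned ones use kBelow/kAbove(Equal).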
inline Condition X86_64IntegerCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB:  return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA:  return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps FP condition to x86_64 name.
inline Condition X86_64FPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    default:      break;  // should not happen
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
  // We have to ensure that the native code we call directly (such as @CriticalNative
  // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
  // which are non-volatile for ART, but volatile for native calls. This will ensure
  // that they are saved in the prologue and properly restored.
  for (FloatRegister fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

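// All dispatch kinds are supported on x86-64, so the desired dispatch info is returned
// unchanged.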
HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    ArtMethod* method ATTRIBUTE_UNUSED) {
  return desired_dispatch_info;
}

void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
  switch (load_kind) {
    case MethodLoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
      __ leal(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordBootImageMethodPatch(invoke);
      break;
    case MethodLoadKind::kBootImageRelRo: {
      // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
      __ movl(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
      break;
    }
    case MethodLoadKind::kBssEntry: {
      __ movq(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordMethodBssEntryPatch(invoke);
      // No need for memory fence, thanks to the x86-64 memory model.
      break;
    }
    case MethodLoadKind::kJitDirectAddress: {
      Load64BitValue(temp.AsRegister<CpuRegister>(),
                     reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
      break;
    }
    case MethodLoadKind::kRuntimeCall: {
      // Test situation, don't do anything.
      break;
    }
    default: {
      LOG(FATAL) << "Load kind should have already been handled " << load_kind;
      UNREACHABLE();
    }
  }
}

void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  // All registers are assumed to be correctly set up.

  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
      break;
    }
    case MethodLoadKind::kRecursive: {
      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
      break;
    }
    case MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
    case MethodLoadKind::kBootImageLinkTimePcRelative:
      // For kCallCriticalNative we skip loading the method and do the call directly.
      if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
        break;
      }
      FALLTHROUGH_INTENDED;
    default: {
      LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
      break;
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case CodePtrLocation::kCallSelf:
      DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
      __ call(&frame_entry_label_);
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      break;
    case CodePtrLocation::kCallCriticalNative: {
      size_t out_frame_size =
          PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
                                    kNativeStackAlignment,
                                    GetCriticalNativeDirectCallFrameSize>(invoke);
      if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
        DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
        __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
        RecordBootImageJniEntrypointPatch(invoke);
      } else {
        // (callee_method + offset_of_jni_entry_point)()
        __ call(Address(callee_method.AsRegister<CpuRegister>(),
                        ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
      }
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
      switch (invoke->GetType()) {
        case DataType::Type::kBool:
          __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kInt8:
          __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kUint16:
          __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kInt16:
          __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kInt32:
        case DataType::Type::kInt64:
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
        case DataType::Type::kVoid:
          break;
        default:
          DCHECK(false) << invoke->GetType();
          break;
      }
      if (out_frame_size != 0u) {
        DecreaseFrame(out_frame_size);
      }
      break;
    }
    case CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<CpuRegister>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86_64PointerSize).SizeValue()));
      RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
      break;
  }

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86_64::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);

  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(CpuRegister(receiver), class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (though the
  // concurrent copying collector may not do so in the future).
1231 __ MaybeUnpoisonHeapReference(temp);
1232
1233 MaybeGenerateInlineCacheCheck(invoke, temp);
1234
1235 // temp = temp->GetMethodAt(method_offset);
1236 __ movq(temp, Address(temp, method_offset));
1237 // call temp->GetEntryPoint();
1238 __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
1239 kX86_64PointerSize).SizeValue()));
1240 RecordPcInfo(invoke, invoke->GetDexPc(), slow_path);
1241 }
1242
RecordBootImageIntrinsicPatch(uint32_t intrinsic_data)1243 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1244 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1245 __ Bind(&boot_image_other_patches_.back().label);
1246 }
1247
RecordBootImageRelRoPatch(uint32_t boot_image_offset)1248 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1249 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1250 __ Bind(&boot_image_other_patches_.back().label);
1251 }
1252
RecordBootImageMethodPatch(HInvoke * invoke)1253 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1254 boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1255 invoke->GetResolvedMethodReference().index);
1256 __ Bind(&boot_image_method_patches_.back().label);
1257 }
1258
RecordMethodBssEntryPatch(HInvoke * invoke)1259 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1260 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
1261 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
1262 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
1263 invoke->GetMethodReference().dex_file));
1264 method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1265 invoke->GetMethodReference().index);
1266 __ Bind(&method_bss_entry_patches_.back().label);
1267 }
1268
RecordBootImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index)1269 void CodeGeneratorX86_64::RecordBootImageTypePatch(const DexFile& dex_file,
1270 dex::TypeIndex type_index) {
1271 boot_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1272 __ Bind(&boot_image_type_patches_.back().label);
1273 }
1274
NewTypeBssEntryPatch(HLoadClass * load_class)1275 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1276 ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1277 switch (load_class->GetLoadKind()) {
1278 case HLoadClass::LoadKind::kBssEntry:
1279 patches = &type_bss_entry_patches_;
1280 break;
1281 case HLoadClass::LoadKind::kBssEntryPublic:
1282 patches = &public_type_bss_entry_patches_;
1283 break;
1284 case HLoadClass::LoadKind::kBssEntryPackage:
1285 patches = &package_type_bss_entry_patches_;
1286 break;
1287 default:
1288 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1289 UNREACHABLE();
1290 }
1291 patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1292 return &patches->back().label;
1293 }
1294
RecordBootImageStringPatch(HLoadString * load_string)1295 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1296 boot_image_string_patches_.emplace_back(
1297 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1298 __ Bind(&boot_image_string_patches_.back().label);
1299 }
1300
NewStringBssEntryPatch(HLoadString * load_string)1301 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1302 string_bss_entry_patches_.emplace_back(
1303 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1304 return &string_bss_entry_patches_.back().label;
1305 }
1306
1307 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1308 boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1309 invoke->GetResolvedMethodReference().index);
1310 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1311 }
1312
1313 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
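  // Rough shape of the three cases below (exact encodings are produced by the assembler):
  //   compiling the boot image:  leal reg, [rip + placeholder]  <- resolved by a link-time patch
  //   other PIC AOT compiles:    movl reg, [rip + placeholder]  <- loads a boot image RelRo entry
  //   JIT:                       movl reg, imm32                <- absolute boot image address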
1314 if (GetCompilerOptions().IsBootImage()) {
1315 __ leal(reg,
1316 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1317 RecordBootImageIntrinsicPatch(boot_image_reference);
1318 } else if (GetCompilerOptions().GetCompilePic()) {
1319 __ movl(reg,
1320 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1321 RecordBootImageRelRoPatch(boot_image_reference);
1322 } else {
1323 DCHECK(GetCompilerOptions().IsJitCompiler());
1324 gc::Heap* heap = Runtime::Current()->GetHeap();
1325 DCHECK(!heap->GetBootImageSpaces().empty());
1326 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1327 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1328 }
1329 }
1330
1331 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1332 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1333 if (GetCompilerOptions().IsBootImage()) {
1334 // Load the class the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1335 __ leal(reg,
1336 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1337 MethodReference target_method = invoke->GetResolvedMethodReference();
1338 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1339 boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1340 __ Bind(&boot_image_type_patches_.back().label);
1341 } else {
1342 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1343 LoadBootImageAddress(reg, boot_image_offset);
1344 }
1345 }
1346
1347 void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
1348 if (GetCompilerOptions().IsBootImage()) {
1349 ScopedObjectAccess soa(Thread::Current());
1350 ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
1351 boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
1352 __ Bind(&boot_image_type_patches_.back().label);
1353 } else {
1354 uint32_t boot_image_offset = GetBootImageOffset(class_root);
1355 LoadBootImageAddress(reg, boot_image_offset);
1356 }
1357 }
1358
1359 // The label points to the end of the "movl" (or another) instruction, but the literal offset
1360 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
1361 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
1362
1363 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
1364 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1365 const ArenaDeque<PatchInfo<Label>>& infos,
1366 ArenaVector<linker::LinkerPatch>* linker_patches) {
1367 for (const PatchInfo<Label>& info : infos) {
1368 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1369 linker_patches->push_back(
1370 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1371 }
1372 }
1373
1374 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
1375 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1376 const DexFile* target_dex_file,
1377 uint32_t pc_insn_offset,
1378 uint32_t boot_image_offset) {
1379 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
1380 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1381 }
1382
1383 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1384 DCHECK(linker_patches->empty());
1385 size_t size =
1386 boot_image_method_patches_.size() +
1387 method_bss_entry_patches_.size() +
1388 boot_image_type_patches_.size() +
1389 type_bss_entry_patches_.size() +
1390 public_type_bss_entry_patches_.size() +
1391 package_type_bss_entry_patches_.size() +
1392 boot_image_string_patches_.size() +
1393 string_bss_entry_patches_.size() +
1394 boot_image_jni_entrypoint_patches_.size() +
1395 boot_image_other_patches_.size();
1396 linker_patches->reserve(size);
1397 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1398 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1399 boot_image_method_patches_, linker_patches);
1400 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1401 boot_image_type_patches_, linker_patches);
1402 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1403 boot_image_string_patches_, linker_patches);
1404 } else {
1405 DCHECK(boot_image_method_patches_.empty());
1406 DCHECK(boot_image_type_patches_.empty());
1407 DCHECK(boot_image_string_patches_.empty());
1408 }
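  // Note: boot_image_other_patches_ is shared by RecordBootImageIntrinsicPatch() and
  // RecordBootImageRelRoPatch(), so it is emitted with the matching patch factory below.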
1409 if (GetCompilerOptions().IsBootImage()) {
1410 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1411 boot_image_other_patches_, linker_patches);
1412 } else {
1413 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::DataBimgRelRoPatch>>(
1414 boot_image_other_patches_, linker_patches);
1415 }
1416 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1417 method_bss_entry_patches_, linker_patches);
1418 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1419 type_bss_entry_patches_, linker_patches);
1420 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1421 public_type_bss_entry_patches_, linker_patches);
1422 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1423 package_type_bss_entry_patches_, linker_patches);
1424 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1425 string_bss_entry_patches_, linker_patches);
1426 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1427 boot_image_jni_entrypoint_patches_, linker_patches);
1428 DCHECK_EQ(size, linker_patches->size());
1429 }
1430
1431 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1432 stream << Register(reg);
1433 }
1434
1435 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1436 stream << FloatRegister(reg);
1437 }
1438
1439 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1440 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1441 }
1442
1443 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1444 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1445 return kX86_64WordSize;
1446 }
1447
1448 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1449 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1450 return kX86_64WordSize;
1451 }
1452
1453 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1454 if (GetGraph()->HasSIMD()) {
1455 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1456 } else {
1457 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1458 }
1459 return GetSlowPathFPWidth();
1460 }
1461
1462 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1463 if (GetGraph()->HasSIMD()) {
1464 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1465 } else {
1466 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1467 }
1468 return GetSlowPathFPWidth();
1469 }
1470
1471 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1472 HInstruction* instruction,
1473 uint32_t dex_pc,
1474 SlowPathCode* slow_path) {
1475 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1476 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1477 if (EntrypointRequiresStackMap(entrypoint)) {
1478 RecordPcInfo(instruction, dex_pc, slow_path);
1479 }
1480 }
1481
1482 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1483 HInstruction* instruction,
1484 SlowPathCode* slow_path) {
1485 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1486 GenerateInvokeRuntime(entry_point_offset);
1487 }
1488
1489 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
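  // Quick entrypoints live at fixed offsets from the Thread object, which x86-64 reaches via
  // the GS segment, so this is effectively `call gs:[entry_point_offset]`.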
1490 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1491 }
1492
1493 static constexpr int kNumberOfCpuRegisterPairs = 0;
1494 // Use a fake return address register to mimic Quick.
1495 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
1496 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1497 const CompilerOptions& compiler_options,
1498 OptimizingCompilerStats* stats)
1499 : CodeGenerator(graph,
1500 kNumberOfCpuRegisters,
1501 kNumberOfFloatRegisters,
1502 kNumberOfCpuRegisterPairs,
1503 ComputeRegisterMask(reinterpret_cast<const int*>(kCoreCalleeSaves),
1504 arraysize(kCoreCalleeSaves))
1505 | (1 << kFakeReturnRegister),
1506 ComputeRegisterMask(reinterpret_cast<const int*>(kFpuCalleeSaves),
1507 arraysize(kFpuCalleeSaves)),
1508 compiler_options,
1509 stats),
1510 block_labels_(nullptr),
1511 location_builder_(graph, this),
1512 instruction_visitor_(graph, this),
1513 move_resolver_(graph->GetAllocator(), this),
1514 assembler_(graph->GetAllocator(),
1515 compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
1516 constant_area_start_(0),
1517 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1518 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1519 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1520 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1521 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1522 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1523 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1524 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1525 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1526 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1527 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1528 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1529 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1530 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1531 }
1532
1533 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1534 CodeGeneratorX86_64* codegen)
1535 : InstructionCodeGenerator(graph, codegen),
1536 assembler_(codegen->GetAssembler()),
1537 codegen_(codegen) {}
1538
1539 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1540 // Stack register is always reserved.
1541 blocked_core_registers_[RSP] = true;
1542
1543 // Block the register used as TMP.
1544 blocked_core_registers_[TMP] = true;
1545 }
1546
1547 static dwarf::Reg DWARFReg(Register reg) {
1548 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1549 }
1550
1551 static dwarf::Reg DWARFReg(FloatRegister reg) {
1552 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1553 }
1554
1555 void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1556 new (GetGraph()->GetAllocator()) LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1557 }
1558
1559 void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1560 SlowPathCode* slow_path =
1561 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
1562 codegen_->AddSlowPath(slow_path);
1563
1564 uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1565 int offset = instrumentation::Instrumentation::NeedsEntryExitHooksOffset().Int32Value();
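  // TMP holds the absolute address of the instrumentation's "needs entry/exit hooks" flag;
  // a non-zero byte diverts to the slow path that calls the hook.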
1566 __ movq(CpuRegister(TMP), Immediate(address + offset));
1567 __ cmpb(Address(CpuRegister(TMP), 0), Immediate(0));
1568 __ j(kNotEqual, slow_path->GetEntryLabel());
1569 __ Bind(slow_path->GetExitLabel());
1570 }
1571
1572 void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1573 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1574 DCHECK(codegen_->RequiresCurrentMethod());
1575 GenerateMethodEntryExitHook(instruction);
1576 }
1577
1578 void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
1579 switch (instr->InputAt(0)->GetType()) {
1580 case DataType::Type::kReference:
1581 case DataType::Type::kBool:
1582 case DataType::Type::kUint8:
1583 case DataType::Type::kInt8:
1584 case DataType::Type::kUint16:
1585 case DataType::Type::kInt16:
1586 case DataType::Type::kInt32:
1587 case DataType::Type::kInt64:
1588 locations->SetInAt(0, Location::RegisterLocation(RAX));
1589 break;
1590
1591 case DataType::Type::kFloat32:
1592 case DataType::Type::kFloat64:
1593 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1594 break;
1595
1596 case DataType::Type::kVoid:
1597 locations->SetInAt(0, Location::NoLocation());
1598 break;
1599
1600 default:
1601 LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
1602 }
1603 }
1604
1605 void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1606 LocationSummary* locations = new (GetGraph()->GetAllocator())
1607 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1608 SetInForReturnValue(method_hook, locations);
1609 }
1610
1611 void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
1612 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1613 DCHECK(codegen_->RequiresCurrentMethod());
1614 GenerateMethodEntryExitHook(instruction);
1615 }
1616
1617 void CodeGeneratorX86_64::MaybeIncrementHotness(bool is_frame_entry) {
1618 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1619 NearLabel overflow;
1620 Register method = kMethodRegisterArgument;
1621 if (!is_frame_entry) {
1622 CHECK(RequiresCurrentMethod());
1623 method = TMP;
1624 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1625 }
1626 __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1627 Immediate(interpreter::kNterpHotnessValue));
1628 __ j(kEqual, &overflow);
1629 __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1630 Immediate(-1));
1631 __ Bind(&overflow);
1632 }
1633
1634 if (GetGraph()->IsCompilingBaseline() && !Runtime::Current()->IsAotCompiler()) {
1635 SlowPathCode* slow_path = new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64();
1636 AddSlowPath(slow_path);
1637 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1638 DCHECK(info != nullptr);
1639 CHECK(!HasEmptyFrame());
1640 uint64_t address = reinterpret_cast64<uint64_t>(info);
1641 // Note: if the address was in the 32bit range, we could use
1642 // Address::Absolute and avoid this movq.
1643 __ movq(CpuRegister(TMP), Immediate(address));
1644 // With multiple threads, this can overflow. This is OK, we will eventually get to see
1645 // it reaching 0. Also, at this point we have no register available to look
1646 // at the counter directly.
1647 __ addw(Address(CpuRegister(TMP), ProfilingInfo::BaselineHotnessCountOffset().Int32Value()),
1648 Immediate(-1));
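    // When the counter reaches zero the decrement sets ZF, taking the slow path that
    // requests optimized (re)compilation.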
1649 __ j(kEqual, slow_path->GetEntryLabel());
1650 __ Bind(slow_path->GetExitLabel());
1651 }
1652 }
1653
1654 void CodeGeneratorX86_64::GenerateFrameEntry() {
1655 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1656 __ Bind(&frame_entry_label_);
1657 bool skip_overflow_check = IsLeafMethod()
1658 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1659 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1660
1661
1662 if (!skip_overflow_check) {
1663 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
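    // Implicit stack overflow check: touch the page at RSP - reserved_bytes. If the stack
    // would overflow, the access faults and the runtime's fault handler raises a
    // StackOverflowError; testq reads memory without writing any general-purpose register.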
1664 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1665 RecordPcInfo(nullptr, 0);
1666 }
1667
1668 if (!HasEmptyFrame()) {
1669 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1670 Register reg = kCoreCalleeSaves[i];
1671 if (allocated_registers_.ContainsCoreRegister(reg)) {
1672 __ pushq(CpuRegister(reg));
1673 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1674 __ cfi().RelOffset(DWARFReg(reg), 0);
1675 }
1676 }
1677
1678 int adjust = GetFrameSize() - GetCoreSpillSize();
1679 IncreaseFrame(adjust);
1680 uint32_t xmm_spill_location = GetFpuSpillStart();
1681 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1682
1683 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1684 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1685 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1686 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1687 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1688 }
1689 }
1690
1691 // Save the current method if we need it. Note that we do not
1692 // do this in HCurrentMethod, as the instruction might have been removed
1693 // in the SSA graph.
1694 if (RequiresCurrentMethod()) {
1695 CHECK(!HasEmptyFrame());
1696 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1697 CpuRegister(kMethodRegisterArgument));
1698 }
1699
1700 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1701 CHECK(!HasEmptyFrame());
1702 // Initialize should_deoptimize flag to 0.
1703 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1704 }
1705 }
1706
1707 MaybeIncrementHotness(/* is_frame_entry= */ true);
1708 }
1709
1710 void CodeGeneratorX86_64::GenerateFrameExit() {
1711 __ cfi().RememberState();
1712 if (!HasEmptyFrame()) {
1713 uint32_t xmm_spill_location = GetFpuSpillStart();
1714 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1715 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1716 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1717 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1718 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1719 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1720 }
1721 }
1722
1723 int adjust = GetFrameSize() - GetCoreSpillSize();
1724 DecreaseFrame(adjust);
1725
1726 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1727 Register reg = kCoreCalleeSaves[i];
1728 if (allocated_registers_.ContainsCoreRegister(reg)) {
1729 __ popq(CpuRegister(reg));
1730 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1731 __ cfi().Restore(DWARFReg(reg));
1732 }
1733 }
1734 }
1735 __ ret();
1736 __ cfi().RestoreState();
1737 __ cfi().DefCFAOffset(GetFrameSize());
1738 }
1739
1740 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1741 __ Bind(GetLabelOf(block));
1742 }
1743
1744 void CodeGeneratorX86_64::Move(Location destination, Location source) {
1745 if (source.Equals(destination)) {
1746 return;
1747 }
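  // Dispatch on the destination kind; stack-to-stack copies go through TMP and constants are
  // materialized with a load of the width their type requires.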
1748 if (destination.IsRegister()) {
1749 CpuRegister dest = destination.AsRegister<CpuRegister>();
1750 if (source.IsRegister()) {
1751 __ movq(dest, source.AsRegister<CpuRegister>());
1752 } else if (source.IsFpuRegister()) {
1753 __ movd(dest, source.AsFpuRegister<XmmRegister>());
1754 } else if (source.IsStackSlot()) {
1755 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1756 } else if (source.IsConstant()) {
1757 HConstant* constant = source.GetConstant();
1758 if (constant->IsLongConstant()) {
1759 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1760 } else if (constant->IsDoubleConstant()) {
1761 Load64BitValue(dest, GetInt64ValueOf(constant));
1762 } else {
1763 Load32BitValue(dest, GetInt32ValueOf(constant));
1764 }
1765 } else {
1766 DCHECK(source.IsDoubleStackSlot());
1767 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1768 }
1769 } else if (destination.IsFpuRegister()) {
1770 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1771 if (source.IsRegister()) {
1772 __ movd(dest, source.AsRegister<CpuRegister>());
1773 } else if (source.IsFpuRegister()) {
1774 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1775 } else if (source.IsConstant()) {
1776 HConstant* constant = source.GetConstant();
1777 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1778 if (constant->IsFloatConstant()) {
1779 Load32BitValue(dest, static_cast<int32_t>(value));
1780 } else {
1781 Load64BitValue(dest, value);
1782 }
1783 } else if (source.IsStackSlot()) {
1784 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1785 } else {
1786 DCHECK(source.IsDoubleStackSlot());
1787 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1788 }
1789 } else if (destination.IsStackSlot()) {
1790 if (source.IsRegister()) {
1791 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
1792 source.AsRegister<CpuRegister>());
1793 } else if (source.IsFpuRegister()) {
1794 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
1795 source.AsFpuRegister<XmmRegister>());
1796 } else if (source.IsConstant()) {
1797 HConstant* constant = source.GetConstant();
1798 int32_t value = GetInt32ValueOf(constant);
1799 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
1800 } else {
1801 DCHECK(source.IsStackSlot()) << source;
1802 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1803 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1804 }
1805 } else {
1806 DCHECK(destination.IsDoubleStackSlot());
1807 if (source.IsRegister()) {
1808 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
1809 source.AsRegister<CpuRegister>());
1810 } else if (source.IsFpuRegister()) {
1811 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
1812 source.AsFpuRegister<XmmRegister>());
1813 } else if (source.IsConstant()) {
1814 HConstant* constant = source.GetConstant();
1815 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
1816 int64_t value = GetInt64ValueOf(constant);
1817 Store64BitValueToStack(destination, value);
1818 } else {
1819 DCHECK(source.IsDoubleStackSlot());
1820 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
1821 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
1822 }
1823 }
1824 }
1825
1826 void CodeGeneratorX86_64::LoadFromMemoryNoReference(DataType::Type type,
1827 Location dst,
1828 Address src) {
1829 switch (type) {
1830 case DataType::Type::kBool:
1831 case DataType::Type::kUint8:
1832 __ movzxb(dst.AsRegister<CpuRegister>(), src);
1833 break;
1834 case DataType::Type::kInt8:
1835 __ movsxb(dst.AsRegister<CpuRegister>(), src);
1836 break;
1837 case DataType::Type::kUint16:
1838 __ movzxw(dst.AsRegister<CpuRegister>(), src);
1839 break;
1840 case DataType::Type::kInt16:
1841 __ movsxw(dst.AsRegister<CpuRegister>(), src);
1842 break;
1843 case DataType::Type::kInt32:
1844 case DataType::Type::kUint32:
1845 __ movl(dst.AsRegister<CpuRegister>(), src);
1846 break;
1847 case DataType::Type::kInt64:
1848 case DataType::Type::kUint64:
1849 __ movq(dst.AsRegister<CpuRegister>(), src);
1850 break;
1851 case DataType::Type::kFloat32:
1852 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
1853 break;
1854 case DataType::Type::kFloat64:
1855 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
1856 break;
1857 case DataType::Type::kVoid:
1858 case DataType::Type::kReference:
1859 LOG(FATAL) << "Unreachable type " << type;
1860 UNREACHABLE();
1861 }
1862 }
1863
1864 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
1865 DCHECK(location.IsRegister());
1866 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
1867 }
1868
1869 void CodeGeneratorX86_64::MoveLocation(
1870 Location dst, Location src, DataType::Type dst_type ATTRIBUTE_UNUSED) {
1871 Move(dst, src);
1872 }
1873
1874 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
1875 if (location.IsRegister()) {
1876 locations->AddTemp(location);
1877 } else {
1878 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
1879 }
1880 }
1881
1882 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
1883 if (successor->IsExitBlock()) {
1884 DCHECK(got->GetPrevious()->AlwaysThrows());
1885 return; // no code needed
1886 }
1887
1888 HBasicBlock* block = got->GetBlock();
1889 HInstruction* previous = got->GetPrevious();
1890
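  // For a loop back edge that still has its suspend check, bump the hotness counter and emit
  // the suspend check; GenerateSuspendCheck() emits the check and the control flow to the
  // successor.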
1891 HLoopInformation* info = block->GetLoopInformation();
1892 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
1893 codegen_->MaybeIncrementHotness(/* is_frame_entry= */ false);
1894 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
1895 return;
1896 }
1897
1898 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
1899 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
1900 }
1901 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
1902 __ jmp(codegen_->GetLabelOf(successor));
1903 }
1904 }
1905
1906 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
1907 got->SetLocations(nullptr);
1908 }
1909
1910 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
1911 HandleGoto(got, got->GetSuccessor());
1912 }
1913
1914 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1915 try_boundary->SetLocations(nullptr);
1916 }
1917
1918 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
1919 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
1920 if (!successor->IsExitBlock()) {
1921 HandleGoto(try_boundary, successor);
1922 }
1923 }
1924
1925 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
1926 exit->SetLocations(nullptr);
1927 }
1928
1929 void InstructionCodeGeneratorX86_64::VisitExit(HExit* exit ATTRIBUTE_UNUSED) {
1930 }
1931
1932 template<class LabelType>
1933 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
1934 LabelType* true_label,
1935 LabelType* false_label) {
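  // ucomiss/ucomisd report an unordered result (a NaN operand) via PF; route that case first
  // according to the condition's NaN bias, then test the ordered condition.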
1936 if (cond->IsFPConditionTrueIfNaN()) {
1937 __ j(kUnordered, true_label);
1938 } else if (cond->IsFPConditionFalseIfNaN()) {
1939 __ j(kUnordered, false_label);
1940 }
1941 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
1942 }
1943
1944 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
1945 LocationSummary* locations = condition->GetLocations();
1946
1947 Location left = locations->InAt(0);
1948 Location right = locations->InAt(1);
1949 DataType::Type type = condition->InputAt(0)->GetType();
1950 switch (type) {
1951 case DataType::Type::kBool:
1952 case DataType::Type::kUint8:
1953 case DataType::Type::kInt8:
1954 case DataType::Type::kUint16:
1955 case DataType::Type::kInt16:
1956 case DataType::Type::kInt32:
1957 case DataType::Type::kReference: {
1958 codegen_->GenerateIntCompare(left, right);
1959 break;
1960 }
1961 case DataType::Type::kInt64: {
1962 codegen_->GenerateLongCompare(left, right);
1963 break;
1964 }
1965 case DataType::Type::kFloat32: {
1966 if (right.IsFpuRegister()) {
1967 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1968 } else if (right.IsConstant()) {
1969 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1970 codegen_->LiteralFloatAddress(
1971 right.GetConstant()->AsFloatConstant()->GetValue()));
1972 } else {
1973 DCHECK(right.IsStackSlot());
1974 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
1975 Address(CpuRegister(RSP), right.GetStackIndex()));
1976 }
1977 break;
1978 }
1979 case DataType::Type::kFloat64: {
1980 if (right.IsFpuRegister()) {
1981 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
1982 } else if (right.IsConstant()) {
1983 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1984 codegen_->LiteralDoubleAddress(
1985 right.GetConstant()->AsDoubleConstant()->GetValue()));
1986 } else {
1987 DCHECK(right.IsDoubleStackSlot());
1988 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
1989 Address(CpuRegister(RSP), right.GetStackIndex()));
1990 }
1991 break;
1992 }
1993 default:
1994 LOG(FATAL) << "Unexpected condition type " << type;
1995 }
1996 }
1997
1998 template<class LabelType>
1999 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
2000 LabelType* true_target_in,
2001 LabelType* false_target_in) {
2002 // Generated branching requires both targets to be explicit. If either of the
2003 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
2004 LabelType fallthrough_target;
2005 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2006 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2007
2008 // Generate the comparison to set the CC.
2009 GenerateCompareTest(condition);
2010
2011 // Now generate the correct jump(s).
2012 DataType::Type type = condition->InputAt(0)->GetType();
2013 switch (type) {
2014 case DataType::Type::kInt64: {
2015 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2016 break;
2017 }
2018 case DataType::Type::kFloat32: {
2019 GenerateFPJumps(condition, true_target, false_target);
2020 break;
2021 }
2022 case DataType::Type::kFloat64: {
2023 GenerateFPJumps(condition, true_target, false_target);
2024 break;
2025 }
2026 default:
2027 LOG(FATAL) << "Unexpected condition type " << type;
2028 }
2029
2030 if (false_target != &fallthrough_target) {
2031 __ jmp(false_target);
2032 }
2033
2034 if (fallthrough_target.IsLinked()) {
2035 __ Bind(&fallthrough_target);
2036 }
2037 }
2038
2039 static bool AreEflagsSetFrom(HInstruction* cond, HInstruction* branch) {
2040 // Moves may affect the eflags register (move zero uses xorl), so the EFLAGS
2041 // are set only strictly before `branch`. We can't use the eflags on FP
2042 // conditions if they are materialized, due to the complex branching.
2043 return cond->IsCondition() &&
2044 cond->GetNext() == branch &&
2045 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType());
2046 }
2047
2048 template<class LabelType>
2049 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
2050 size_t condition_input_index,
2051 LabelType* true_target,
2052 LabelType* false_target) {
2053 HInstruction* cond = instruction->InputAt(condition_input_index);
2054
2055 if (true_target == nullptr && false_target == nullptr) {
2056 // Nothing to do. The code always falls through.
2057 return;
2058 } else if (cond->IsIntConstant()) {
2059 // Constant condition, statically compared against "true" (integer value 1).
2060 if (cond->AsIntConstant()->IsTrue()) {
2061 if (true_target != nullptr) {
2062 __ jmp(true_target);
2063 }
2064 } else {
2065 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2066 if (false_target != nullptr) {
2067 __ jmp(false_target);
2068 }
2069 }
2070 return;
2071 }
2072
2073 // The following code generates these patterns:
2074 // (1) true_target == nullptr && false_target != nullptr
2075 // - opposite condition true => branch to false_target
2076 // (2) true_target != nullptr && false_target == nullptr
2077 // - condition true => branch to true_target
2078 // (3) true_target != nullptr && false_target != nullptr
2079 // - condition true => branch to true_target
2080 // - branch to false_target
2081 if (IsBooleanValueOrMaterializedCondition(cond)) {
2082 if (AreEflagsSetFrom(cond, instruction)) {
2083 if (true_target == nullptr) {
2084 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
2085 } else {
2086 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
2087 }
2088 } else {
2089 // Materialized condition, compare against 0.
2090 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2091 if (lhs.IsRegister()) {
2092 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
2093 } else {
2094 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
2095 }
2096 if (true_target == nullptr) {
2097 __ j(kEqual, false_target);
2098 } else {
2099 __ j(kNotEqual, true_target);
2100 }
2101 }
2102 } else {
2103 // Condition has not been materialized, use its inputs as the
2104 // comparison and its condition as the branch condition.
2105 HCondition* condition = cond->AsCondition();
2106
2107 // If this is a long or FP comparison that has been folded into
2108 // the HCondition, generate the comparison directly.
2109 DataType::Type type = condition->InputAt(0)->GetType();
2110 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2111 GenerateCompareTestAndBranch(condition, true_target, false_target);
2112 return;
2113 }
2114
2115 Location lhs = condition->GetLocations()->InAt(0);
2116 Location rhs = condition->GetLocations()->InAt(1);
2117 codegen_->GenerateIntCompare(lhs, rhs);
2118 if (true_target == nullptr) {
2119 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
2120 } else {
2121 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2122 }
2123 }
2124
2125 // If neither branch falls through (case 3), the conditional branch to `true_target`
2126 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2127 if (true_target != nullptr && false_target != nullptr) {
2128 __ jmp(false_target);
2129 }
2130 }
2131
2132 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
2133 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2134 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2135 locations->SetInAt(0, Location::Any());
2136 }
2137 }
2138
2139 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
2140 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2141 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2142 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2143 nullptr : codegen_->GetLabelOf(true_successor);
2144 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2145 nullptr : codegen_->GetLabelOf(false_successor);
2146 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2147 }
2148
2149 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2150 LocationSummary* locations = new (GetGraph()->GetAllocator())
2151 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2152 InvokeRuntimeCallingConvention calling_convention;
2153 RegisterSet caller_saves = RegisterSet::Empty();
2154 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2155 locations->SetCustomSlowPathCallerSaves(caller_saves);
2156 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2157 locations->SetInAt(0, Location::Any());
2158 }
2159 }
2160
2161 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2162 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
2163 GenerateTestAndBranch<Label>(deoptimize,
2164 /* condition_input_index= */ 0,
2165 slow_path->GetEntryLabel(),
2166 /* false_target= */ nullptr);
2167 }
2168
2169 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2170 LocationSummary* locations = new (GetGraph()->GetAllocator())
2171 LocationSummary(flag, LocationSummary::kNoCall);
2172 locations->SetOut(Location::RequiresRegister());
2173 }
2174
2175 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2176 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2177 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2178 }
2179
2180 static bool SelectCanUseCMOV(HSelect* select) {
2181 // There are no conditional move instructions for XMMs.
2182 if (DataType::IsFloatingPointType(select->GetType())) {
2183 return false;
2184 }
2185
2186 // A FP condition doesn't generate the single CC that we need.
2187 HInstruction* condition = select->GetCondition();
2188 if (condition->IsCondition() &&
2189 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2190 return false;
2191 }
2192
2193 // We can generate a CMOV for this Select.
2194 return true;
2195 }
2196
2197 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2198 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2199 if (DataType::IsFloatingPointType(select->GetType())) {
2200 locations->SetInAt(0, Location::RequiresFpuRegister());
2201 locations->SetInAt(1, Location::Any());
2202 } else {
2203 locations->SetInAt(0, Location::RequiresRegister());
2204 if (SelectCanUseCMOV(select)) {
2205 if (select->InputAt(1)->IsConstant()) {
2206 locations->SetInAt(1, Location::RequiresRegister());
2207 } else {
2208 locations->SetInAt(1, Location::Any());
2209 }
2210 } else {
2211 locations->SetInAt(1, Location::Any());
2212 }
2213 }
2214 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2215 locations->SetInAt(2, Location::RequiresRegister());
2216 }
2217 locations->SetOut(Location::SameAsFirstInput());
2218 }
2219
2220 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2221 LocationSummary* locations = select->GetLocations();
2222 if (SelectCanUseCMOV(select)) {
2223 // If both the condition and the source types are integer, we can generate
2224 // a CMOV to implement Select.
2225 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2226 Location value_true_loc = locations->InAt(1);
2227 DCHECK(locations->InAt(0).Equals(locations->Out()));
2228
2229 HInstruction* select_condition = select->GetCondition();
2230 Condition cond = kNotEqual;
2231
2232 // Figure out how to test the 'condition'.
2233 if (select_condition->IsCondition()) {
2234 HCondition* condition = select_condition->AsCondition();
2235 if (!condition->IsEmittedAtUseSite()) {
2236 // This was a previously materialized condition.
2237 // Can we use the existing condition code?
2238 if (AreEflagsSetFrom(condition, select)) {
2239 // Materialization was the previous instruction. Condition codes are right.
2240 cond = X86_64IntegerCondition(condition->GetCondition());
2241 } else {
2242 // No, we have to recreate the condition code.
2243 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2244 __ testl(cond_reg, cond_reg);
2245 }
2246 } else {
2247 GenerateCompareTest(condition);
2248 cond = X86_64IntegerCondition(condition->GetCondition());
2249 }
2250 } else {
2251 // Must be a Boolean condition, which needs to be compared to 0.
2252 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2253 __ testl(cond_reg, cond_reg);
2254 }
2255
2256 // If the condition is true, overwrite the output, which already contains false.
2257 // Generate the correct sized CMOV.
2258 bool is_64_bit = DataType::Is64BitType(select->GetType());
2259 if (value_true_loc.IsRegister()) {
2260 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2261 } else {
2262 __ cmov(cond,
2263 value_false,
2264 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2265 }
2266 } else {
2267 NearLabel false_target;
2268 GenerateTestAndBranch<NearLabel>(select,
2269 /* condition_input_index= */ 2,
2270 /* true_target= */ nullptr,
2271 &false_target);
2272 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2273 __ Bind(&false_target);
2274 }
2275 }
2276
2277 void LocationsBuilderX86_64::VisitNativeDebugInfo(HNativeDebugInfo* info) {
2278 new (GetGraph()->GetAllocator()) LocationSummary(info);
2279 }
2280
2281 void InstructionCodeGeneratorX86_64::VisitNativeDebugInfo(HNativeDebugInfo*) {
2282 // MaybeRecordNativeDebugInfo is already called implicitly in CodeGenerator::Compile.
2283 }
2284
2285 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2286 __ subq(CpuRegister(RSP), Immediate(adjustment));
2287 __ cfi().AdjustCFAOffset(adjustment);
2288 }
2289
2290 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2291 __ addq(CpuRegister(RSP), Immediate(adjustment));
2292 __ cfi().AdjustCFAOffset(-adjustment);
2293 }
2294
2295 void CodeGeneratorX86_64::GenerateNop() {
2296 __ nop();
2297 }
2298
2299 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2300 LocationSummary* locations =
2301 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2302 // Handle the long/FP comparisons made in instruction simplification.
2303 switch (cond->InputAt(0)->GetType()) {
2304 case DataType::Type::kInt64:
2305 locations->SetInAt(0, Location::RequiresRegister());
2306 locations->SetInAt(1, Location::Any());
2307 break;
2308 case DataType::Type::kFloat32:
2309 case DataType::Type::kFloat64:
2310 locations->SetInAt(0, Location::RequiresFpuRegister());
2311 locations->SetInAt(1, Location::Any());
2312 break;
2313 default:
2314 locations->SetInAt(0, Location::RequiresRegister());
2315 locations->SetInAt(1, Location::Any());
2316 break;
2317 }
2318 if (!cond->IsEmittedAtUseSite()) {
2319 locations->SetOut(Location::RequiresRegister());
2320 }
2321 }
2322
2323 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2324 if (cond->IsEmittedAtUseSite()) {
2325 return;
2326 }
2327
2328 LocationSummary* locations = cond->GetLocations();
2329 Location lhs = locations->InAt(0);
2330 Location rhs = locations->InAt(1);
2331 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2332 NearLabel true_label, false_label;
2333
2334 switch (cond->InputAt(0)->GetType()) {
2335 default:
2336 // Integer case.
2337
2338 // Clear output register: setcc only sets the low byte.
2339 __ xorl(reg, reg);
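      // The clearing xorl must precede the compare: it modifies EFLAGS.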
2340
2341 codegen_->GenerateIntCompare(lhs, rhs);
2342 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2343 return;
2344 case DataType::Type::kInt64:
2345 // Clear output register: setcc only sets the low byte.
2346 __ xorl(reg, reg);
2347
2348 codegen_->GenerateLongCompare(lhs, rhs);
2349 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2350 return;
2351 case DataType::Type::kFloat32: {
2352 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2353 if (rhs.IsConstant()) {
2354 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2355 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2356 } else if (rhs.IsStackSlot()) {
2357 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2358 } else {
2359 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2360 }
2361 GenerateFPJumps(cond, &true_label, &false_label);
2362 break;
2363 }
2364 case DataType::Type::kFloat64: {
2365 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2366 if (rhs.IsConstant()) {
2367 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2368 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2369 } else if (rhs.IsDoubleStackSlot()) {
2370 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2371 } else {
2372 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2373 }
2374 GenerateFPJumps(cond, &true_label, &false_label);
2375 break;
2376 }
2377 }
2378
2379 // Convert the jumps into the result.
2380 NearLabel done_label;
2381
2382 // False case: result = 0.
2383 __ Bind(&false_label);
2384 __ xorl(reg, reg);
2385 __ jmp(&done_label);
2386
2387 // True case: result = 1.
2388 __ Bind(&true_label);
2389 __ movl(reg, Immediate(1));
2390 __ Bind(&done_label);
2391 }
2392
2393 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2394 HandleCondition(comp);
2395 }
2396
2397 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2398 HandleCondition(comp);
2399 }
2400
2401 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2402 HandleCondition(comp);
2403 }
2404
2405 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2406 HandleCondition(comp);
2407 }
2408
2409 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2410 HandleCondition(comp);
2411 }
2412
2413 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2414 HandleCondition(comp);
2415 }
2416
2417 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2418 HandleCondition(comp);
2419 }
2420
2421 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2422 HandleCondition(comp);
2423 }
2424
2425 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2426 HandleCondition(comp);
2427 }
2428
2429 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2430 HandleCondition(comp);
2431 }
2432
2433 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2434 HandleCondition(comp);
2435 }
2436
2437 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2438 HandleCondition(comp);
2439 }
2440
2441 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2442 HandleCondition(comp);
2443 }
2444
2445 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2446 HandleCondition(comp);
2447 }
2448
2449 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2450 HandleCondition(comp);
2451 }
2452
2453 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2454 HandleCondition(comp);
2455 }
2456
2457 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2458 HandleCondition(comp);
2459 }
2460
2461 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2462 HandleCondition(comp);
2463 }
2464
2465 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2466 HandleCondition(comp);
2467 }
2468
2469 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2470 HandleCondition(comp);
2471 }
2472
2473 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2474 LocationSummary* locations =
2475 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2476 switch (compare->InputAt(0)->GetType()) {
2477 case DataType::Type::kBool:
2478 case DataType::Type::kUint8:
2479 case DataType::Type::kInt8:
2480 case DataType::Type::kUint16:
2481 case DataType::Type::kInt16:
2482 case DataType::Type::kInt32:
2483 case DataType::Type::kInt64: {
2484 locations->SetInAt(0, Location::RequiresRegister());
2485 locations->SetInAt(1, Location::Any());
2486 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2487 break;
2488 }
2489 case DataType::Type::kFloat32:
2490 case DataType::Type::kFloat64: {
2491 locations->SetInAt(0, Location::RequiresFpuRegister());
2492 locations->SetInAt(1, Location::Any());
2493 locations->SetOut(Location::RequiresRegister());
2494 break;
2495 }
2496 default:
2497 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2498 }
2499 }
2500
2501 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2502 LocationSummary* locations = compare->GetLocations();
2503 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2504 Location left = locations->InAt(0);
2505 Location right = locations->InAt(1);
2506
2507 NearLabel less, greater, done;
2508 DataType::Type type = compare->InputAt(0)->GetType();
2509 Condition less_cond = kLess;
2510
2511 switch (type) {
2512 case DataType::Type::kBool:
2513 case DataType::Type::kUint8:
2514 case DataType::Type::kInt8:
2515 case DataType::Type::kUint16:
2516 case DataType::Type::kInt16:
2517 case DataType::Type::kInt32: {
2518 codegen_->GenerateIntCompare(left, right);
2519 break;
2520 }
2521 case DataType::Type::kInt64: {
2522 codegen_->GenerateLongCompare(left, right);
2523 break;
2524 }
2525 case DataType::Type::kFloat32: {
2526 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2527 if (right.IsConstant()) {
2528 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2529 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2530 } else if (right.IsStackSlot()) {
2531 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2532 } else {
2533 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2534 }
2535 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2536 less_cond = kBelow; // ucomis{s,d} sets CF
2537 break;
2538 }
2539 case DataType::Type::kFloat64: {
2540 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2541 if (right.IsConstant()) {
2542 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2543 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2544 } else if (right.IsDoubleStackSlot()) {
2545 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2546 } else {
2547 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2548 }
2549 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2550 less_cond = kBelow; // ucomis{s,d} sets CF
2551 break;
2552 }
2553 default:
2554 LOG(FATAL) << "Unexpected compare type " << type;
2555 }
2556
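  // Materialize the three-way result: 0 if equal, 1 if greater (or unordered with gt bias),
  // -1 if less (or unordered with lt bias).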
2557 __ movl(out, Immediate(0));
2558 __ j(kEqual, &done);
2559 __ j(less_cond, &less);
2560
2561 __ Bind(&greater);
2562 __ movl(out, Immediate(1));
2563 __ jmp(&done);
2564
2565 __ Bind(&less);
2566 __ movl(out, Immediate(-1));
2567
2568 __ Bind(&done);
2569 }
2570
2571 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2572 LocationSummary* locations =
2573 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2574 locations->SetOut(Location::ConstantLocation(constant));
2575 }
2576
2577 void InstructionCodeGeneratorX86_64::VisitIntConstant(HIntConstant* constant ATTRIBUTE_UNUSED) {
2578 // Will be generated at use site.
2579 }
2580
2581 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2582 LocationSummary* locations =
2583 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2584 locations->SetOut(Location::ConstantLocation(constant));
2585 }
2586
2587 void InstructionCodeGeneratorX86_64::VisitNullConstant(HNullConstant* constant ATTRIBUTE_UNUSED) {
2588 // Will be generated at use site.
2589 }
2590
2591 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2592 LocationSummary* locations =
2593 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2594 locations->SetOut(Location::ConstantLocation(constant));
2595 }
2596
2597 void InstructionCodeGeneratorX86_64::VisitLongConstant(HLongConstant* constant ATTRIBUTE_UNUSED) {
2598 // Will be generated at use site.
2599 }
2600
2601 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2602 LocationSummary* locations =
2603 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2604 locations->SetOut(Location::ConstantLocation(constant));
2605 }
2606
2607 void InstructionCodeGeneratorX86_64::VisitFloatConstant(HFloatConstant* constant ATTRIBUTE_UNUSED) {
2608 // Will be generated at use site.
2609 }
2610
2611 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2612 LocationSummary* locations =
2613 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2614 locations->SetOut(Location::ConstantLocation(constant));
2615 }
2616
2617 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2618 HDoubleConstant* constant ATTRIBUTE_UNUSED) {
2619 // Will be generated at use site.
2620 }
2621
2622 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2623 constructor_fence->SetLocations(nullptr);
2624 }
2625
2626 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2627 HConstructorFence* constructor_fence ATTRIBUTE_UNUSED) {
2628 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2629 }
2630
2631 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2632 memory_barrier->SetLocations(nullptr);
2633 }
2634
2635 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2636 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2637 }
2638
2639 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2640 ret->SetLocations(nullptr);
2641 }
2642
2643 void InstructionCodeGeneratorX86_64::VisitReturnVoid(HReturnVoid* ret ATTRIBUTE_UNUSED) {
2644 codegen_->GenerateFrameExit();
2645 }
2646
2647 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2648 LocationSummary* locations =
2649 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2650 SetInForReturnValue(ret, locations);
2651 }
2652
2653 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2654 switch (ret->InputAt(0)->GetType()) {
2655 case DataType::Type::kReference:
2656 case DataType::Type::kBool:
2657 case DataType::Type::kUint8:
2658 case DataType::Type::kInt8:
2659 case DataType::Type::kUint16:
2660 case DataType::Type::kInt16:
2661 case DataType::Type::kInt32:
2662 case DataType::Type::kInt64:
2663 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2664 break;
2665
2666 case DataType::Type::kFloat32: {
2667 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2668 XMM0);
2669 // To simplify callers of an OSR method, we put the return value in both
2670 // a floating-point and a core register.
2671 if (GetGraph()->IsCompilingOsr()) {
2672 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ false);
2673 }
2674 break;
2675 }
2676 case DataType::Type::kFloat64: {
2677 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2678 XMM0);
2679 // To simplify callers of an OSR method, we put the return value in both
2680 // a floating-point and a core register.
2681 if (GetGraph()->IsCompilingOsr()) {
2682 __ movd(CpuRegister(RAX), XmmRegister(XMM0), /* is64bit= */ true);
2683 }
2684 break;
2685 }
2686
2687 default:
2688 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2689 }
2690 codegen_->GenerateFrameExit();
2691 }
2692
2693 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2694 switch (type) {
2695 case DataType::Type::kReference:
2696 case DataType::Type::kBool:
2697 case DataType::Type::kUint8:
2698 case DataType::Type::kInt8:
2699 case DataType::Type::kUint16:
2700 case DataType::Type::kInt16:
2701 case DataType::Type::kUint32:
2702 case DataType::Type::kInt32:
2703 case DataType::Type::kUint64:
2704 case DataType::Type::kInt64:
2705 return Location::RegisterLocation(RAX);
2706
2707 case DataType::Type::kVoid:
2708 return Location::NoLocation();
2709
2710 case DataType::Type::kFloat64:
2711 case DataType::Type::kFloat32:
2712 return Location::FpuRegisterLocation(XMM0);
2713 }
2714
2715 UNREACHABLE();
2716 }
2717
2718 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2719 return Location::RegisterLocation(kMethodRegisterArgument);
2720 }
2721
2722 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
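// Note: stack_index_ advances for every argument (one slot per 32 bits), even for
// arguments passed in registers, so GetStackOffsetOf() reflects the argument's
// position in the full managed argument list when a spill slot is needed.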
2723 switch (type) {
2724 case DataType::Type::kReference:
2725 case DataType::Type::kBool:
2726 case DataType::Type::kUint8:
2727 case DataType::Type::kInt8:
2728 case DataType::Type::kUint16:
2729 case DataType::Type::kInt16:
2730 case DataType::Type::kInt32: {
2731 uint32_t index = gp_index_++;
2732 stack_index_++;
2733 if (index < calling_convention.GetNumberOfRegisters()) {
2734 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2735 } else {
2736 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2737 }
2738 }
2739
2740 case DataType::Type::kInt64: {
2741 uint32_t index = gp_index_;
2742 stack_index_ += 2;
2743 if (index < calling_convention.GetNumberOfRegisters()) {
2744 gp_index_ += 1;
2745 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2746 } else {
2747 gp_index_ += 2;
2748 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2749 }
2750 }
2751
2752 case DataType::Type::kFloat32: {
2753 uint32_t index = float_index_++;
2754 stack_index_++;
2755 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2756 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2757 } else {
2758 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2759 }
2760 }
2761
2762 case DataType::Type::kFloat64: {
2763 uint32_t index = float_index_++;
2764 stack_index_ += 2;
2765 if (index < calling_convention.GetNumberOfFpuRegisters()) {
2766 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
2767 } else {
2768 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
2769 }
2770 }
2771
2772 case DataType::Type::kUint32:
2773 case DataType::Type::kUint64:
2774 case DataType::Type::kVoid:
2775 LOG(FATAL) << "Unexpected parameter type " << type;
2776 UNREACHABLE();
2777 }
2778 return Location::NoLocation();
2779 }
2780
2781 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2782 DCHECK_NE(type, DataType::Type::kReference);
2783
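// @CriticalNative calls take no JNIEnv*/jclass and no reference arguments, so the
// managed arguments map directly onto the native (System V x86-64) ABI below.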
2784 Location location = Location::NoLocation();
2785 if (DataType::IsFloatingPointType(type)) {
2786 if (fpr_index_ < kParameterFloatRegistersLength) {
2787 location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
2788 ++fpr_index_;
2789 }
2790 } else {
2791 // Native ABI uses the same registers as managed, except that the method register RDI
2792 // is a normal argument.
2793 if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
2794 location = Location::RegisterLocation(
2795 gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
2796 ++gpr_index_;
2797 }
2798 }
2799 if (location.IsInvalid()) {
2800 if (DataType::Is64BitType(type)) {
2801 location = Location::DoubleStackSlot(stack_offset_);
2802 } else {
2803 location = Location::StackSlot(stack_offset_);
2804 }
2805 stack_offset_ += kFramePointerSize;
2806
2807 if (for_register_allocation_) {
2808 location = Location::Any();
2809 }
2810 }
2811 return location;
2812 }
2813
2814 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
2815 const {
2816 // We perform conversion to the managed ABI return register after the call if needed.
2817 InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
2818 return dex_calling_convention.GetReturnLocation(type);
2819 }
2820
2821 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
2822 // Pass the method in the hidden argument RAX.
2823 return Location::RegisterLocation(RAX);
2824 }
2825
2826 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2827 // The trampoline uses the same calling convention as the dex calling convention,
2828 // except that instead of loading arg0/r0 with the target Method*, arg0/r0
2829 // contains the method_idx.
2830 HandleInvoke(invoke);
2831 }
2832
2833 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
2834 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
2835 }
2836
2837 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2838 // Explicit clinit checks triggered by static invokes must have been pruned by
2839 // art::PrepareForRegisterAllocation.
2840 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2841
2842 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2843 if (intrinsic.TryDispatch(invoke)) {
2844 return;
2845 }
2846
2847 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
2848 CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
2849 /*for_register_allocation=*/ true);
2850 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2851 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
2852 } else {
2853 HandleInvoke(invoke);
2854 }
2855 }
2856
2857 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2858 if (invoke->GetLocations()->Intrinsified()) {
2859 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
2860 intrinsic.Dispatch(invoke);
2861 return true;
2862 }
2863 return false;
2864 }
2865
2866 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
2867 // Explicit clinit checks triggered by static invokes must have been pruned by
2868 // art::PrepareForRegisterAllocation.
2869 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
2870
2871 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2872 return;
2873 }
2874
2875 LocationSummary* locations = invoke->GetLocations();
2876 codegen_->GenerateStaticOrDirectCall(
2877 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
2878 }
2879
2880 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
2881 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
2882 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
2883 }
2884
2885 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2886 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2887 if (intrinsic.TryDispatch(invoke)) {
2888 return;
2889 }
2890
2891 HandleInvoke(invoke);
2892 }
2893
2894 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
2895 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
2896 return;
2897 }
2898
2899 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
2900 DCHECK(!codegen_->IsLeafMethod());
2901 }
2902
2903 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2904 HandleInvoke(invoke);
2905 // Add the hidden argument.
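// The hidden argument (in RAX) passes the interface method so that the IMT
// conflict trampoline can resolve the intended target when an IMT slot is shared.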
2906 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
2907 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
2908 Location::RegisterLocation(RAX));
2909 }
2910 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
2911 }
2912
2913 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
2914 CpuRegister klass) {
2915 DCHECK_EQ(RDI, klass.AsRegister());
2916 // We know the destination of an intrinsic, so no need to record inline
2917 // caches.
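// For baseline JIT compilation, record the receiver's class in an InlineCache so
// that a later optimizing compilation can use the profile (e.g. for inlining decisions).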
2918 if (!instruction->GetLocations()->Intrinsified() &&
2919 GetGraph()->IsCompilingBaseline() &&
2920 !Runtime::Current()->IsAotCompiler()) {
2921 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2922 DCHECK(info != nullptr);
2923 InlineCache* cache = info->GetInlineCache(instruction->GetDexPc());
2924 uint64_t address = reinterpret_cast64<uint64_t>(cache);
2925 NearLabel done;
2926 __ movq(CpuRegister(TMP), Immediate(address));
2927 // Fast path for a monomorphic cache.
2928 __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
2929 __ j(kEqual, &done);
2930 GenerateInvokeRuntime(
2931 GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
2932 __ Bind(&done);
2933 }
2934 }
2935
2936 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
2937 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
2938 LocationSummary* locations = invoke->GetLocations();
2939 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2940 Location receiver = locations->InAt(0);
2941 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
2942
2943 if (receiver.IsStackSlot()) {
2944 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
2945 // /* HeapReference<Class> */ temp = temp->klass_
2946 __ movl(temp, Address(temp, class_offset));
2947 } else {
2948 // /* HeapReference<Class> */ temp = receiver->klass_
2949 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
2950 }
2951 codegen_->MaybeRecordImplicitNullCheck(invoke);
2952 // Instead of simply (possibly) unpoisoning `temp` here, we should
2953 // emit a read barrier for the previous class reference load.
2954 // However this is not required in practice, as this is an
2955 // intermediate/temporary reference and because the current
2956 // concurrent copying collector keeps the from-space memory
2957 // intact/accessible until the end of the marking phase (the
2958 // concurrent copying collector may not in the future).
2959 __ MaybeUnpoisonHeapReference(temp);
2960
2961 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
2962
2963 if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
2964 invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
2965 Location hidden_reg = locations->GetTemp(1);
2966 // Set the hidden argument. This is safe to do here, as RAX
2967 // won't be modified thereafter, before the `call` instruction.
2968 // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
2969 DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
2970 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
2971 }
2972
2973 // temp = temp->GetAddressOfIMT()
2974 __ movq(temp,
2975 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
2977 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
2978 invoke->GetImtIndex(), kX86_64PointerSize));
2979 // temp = temp->GetImtEntryAt(method_offset);
2980 __ movq(temp, Address(temp, method_offset));
2981 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
2982 // We pass the method from the IMT in case of a conflict. This will ensure
2983 // we go into the runtime to resolve the actual method.
2984 Location hidden_reg = locations->GetTemp(1);
2985 __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
2986 }
2987 // call temp->GetEntryPoint();
2988 __ call(Address(
2989 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
2990
2991 DCHECK(!codegen_->IsLeafMethod());
2992 codegen_->RecordPcInfo(invoke, invoke->GetDexPc());
2993 }
2994
2995 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
2996 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
2997 if (intrinsic.TryDispatch(invoke)) {
2998 return;
2999 }
3000 HandleInvoke(invoke);
3001 }
3002
3003 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3004 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3005 return;
3006 }
3007 codegen_->GenerateInvokePolymorphicCall(invoke);
3008 }
3009
3010 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3011 HandleInvoke(invoke);
3012 }
3013
3014 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3015 codegen_->GenerateInvokeCustomCall(invoke);
3016 }
3017
3018 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
3019 LocationSummary* locations =
3020 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3021 switch (neg->GetResultType()) {
3022 case DataType::Type::kInt32:
3023 case DataType::Type::kInt64:
3024 locations->SetInAt(0, Location::RequiresRegister());
3025 locations->SetOut(Location::SameAsFirstInput());
3026 break;
3027
3028 case DataType::Type::kFloat32:
3029 case DataType::Type::kFloat64:
3030 locations->SetInAt(0, Location::RequiresFpuRegister());
3031 locations->SetOut(Location::SameAsFirstInput());
3032 locations->AddTemp(Location::RequiresFpuRegister());
3033 break;
3034
3035 default:
3036 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3037 }
3038 }
3039
3040 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
3041 LocationSummary* locations = neg->GetLocations();
3042 Location out = locations->Out();
3043 Location in = locations->InAt(0);
3044 switch (neg->GetResultType()) {
3045 case DataType::Type::kInt32:
3046 DCHECK(in.IsRegister());
3047 DCHECK(in.Equals(out));
3048 __ negl(out.AsRegister<CpuRegister>());
3049 break;
3050
3051 case DataType::Type::kInt64:
3052 DCHECK(in.IsRegister());
3053 DCHECK(in.Equals(out));
3054 __ negq(out.AsRegister<CpuRegister>());
3055 break;
3056
3057 case DataType::Type::kFloat32: {
3058 DCHECK(in.Equals(out));
3059 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3060 // Implement float negation with an exclusive or with value
3061 // 0x80000000 (mask for bit 31, representing the sign of a
3062 // single-precision floating-point number).
3063 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
3064 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3065 break;
3066 }
3067
3068 case DataType::Type::kFloat64: {
3069 DCHECK(in.Equals(out));
3070 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3071 // Implement double negation with an exclusive or with value
3072 // 0x8000000000000000 (mask for bit 63, representing the sign of
3073 // a double-precision floating-point number).
3074 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
3075 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3076 break;
3077 }
3078
3079 default:
3080 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3081 }
3082 }
3083
3084 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3085 LocationSummary* locations =
3086 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
3087 DataType::Type result_type = conversion->GetResultType();
3088 DataType::Type input_type = conversion->GetInputType();
3089 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3090 << input_type << " -> " << result_type;
3091
3092 switch (result_type) {
3093 case DataType::Type::kUint8:
3094 case DataType::Type::kInt8:
3095 case DataType::Type::kUint16:
3096 case DataType::Type::kInt16:
3097 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3098 locations->SetInAt(0, Location::Any());
3099 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3100 break;
3101
3102 case DataType::Type::kInt32:
3103 switch (input_type) {
3104 case DataType::Type::kInt64:
3105 locations->SetInAt(0, Location::Any());
3106 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3107 break;
3108
3109 case DataType::Type::kFloat32:
3110 locations->SetInAt(0, Location::RequiresFpuRegister());
3111 locations->SetOut(Location::RequiresRegister());
3112 break;
3113
3114 case DataType::Type::kFloat64:
3115 locations->SetInAt(0, Location::RequiresFpuRegister());
3116 locations->SetOut(Location::RequiresRegister());
3117 break;
3118
3119 default:
3120 LOG(FATAL) << "Unexpected type conversion from " << input_type
3121 << " to " << result_type;
3122 }
3123 break;
3124
3125 case DataType::Type::kInt64:
3126 switch (input_type) {
3127 case DataType::Type::kBool:
3128 case DataType::Type::kUint8:
3129 case DataType::Type::kInt8:
3130 case DataType::Type::kUint16:
3131 case DataType::Type::kInt16:
3132 case DataType::Type::kInt32:
3133 // TODO: We would benefit from a (to-be-implemented)
3134 // Location::RegisterOrStackSlot requirement for this input.
3135 locations->SetInAt(0, Location::RequiresRegister());
3136 locations->SetOut(Location::RequiresRegister());
3137 break;
3138
3139 case DataType::Type::kFloat32:
3140 locations->SetInAt(0, Location::RequiresFpuRegister());
3141 locations->SetOut(Location::RequiresRegister());
3142 break;
3143
3144 case DataType::Type::kFloat64:
3145 locations->SetInAt(0, Location::RequiresFpuRegister());
3146 locations->SetOut(Location::RequiresRegister());
3147 break;
3148
3149 default:
3150 LOG(FATAL) << "Unexpected type conversion from " << input_type
3151 << " to " << result_type;
3152 }
3153 break;
3154
3155 case DataType::Type::kFloat32:
3156 switch (input_type) {
3157 case DataType::Type::kBool:
3158 case DataType::Type::kUint8:
3159 case DataType::Type::kInt8:
3160 case DataType::Type::kUint16:
3161 case DataType::Type::kInt16:
3162 case DataType::Type::kInt32:
3163 locations->SetInAt(0, Location::Any());
3164 locations->SetOut(Location::RequiresFpuRegister());
3165 break;
3166
3167 case DataType::Type::kInt64:
3168 locations->SetInAt(0, Location::Any());
3169 locations->SetOut(Location::RequiresFpuRegister());
3170 break;
3171
3172 case DataType::Type::kFloat64:
3173 locations->SetInAt(0, Location::Any());
3174 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3175 break;
3176
3177 default:
3178 LOG(FATAL) << "Unexpected type conversion from " << input_type
3179 << " to " << result_type;
3180 }
3181 break;
3182
3183 case DataType::Type::kFloat64:
3184 switch (input_type) {
3185 case DataType::Type::kBool:
3186 case DataType::Type::kUint8:
3187 case DataType::Type::kInt8:
3188 case DataType::Type::kUint16:
3189 case DataType::Type::kInt16:
3190 case DataType::Type::kInt32:
3191 locations->SetInAt(0, Location::Any());
3192 locations->SetOut(Location::RequiresFpuRegister());
3193 break;
3194
3195 case DataType::Type::kInt64:
3196 locations->SetInAt(0, Location::Any());
3197 locations->SetOut(Location::RequiresFpuRegister());
3198 break;
3199
3200 case DataType::Type::kFloat32:
3201 locations->SetInAt(0, Location::Any());
3202 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3203 break;
3204
3205 default:
3206 LOG(FATAL) << "Unexpected type conversion from " << input_type
3207 << " to " << result_type;
3208 }
3209 break;
3210
3211 default:
3212 LOG(FATAL) << "Unexpected type conversion from " << input_type
3213 << " to " << result_type;
3214 }
3215 }
3216
3217 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3218 LocationSummary* locations = conversion->GetLocations();
3219 Location out = locations->Out();
3220 Location in = locations->InAt(0);
3221 DataType::Type result_type = conversion->GetResultType();
3222 DataType::Type input_type = conversion->GetInputType();
3223 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3224 << input_type << " -> " << result_type;
3225 switch (result_type) {
3226 case DataType::Type::kUint8:
3227 switch (input_type) {
3228 case DataType::Type::kInt8:
3229 case DataType::Type::kUint16:
3230 case DataType::Type::kInt16:
3231 case DataType::Type::kInt32:
3232 case DataType::Type::kInt64:
3233 if (in.IsRegister()) {
3234 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3235 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3236 __ movzxb(out.AsRegister<CpuRegister>(),
3237 Address(CpuRegister(RSP), in.GetStackIndex()));
3238 } else {
3239 __ movl(out.AsRegister<CpuRegister>(),
3240 Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3241 }
3242 break;
3243
3244 default:
3245 LOG(FATAL) << "Unexpected type conversion from " << input_type
3246 << " to " << result_type;
3247 }
3248 break;
3249
3250 case DataType::Type::kInt8:
3251 switch (input_type) {
3252 case DataType::Type::kUint8:
3253 case DataType::Type::kUint16:
3254 case DataType::Type::kInt16:
3255 case DataType::Type::kInt32:
3256 case DataType::Type::kInt64:
3257 if (in.IsRegister()) {
3258 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3259 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3260 __ movsxb(out.AsRegister<CpuRegister>(),
3261 Address(CpuRegister(RSP), in.GetStackIndex()));
3262 } else {
3263 __ movl(out.AsRegister<CpuRegister>(),
3264 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3265 }
3266 break;
3267
3268 default:
3269 LOG(FATAL) << "Unexpected type conversion from " << input_type
3270 << " to " << result_type;
3271 }
3272 break;
3273
3274 case DataType::Type::kUint16:
3275 switch (input_type) {
3276 case DataType::Type::kInt8:
3277 case DataType::Type::kInt16:
3278 case DataType::Type::kInt32:
3279 case DataType::Type::kInt64:
3280 if (in.IsRegister()) {
3281 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3282 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3283 __ movzxw(out.AsRegister<CpuRegister>(),
3284 Address(CpuRegister(RSP), in.GetStackIndex()));
3285 } else {
3286 __ movl(out.AsRegister<CpuRegister>(),
3287 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3288 }
3289 break;
3290
3291 default:
3292 LOG(FATAL) << "Unexpected type conversion from " << input_type
3293 << " to " << result_type;
3294 }
3295 break;
3296
3297 case DataType::Type::kInt16:
3298 switch (input_type) {
3299 case DataType::Type::kUint16:
3300 case DataType::Type::kInt32:
3301 case DataType::Type::kInt64:
3302 if (in.IsRegister()) {
3303 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3304 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3305 __ movsxw(out.AsRegister<CpuRegister>(),
3306 Address(CpuRegister(RSP), in.GetStackIndex()));
3307 } else {
3308 __ movl(out.AsRegister<CpuRegister>(),
3309 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3310 }
3311 break;
3312
3313 default:
3314 LOG(FATAL) << "Unexpected type conversion from " << input_type
3315 << " to " << result_type;
3316 }
3317 break;
3318
3319 case DataType::Type::kInt32:
3320 switch (input_type) {
3321 case DataType::Type::kInt64:
3322 if (in.IsRegister()) {
3323 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3324 } else if (in.IsDoubleStackSlot()) {
3325 __ movl(out.AsRegister<CpuRegister>(),
3326 Address(CpuRegister(RSP), in.GetStackIndex()));
3327 } else {
3328 DCHECK(in.IsConstant());
3329 DCHECK(in.GetConstant()->IsLongConstant());
3330 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3331 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3332 }
3333 break;
3334
3335 case DataType::Type::kFloat32: {
3336 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3337 CpuRegister output = out.AsRegister<CpuRegister>();
3338 NearLabel done, nan;
3339
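// Java semantics: NaN converts to 0 and anything >= (float)INT_MAX clamps to
// INT_MAX. Negative overflow needs no explicit check because cvttss2si
// returns 0x80000000 (== INT_MIN) for out-of-range inputs.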
3340 __ movl(output, Immediate(kPrimIntMax));
3341 // if input >= (float)INT_MAX goto done
3342 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3343 __ j(kAboveEqual, &done);
3344 // if input == NaN goto nan
3345 __ j(kUnordered, &nan);
3346 // output = float-to-int-truncate(input)
3347 __ cvttss2si(output, input, false);
3348 __ jmp(&done);
3349 __ Bind(&nan);
3350 // output = 0
3351 __ xorl(output, output);
3352 __ Bind(&done);
3353 break;
3354 }
3355
3356 case DataType::Type::kFloat64: {
3357 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3358 CpuRegister output = out.AsRegister<CpuRegister>();
3359 NearLabel done, nan;
3360
3361 __ movl(output, Immediate(kPrimIntMax));
3362 // if input >= (double)INT_MAX goto done
3363 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3364 __ j(kAboveEqual, &done);
3365 // if input == NaN goto nan
3366 __ j(kUnordered, &nan);
3367 // output = double-to-int-truncate(input)
3368 __ cvttsd2si(output, input);
3369 __ jmp(&done);
3370 __ Bind(&nan);
3371 // output = 0
3372 __ xorl(output, output);
3373 __ Bind(&done);
3374 break;
3375 }
3376
3377 default:
3378 LOG(FATAL) << "Unexpected type conversion from " << input_type
3379 << " to " << result_type;
3380 }
3381 break;
3382
3383 case DataType::Type::kInt64:
3384 DCHECK(out.IsRegister());
3385 switch (input_type) {
3386 case DataType::Type::kBool:
3387 case DataType::Type::kUint8:
3388 case DataType::Type::kInt8:
3389 case DataType::Type::kUint16:
3390 case DataType::Type::kInt16:
3391 case DataType::Type::kInt32:
3392 DCHECK(in.IsRegister());
3393 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3394 break;
3395
3396 case DataType::Type::kFloat32: {
3397 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3398 CpuRegister output = out.AsRegister<CpuRegister>();
3399 NearLabel done, nan;
3400
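// Same scheme as the int case: NaN -> 0, values >= (float)LONG_MAX clamp to
// LONG_MAX, and negative overflow is covered by cvttss2si returning
// 0x8000000000000000 (== LONG_MIN) for out-of-range inputs.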
3401 codegen_->Load64BitValue(output, kPrimLongMax);
3402 // if input >= (float)LONG_MAX goto done
3403 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3404 __ j(kAboveEqual, &done);
3405 // if input == NaN goto nan
3406 __ j(kUnordered, &nan);
3407 // output = float-to-long-truncate(input)
3408 __ cvttss2si(output, input, true);
3409 __ jmp(&done);
3410 __ Bind(&nan);
3411 // output = 0
3412 __ xorl(output, output);
3413 __ Bind(&done);
3414 break;
3415 }
3416
3417 case DataType::Type::kFloat64: {
3418 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3419 CpuRegister output = out.AsRegister<CpuRegister>();
3420 NearLabel done, nan;
3421
3422 codegen_->Load64BitValue(output, kPrimLongMax);
3423 // if input >= (double)LONG_MAX goto done
3424 __ comisd(input, codegen_->LiteralDoubleAddress(
3425 static_cast<double>(kPrimLongMax)));
3426 __ j(kAboveEqual, &done);
3427 // if input == NaN goto nan
3428 __ j(kUnordered, &nan);
3429 // output = double-to-long-truncate(input)
3430 __ cvttsd2si(output, input, true);
3431 __ jmp(&done);
3432 __ Bind(&nan);
3433 // output = 0
3434 __ xorl(output, output);
3435 __ Bind(&done);
3436 break;
3437 }
3438
3439 default:
3440 LOG(FATAL) << "Unexpected type conversion from " << input_type
3441 << " to " << result_type;
3442 }
3443 break;
3444
3445 case DataType::Type::kFloat32:
3446 switch (input_type) {
3447 case DataType::Type::kBool:
3448 case DataType::Type::kUint8:
3449 case DataType::Type::kInt8:
3450 case DataType::Type::kUint16:
3451 case DataType::Type::kInt16:
3452 case DataType::Type::kInt32:
3453 if (in.IsRegister()) {
3454 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3455 } else if (in.IsConstant()) {
3456 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3457 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3458 codegen_->Load32BitValue(dest, static_cast<float>(v));
3459 } else {
3460 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3461 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3462 }
3463 break;
3464
3465 case DataType::Type::kInt64:
3466 if (in.IsRegister()) {
3467 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3468 } else if (in.IsConstant()) {
3469 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3470 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3471 codegen_->Load32BitValue(dest, static_cast<float>(v));
3472 } else {
3473 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3474 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3475 }
3476 break;
3477
3478 case DataType::Type::kFloat64:
3479 if (in.IsFpuRegister()) {
3480 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3481 } else if (in.IsConstant()) {
3482 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3483 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3484 codegen_->Load32BitValue(dest, static_cast<float>(v));
3485 } else {
3486 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3487 Address(CpuRegister(RSP), in.GetStackIndex()));
3488 }
3489 break;
3490
3491 default:
3492 LOG(FATAL) << "Unexpected type conversion from " << input_type
3493 << " to " << result_type;
3494 }
3495 break;
3496
3497 case DataType::Type::kFloat64:
3498 switch (input_type) {
3499 case DataType::Type::kBool:
3500 case DataType::Type::kUint8:
3501 case DataType::Type::kInt8:
3502 case DataType::Type::kUint16:
3503 case DataType::Type::kInt16:
3504 case DataType::Type::kInt32:
3505 if (in.IsRegister()) {
3506 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3507 } else if (in.IsConstant()) {
3508 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3509 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3510 codegen_->Load64BitValue(dest, static_cast<double>(v));
3511 } else {
3512 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3513 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3514 }
3515 break;
3516
3517 case DataType::Type::kInt64:
3518 if (in.IsRegister()) {
3519 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3520 } else if (in.IsConstant()) {
3521 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3522 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3523 codegen_->Load64BitValue(dest, static_cast<double>(v));
3524 } else {
3525 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3526 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3527 }
3528 break;
3529
3530 case DataType::Type::kFloat32:
3531 if (in.IsFpuRegister()) {
3532 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3533 } else if (in.IsConstant()) {
3534 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3535 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3536 codegen_->Load64BitValue(dest, static_cast<double>(v));
3537 } else {
3538 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3539 Address(CpuRegister(RSP), in.GetStackIndex()));
3540 }
3541 break;
3542
3543 default:
3544 LOG(FATAL) << "Unexpected type conversion from " << input_type
3545 << " to " << result_type;
3546 }
3547 break;
3548
3549 default:
3550 LOG(FATAL) << "Unexpected type conversion from " << input_type
3551 << " to " << result_type;
3552 }
3553 }
3554
3555 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3556 LocationSummary* locations =
3557 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3558 switch (add->GetResultType()) {
3559 case DataType::Type::kInt32: {
3560 locations->SetInAt(0, Location::RequiresRegister());
3561 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3562 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3563 break;
3564 }
3565
3566 case DataType::Type::kInt64: {
3567 locations->SetInAt(0, Location::RequiresRegister());
3568 // We can use a leaq or addq if the constant can fit in an immediate.
3569 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3570 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3571 break;
3572 }
3573
3574 case DataType::Type::kFloat64:
3575 case DataType::Type::kFloat32: {
3576 locations->SetInAt(0, Location::RequiresFpuRegister());
3577 locations->SetInAt(1, Location::Any());
3578 locations->SetOut(Location::SameAsFirstInput());
3579 break;
3580 }
3581
3582 default:
3583 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3584 }
3585 }
3586
3587 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3588 LocationSummary* locations = add->GetLocations();
3589 Location first = locations->InAt(0);
3590 Location second = locations->InAt(1);
3591 Location out = locations->Out();
3592
3593 switch (add->GetResultType()) {
3594 case DataType::Type::kInt32: {
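// When the output differs from both inputs, leal acts as a non-destructive
// three-operand add (and leaves the flags untouched).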
3595 if (second.IsRegister()) {
3596 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3597 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3598 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3599 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3600 } else {
3601 __ leal(out.AsRegister<CpuRegister>(), Address(
3602 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3603 }
3604 } else if (second.IsConstant()) {
3605 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3606 __ addl(out.AsRegister<CpuRegister>(),
3607 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3608 } else {
3609 __ leal(out.AsRegister<CpuRegister>(), Address(
3610 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3611 }
3612 } else {
3613 DCHECK(first.Equals(locations->Out()));
3614 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3615 }
3616 break;
3617 }
3618
3619 case DataType::Type::kInt64: {
3620 if (second.IsRegister()) {
3621 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3622 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3623 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3624 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3625 } else {
3626 __ leaq(out.AsRegister<CpuRegister>(), Address(
3627 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3628 }
3629 } else {
3630 DCHECK(second.IsConstant());
3631 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3632 int32_t int32_value = Low32Bits(value);
3633 DCHECK_EQ(int32_value, value);
3634 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3635 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3636 } else {
3637 __ leaq(out.AsRegister<CpuRegister>(), Address(
3638 first.AsRegister<CpuRegister>(), int32_value));
3639 }
3640 }
3641 break;
3642 }
3643
3644 case DataType::Type::kFloat32: {
3645 if (second.IsFpuRegister()) {
3646 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3647 } else if (second.IsConstant()) {
3648 __ addss(first.AsFpuRegister<XmmRegister>(),
3649 codegen_->LiteralFloatAddress(
3650 second.GetConstant()->AsFloatConstant()->GetValue()));
3651 } else {
3652 DCHECK(second.IsStackSlot());
3653 __ addss(first.AsFpuRegister<XmmRegister>(),
3654 Address(CpuRegister(RSP), second.GetStackIndex()));
3655 }
3656 break;
3657 }
3658
3659 case DataType::Type::kFloat64: {
3660 if (second.IsFpuRegister()) {
3661 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3662 } else if (second.IsConstant()) {
3663 __ addsd(first.AsFpuRegister<XmmRegister>(),
3664 codegen_->LiteralDoubleAddress(
3665 second.GetConstant()->AsDoubleConstant()->GetValue()));
3666 } else {
3667 DCHECK(second.IsDoubleStackSlot());
3668 __ addsd(first.AsFpuRegister<XmmRegister>(),
3669 Address(CpuRegister(RSP), second.GetStackIndex()));
3670 }
3671 break;
3672 }
3673
3674 default:
3675 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3676 }
3677 }
3678
3679 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3680 LocationSummary* locations =
3681 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3682 switch (sub->GetResultType()) {
3683 case DataType::Type::kInt32: {
3684 locations->SetInAt(0, Location::RequiresRegister());
3685 locations->SetInAt(1, Location::Any());
3686 locations->SetOut(Location::SameAsFirstInput());
3687 break;
3688 }
3689 case DataType::Type::kInt64: {
3690 locations->SetInAt(0, Location::RequiresRegister());
3691 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3692 locations->SetOut(Location::SameAsFirstInput());
3693 break;
3694 }
3695 case DataType::Type::kFloat32:
3696 case DataType::Type::kFloat64: {
3697 locations->SetInAt(0, Location::RequiresFpuRegister());
3698 locations->SetInAt(1, Location::Any());
3699 locations->SetOut(Location::SameAsFirstInput());
3700 break;
3701 }
3702 default:
3703 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3704 }
3705 }
3706
3707 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3708 LocationSummary* locations = sub->GetLocations();
3709 Location first = locations->InAt(0);
3710 Location second = locations->InAt(1);
3711 DCHECK(first.Equals(locations->Out()));
3712 switch (sub->GetResultType()) {
3713 case DataType::Type::kInt32: {
3714 if (second.IsRegister()) {
3715 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3716 } else if (second.IsConstant()) {
3717 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3718 __ subl(first.AsRegister<CpuRegister>(), imm);
3719 } else {
3720 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3721 }
3722 break;
3723 }
3724 case DataType::Type::kInt64: {
3725 if (second.IsConstant()) {
3726 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3727 DCHECK(IsInt<32>(value));
3728 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3729 } else {
3730 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3731 }
3732 break;
3733 }
3734
3735 case DataType::Type::kFloat32: {
3736 if (second.IsFpuRegister()) {
3737 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3738 } else if (second.IsConstant()) {
3739 __ subss(first.AsFpuRegister<XmmRegister>(),
3740 codegen_->LiteralFloatAddress(
3741 second.GetConstant()->AsFloatConstant()->GetValue()));
3742 } else {
3743 DCHECK(second.IsStackSlot());
3744 __ subss(first.AsFpuRegister<XmmRegister>(),
3745 Address(CpuRegister(RSP), second.GetStackIndex()));
3746 }
3747 break;
3748 }
3749
3750 case DataType::Type::kFloat64: {
3751 if (second.IsFpuRegister()) {
3752 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3753 } else if (second.IsConstant()) {
3754 __ subsd(first.AsFpuRegister<XmmRegister>(),
3755 codegen_->LiteralDoubleAddress(
3756 second.GetConstant()->AsDoubleConstant()->GetValue()));
3757 } else {
3758 DCHECK(second.IsDoubleStackSlot());
3759 __ subsd(first.AsFpuRegister<XmmRegister>(),
3760 Address(CpuRegister(RSP), second.GetStackIndex()));
3761 }
3762 break;
3763 }
3764
3765 default:
3766 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3767 }
3768 }
3769
3770 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
3771 LocationSummary* locations =
3772 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
3773 switch (mul->GetResultType()) {
3774 case DataType::Type::kInt32: {
3775 locations->SetInAt(0, Location::RequiresRegister());
3776 locations->SetInAt(1, Location::Any());
3777 if (mul->InputAt(1)->IsIntConstant()) {
3778 // Can use 3 operand multiply.
3779 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3780 } else {
3781 locations->SetOut(Location::SameAsFirstInput());
3782 }
3783 break;
3784 }
3785 case DataType::Type::kInt64: {
3786 locations->SetInAt(0, Location::RequiresRegister());
3787 locations->SetInAt(1, Location::Any());
3788 if (mul->InputAt(1)->IsLongConstant() &&
3789 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
3790 // Can use 3 operand multiply.
3791 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3792 } else {
3793 locations->SetOut(Location::SameAsFirstInput());
3794 }
3795 break;
3796 }
3797 case DataType::Type::kFloat32:
3798 case DataType::Type::kFloat64: {
3799 locations->SetInAt(0, Location::RequiresFpuRegister());
3800 locations->SetInAt(1, Location::Any());
3801 locations->SetOut(Location::SameAsFirstInput());
3802 break;
3803 }
3804
3805 default:
3806 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3807 }
3808 }
3809
3810 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
3811 LocationSummary* locations = mul->GetLocations();
3812 Location first = locations->InAt(0);
3813 Location second = locations->InAt(1);
3814 Location out = locations->Out();
3815 switch (mul->GetResultType()) {
3816 case DataType::Type::kInt32:
3817 // The constant may have ended up in a register, so test explicitly to avoid
3818 // problems where the output may not be the same as the first operand.
3819 if (mul->InputAt(1)->IsIntConstant()) {
3820 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
3821 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
3822 } else if (second.IsRegister()) {
3823 DCHECK(first.Equals(out));
3824 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3825 } else {
3826 DCHECK(first.Equals(out));
3827 DCHECK(second.IsStackSlot());
3828 __ imull(first.AsRegister<CpuRegister>(),
3829 Address(CpuRegister(RSP), second.GetStackIndex()));
3830 }
3831 break;
3832 case DataType::Type::kInt64: {
3833 // The constant may have ended up in a register, so test explicitly to avoid
3834 // problems where the output may not be the same as the first operand.
3835 if (mul->InputAt(1)->IsLongConstant()) {
3836 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
3837 if (IsInt<32>(value)) {
3838 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
3839 Immediate(static_cast<int32_t>(value)));
3840 } else {
3841 // Have to use the constant area.
3842 DCHECK(first.Equals(out));
3843 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
3844 }
3845 } else if (second.IsRegister()) {
3846 DCHECK(first.Equals(out));
3847 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3848 } else {
3849 DCHECK(second.IsDoubleStackSlot());
3850 DCHECK(first.Equals(out));
3851 __ imulq(first.AsRegister<CpuRegister>(),
3852 Address(CpuRegister(RSP), second.GetStackIndex()));
3853 }
3854 break;
3855 }
3856
3857 case DataType::Type::kFloat32: {
3858 DCHECK(first.Equals(out));
3859 if (second.IsFpuRegister()) {
3860 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3861 } else if (second.IsConstant()) {
3862 __ mulss(first.AsFpuRegister<XmmRegister>(),
3863 codegen_->LiteralFloatAddress(
3864 second.GetConstant()->AsFloatConstant()->GetValue()));
3865 } else {
3866 DCHECK(second.IsStackSlot());
3867 __ mulss(first.AsFpuRegister<XmmRegister>(),
3868 Address(CpuRegister(RSP), second.GetStackIndex()));
3869 }
3870 break;
3871 }
3872
3873 case DataType::Type::kFloat64: {
3874 DCHECK(first.Equals(out));
3875 if (second.IsFpuRegister()) {
3876 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3877 } else if (second.IsConstant()) {
3878 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3879 codegen_->LiteralDoubleAddress(
3880 second.GetConstant()->AsDoubleConstant()->GetValue()));
3881 } else {
3882 DCHECK(second.IsDoubleStackSlot());
3883 __ mulsd(first.AsFpuRegister<XmmRegister>(),
3884 Address(CpuRegister(RSP), second.GetStackIndex()));
3885 }
3886 break;
3887 }
3888
3889 default:
3890 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
3891 }
3892 }
3893
3894 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
3895 uint32_t stack_adjustment, bool is_float) {
3896 if (source.IsStackSlot()) {
3897 DCHECK(is_float);
3898 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3899 } else if (source.IsDoubleStackSlot()) {
3900 DCHECK(!is_float);
3901 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
3902 } else {
3903 // Write the value to the temporary location on the stack and load it onto the FP stack.
3904 if (is_float) {
3905 Location stack_temp = Location::StackSlot(temp_offset);
3906 codegen_->Move(stack_temp, source);
3907 __ flds(Address(CpuRegister(RSP), temp_offset));
3908 } else {
3909 Location stack_temp = Location::DoubleStackSlot(temp_offset);
3910 codegen_->Move(stack_temp, source);
3911 __ fldl(Address(CpuRegister(RSP), temp_offset));
3912 }
3913 }
3914 }
3915
3916 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
3917 DataType::Type type = rem->GetResultType();
3918 bool is_float = type == DataType::Type::kFloat32;
3919 size_t elem_size = DataType::Size(type);
3920 LocationSummary* locations = rem->GetLocations();
3921 Location first = locations->InAt(0);
3922 Location second = locations->InAt(1);
3923 Location out = locations->Out();
3924
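// SSE has no floating-point remainder instruction, so compute the remainder on the
// x87 stack. FPREM only performs a partial reduction, hence the retry loop on the
// C2 status flag below.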
3925 // Create stack space for 2 elements.
3926 // TODO: enhance register allocator to ask for stack temporaries.
3927 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
3928
3929 // Load the values to the FP stack in reverse order, using temporaries if needed.
3930 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
3931 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
3932
3933 // Loop doing FPREM until we stabilize.
3934 NearLabel retry;
3935 __ Bind(&retry);
3936 __ fprem();
3937
3938 // Move FP status to AX.
3939 __ fstsw();
3940
3941 // And see if the argument reduction is complete. This is signaled by the
3942 // C2 FPU flag bit set to 0.
3943 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
3944 __ j(kNotEqual, &retry);
3945
3946 // We have settled on the final value. Retrieve it into an XMM register.
3947 // Store FP top of stack to real stack.
3948 if (is_float) {
3949 __ fsts(Address(CpuRegister(RSP), 0));
3950 } else {
3951 __ fstl(Address(CpuRegister(RSP), 0));
3952 }
3953
3954 // Pop the 2 items from the FP stack.
3955 __ fucompp();
3956
3957 // Load the value from the stack into an XMM register.
3958 DCHECK(out.IsFpuRegister()) << out;
3959 if (is_float) {
3960 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3961 } else {
3962 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
3963 }
3964
3965 // And remove the temporary stack space we allocated.
3966 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
3967 }
3968
3969 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
3970 DCHECK(instruction->IsDiv() || instruction->IsRem());
3971
3972 LocationSummary* locations = instruction->GetLocations();
3973 Location second = locations->InAt(1);
3974 DCHECK(second.IsConstant());
3975
3976 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
3977 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
3978 int64_t imm = Int64FromConstant(second.GetConstant());
3979
3980 DCHECK(imm == 1 || imm == -1);
3981
3982 switch (instruction->GetResultType()) {
3983 case DataType::Type::kInt32: {
3984 if (instruction->IsRem()) {
3985 __ xorl(output_register, output_register);
3986 } else {
3987 __ movl(output_register, input_register);
3988 if (imm == -1) {
3989 __ negl(output_register);
3990 }
3991 }
3992 break;
3993 }
3994
3995 case DataType::Type::kInt64: {
3996 if (instruction->IsRem()) {
3997 __ xorl(output_register, output_register);
3998 } else {
3999 __ movq(output_register, input_register);
4000 if (imm == -1) {
4001 __ negq(output_register);
4002 }
4003 }
4004 break;
4005 }
4006
4007 default:
4008 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
4009 }
4010 }
4011 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
4012 LocationSummary* locations = instruction->GetLocations();
4013 Location second = locations->InAt(1);
4014 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4015 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4016 int64_t imm = Int64FromConstant(second.GetConstant());
4017 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4018 uint64_t abs_imm = AbsOrMin(imm);
4019 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
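// rem = numerator & (2^k - 1), then fold a non-zero result back into the negative
// range when the numerator is negative, matching Java's truncated remainder.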
4020 if (instruction->GetResultType() == DataType::Type::kInt32) {
4021 NearLabel done;
4022 __ movl(out, numerator);
4023 __ andl(out, Immediate(abs_imm - 1));
4024 __ j(Condition::kZero, &done);
4025 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm - 1))));
4026 __ testl(numerator, numerator);
4027 __ cmov(Condition::kLess, out, tmp, false);
4028 __ Bind(&done);
4029
4030 } else {
4031 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4032 codegen_->Load64BitValue(tmp, abs_imm - 1);
4033 NearLabel done;
4034
4035 __ movq(out, numerator);
4036 __ andq(out, tmp);
4037 __ j(Condition::kZero, &done);
4038 __ movq(tmp, numerator);
4039 __ sarq(tmp, Immediate(63));
4040 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
4041 __ orq(out, tmp);
4042 __ Bind(&done);
4043 }
4044 }
4045 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
4046 LocationSummary* locations = instruction->GetLocations();
4047 Location second = locations->InAt(1);
4048
4049 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4050 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4051
4052 int64_t imm = Int64FromConstant(second.GetConstant());
4053 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4054 uint64_t abs_imm = AbsOrMin(imm);
4055
4056 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4057
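// Signed division by 2^k: an arithmetic shift alone rounds toward negative
// infinity, so bias negative numerators by (2^k - 1) first to round toward zero.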
4058 if (instruction->GetResultType() == DataType::Type::kInt32) {
4059 // When the denominator is 2, we can simply add the sign bit to the numerator.
4060 // The addl sequence below is used instead of cmov, saving one cycle.
4061 if (abs_imm == 2) {
4062 __ leal(tmp, Address(numerator, 0));
4063 __ shrl(tmp, Immediate(31));
4064 __ addl(tmp, numerator);
4065 } else {
4066 __ leal(tmp, Address(numerator, abs_imm - 1));
4067 __ testl(numerator, numerator);
4068 __ cmov(kGreaterEqual, tmp, numerator);
4069 }
4070 int shift = CTZ(imm);
4071 __ sarl(tmp, Immediate(shift));
4072
4073 if (imm < 0) {
4074 __ negl(tmp);
4075 }
4076
4077 __ movl(output_register, tmp);
4078 } else {
4079 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4080 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
4081 if (abs_imm == 2) {
4082 __ movq(rdx, numerator);
4083 __ shrq(rdx, Immediate(63));
4084 __ addq(rdx, numerator);
4085 } else {
4086 codegen_->Load64BitValue(rdx, abs_imm - 1);
4087 __ addq(rdx, numerator);
4088 __ testq(numerator, numerator);
4089 __ cmov(kGreaterEqual, rdx, numerator);
4090 }
4091 int shift = CTZ(imm);
4092 __ sarq(rdx, Immediate(shift));
4093
4094 if (imm < 0) {
4095 __ negq(rdx);
4096 }
4097
4098 __ movq(output_register, rdx);
4099 }
4100 }
4101
4102 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4103 DCHECK(instruction->IsDiv() || instruction->IsRem());
4104
4105 LocationSummary* locations = instruction->GetLocations();
4106 Location second = locations->InAt(1);
4107
4108 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
4109 : locations->GetTemp(0).AsRegister<CpuRegister>();
4110 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
4111 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
4112 : locations->Out().AsRegister<CpuRegister>();
4113 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4114
4115 DCHECK_EQ(RAX, eax.AsRegister());
4116 DCHECK_EQ(RDX, edx.AsRegister());
4117 if (instruction->IsDiv()) {
4118 DCHECK_EQ(RAX, out.AsRegister());
4119 } else {
4120 DCHECK_EQ(RDX, out.AsRegister());
4121 }
4122
4123 int64_t magic;
4124 int shift;
4125
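// Strength-reduce the constant division to a high multiply by a precomputed
// "magic" reciprocal plus shifts and a sign fix-up (Hacker's Delight style);
// the remainder, if requested, is then numerator - quotient * imm.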
4126 // TODO: can these branches be written as one?
4127 if (instruction->GetResultType() == DataType::Type::kInt32) {
4128 int imm = second.GetConstant()->AsIntConstant()->GetValue();
4129
4130 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
4131
4132 __ movl(numerator, eax);
4133
4134 __ movl(eax, Immediate(magic));
4135 __ imull(numerator);
4136
4137 if (imm > 0 && magic < 0) {
4138 __ addl(edx, numerator);
4139 } else if (imm < 0 && magic > 0) {
4140 __ subl(edx, numerator);
4141 }
4142
4143 if (shift != 0) {
4144 __ sarl(edx, Immediate(shift));
4145 }
4146
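 // EDX += 1 if EDX < 0, rounding the quotient toward zero (mirrors the 64-bit path below).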
4147 __ movl(eax, edx);
4148 __ shrl(edx, Immediate(31));
4149 __ addl(edx, eax);
4150
4151 if (instruction->IsRem()) {
4152 __ movl(eax, numerator);
4153 __ imull(edx, Immediate(imm));
4154 __ subl(eax, edx);
4155 __ movl(edx, eax);
4156 } else {
4157 __ movl(eax, edx);
4158 }
4159 } else {
4160 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4161
4162 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4163
4164 CpuRegister rax = eax;
4165 CpuRegister rdx = edx;
4166
4167 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4168
4169 // Save the numerator.
4170 __ movq(numerator, rax);
4171
4172 // RAX = magic
4173 codegen_->Load64BitValue(rax, magic);
4174
4175 // RDX:RAX = magic * numerator
4176 __ imulq(numerator);
4177
4178 if (imm > 0 && magic < 0) {
4179 // RDX += numerator
4180 __ addq(rdx, numerator);
4181 } else if (imm < 0 && magic > 0) {
4182 // RDX -= numerator
4183 __ subq(rdx, numerator);
4184 }
4185
4186 // Shift if needed.
4187 if (shift != 0) {
4188 __ sarq(rdx, Immediate(shift));
4189 }
4190
4191 // RDX += 1 if RDX < 0
4192 __ movq(rax, rdx);
4193 __ shrq(rdx, Immediate(63));
4194 __ addq(rdx, rax);
4195
4196 if (instruction->IsRem()) {
4197 __ movq(rax, numerator);
4198
4199 if (IsInt<32>(imm)) {
4200 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4201 } else {
4202 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4203 }
4204
4205 __ subq(rax, rdx);
4206 __ movq(rdx, rax);
4207 } else {
4208 __ movq(rax, rdx);
4209 }
4210 }
4211 }
4212
GenerateDivRemIntegral(HBinaryOperation * instruction)4213 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4214 DCHECK(instruction->IsDiv() || instruction->IsRem());
4215 DataType::Type type = instruction->GetResultType();
4216 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4217
4218 bool is_div = instruction->IsDiv();
4219 LocationSummary* locations = instruction->GetLocations();
4220
4221 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4222 Location second = locations->InAt(1);
4223
4224 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4225 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4226
4227 if (second.IsConstant()) {
4228 int64_t imm = Int64FromConstant(second.GetConstant());
4229
4230 if (imm == 0) {
4231 // Do not generate anything. DivZeroCheck would prevent any code from being executed.
4232 } else if (imm == 1 || imm == -1) {
4233 DivRemOneOrMinusOne(instruction);
4234 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4235 if (is_div) {
4236 DivByPowerOfTwo(instruction->AsDiv());
4237 } else {
4238 RemByPowerOfTwo(instruction->AsRem());
4239 }
4240 } else {
4241 DCHECK(imm <= -2 || imm >= 2);
4242 GenerateDivRemWithAnyConstant(instruction);
4243 }
4244 } else {
4245 SlowPathCode* slow_path =
4246 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4247 instruction, out.AsRegister(), type, is_div);
4248 codegen_->AddSlowPath(slow_path);
4249
4250 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4251 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4252 // Dividing by -1 is actually a negation, and -0x80000000(00000000) = 0x80000000(00000000),
4253 // so it's safe to just use negl instead of more complex comparisons.
4254 if (type == DataType::Type::kInt32) {
4255 __ cmpl(second_reg, Immediate(-1));
4256 __ j(kEqual, slow_path->GetEntryLabel());
4257 // edx:eax <- sign-extended of eax
4258 __ cdq();
4259 // eax = quotient, edx = remainder
4260 __ idivl(second_reg);
4261 } else {
4262 __ cmpq(second_reg, Immediate(-1));
4263 __ j(kEqual, slow_path->GetEntryLabel());
4264 // rdx:rax <- sign-extended of rax
4265 __ cqo();
4266 // rax = quotient, rdx = remainder
4267 __ idivq(second_reg);
4268 }
4269 __ Bind(slow_path->GetExitLabel());
4270 }
4271 }
4272
VisitDiv(HDiv * div)4273 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4274 LocationSummary* locations =
4275 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4276 switch (div->GetResultType()) {
4277 case DataType::Type::kInt32:
4278 case DataType::Type::kInt64: {
4279 locations->SetInAt(0, Location::RegisterLocation(RAX));
4280 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4281 locations->SetOut(Location::SameAsFirstInput());
4282 // Intel uses edx:eax as the dividend.
4283 locations->AddTemp(Location::RegisterLocation(RDX));
4284 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4285 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4286 // output and request another temp.
4287 if (div->InputAt(1)->IsConstant()) {
4288 locations->AddTemp(Location::RequiresRegister());
4289 }
4290 break;
4291 }
4292
4293 case DataType::Type::kFloat32:
4294 case DataType::Type::kFloat64: {
4295 locations->SetInAt(0, Location::RequiresFpuRegister());
4296 locations->SetInAt(1, Location::Any());
4297 locations->SetOut(Location::SameAsFirstInput());
4298 break;
4299 }
4300
4301 default:
4302 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4303 }
4304 }
4305
VisitDiv(HDiv * div)4306 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4307 LocationSummary* locations = div->GetLocations();
4308 Location first = locations->InAt(0);
4309 Location second = locations->InAt(1);
4310 DCHECK(first.Equals(locations->Out()));
4311
4312 DataType::Type type = div->GetResultType();
4313 switch (type) {
4314 case DataType::Type::kInt32:
4315 case DataType::Type::kInt64: {
4316 GenerateDivRemIntegral(div);
4317 break;
4318 }
4319
4320 case DataType::Type::kFloat32: {
4321 if (second.IsFpuRegister()) {
4322 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4323 } else if (second.IsConstant()) {
4324 __ divss(first.AsFpuRegister<XmmRegister>(),
4325 codegen_->LiteralFloatAddress(
4326 second.GetConstant()->AsFloatConstant()->GetValue()));
4327 } else {
4328 DCHECK(second.IsStackSlot());
4329 __ divss(first.AsFpuRegister<XmmRegister>(),
4330 Address(CpuRegister(RSP), second.GetStackIndex()));
4331 }
4332 break;
4333 }
4334
4335 case DataType::Type::kFloat64: {
4336 if (second.IsFpuRegister()) {
4337 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4338 } else if (second.IsConstant()) {
4339 __ divsd(first.AsFpuRegister<XmmRegister>(),
4340 codegen_->LiteralDoubleAddress(
4341 second.GetConstant()->AsDoubleConstant()->GetValue()));
4342 } else {
4343 DCHECK(second.IsDoubleStackSlot());
4344 __ divsd(first.AsFpuRegister<XmmRegister>(),
4345 Address(CpuRegister(RSP), second.GetStackIndex()));
4346 }
4347 break;
4348 }
4349
4350 default:
4351 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4352 }
4353 }
4354
VisitRem(HRem * rem)4355 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4356 DataType::Type type = rem->GetResultType();
4357 LocationSummary* locations =
4358 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4359
4360 switch (type) {
4361 case DataType::Type::kInt32:
4362 case DataType::Type::kInt64: {
4363 locations->SetInAt(0, Location::RegisterLocation(RAX));
4364 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4365 // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4366 locations->SetOut(Location::RegisterLocation(RDX));
4367 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4368 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4369 // output and request another temp.
4370 if (rem->InputAt(1)->IsConstant()) {
4371 locations->AddTemp(Location::RequiresRegister());
4372 }
4373 break;
4374 }
4375
4376 case DataType::Type::kFloat32:
4377 case DataType::Type::kFloat64: {
4378 locations->SetInAt(0, Location::Any());
4379 locations->SetInAt(1, Location::Any());
4380 locations->SetOut(Location::RequiresFpuRegister());
4381 locations->AddTemp(Location::RegisterLocation(RAX));
4382 break;
4383 }
4384
4385 default:
4386 LOG(FATAL) << "Unexpected rem type " << type;
4387 }
4388 }
4389
VisitRem(HRem * rem)4390 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4391 DataType::Type type = rem->GetResultType();
4392 switch (type) {
4393 case DataType::Type::kInt32:
4394 case DataType::Type::kInt64: {
4395 GenerateDivRemIntegral(rem);
4396 break;
4397 }
4398 case DataType::Type::kFloat32:
4399 case DataType::Type::kFloat64: {
4400 GenerateRemFP(rem);
4401 break;
4402 }
4403 default:
4404 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4405 }
4406 }
4407
CreateMinMaxLocations(ArenaAllocator * allocator,HBinaryOperation * minmax)4408 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4409 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4410 switch (minmax->GetResultType()) {
4411 case DataType::Type::kInt32:
4412 case DataType::Type::kInt64:
4413 locations->SetInAt(0, Location::RequiresRegister());
4414 locations->SetInAt(1, Location::RequiresRegister());
4415 locations->SetOut(Location::SameAsFirstInput());
4416 break;
4417 case DataType::Type::kFloat32:
4418 case DataType::Type::kFloat64:
4419 locations->SetInAt(0, Location::RequiresFpuRegister());
4420 locations->SetInAt(1, Location::RequiresFpuRegister());
4421 // The following is sub-optimal, but it is all we can do for now. It would be fine to also
4422 // accept the second input as the output (we could simply swap the inputs).
4423 locations->SetOut(Location::SameAsFirstInput());
4424 break;
4425 default:
4426 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4427 }
4428 }
4429
GenerateMinMaxInt(LocationSummary * locations,bool is_min,DataType::Type type)4430 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4431 bool is_min,
4432 DataType::Type type) {
4433 Location op1_loc = locations->InAt(0);
4434 Location op2_loc = locations->InAt(1);
4435
4436 // Shortcut for same input locations.
4437 if (op1_loc.Equals(op2_loc)) {
4438 // Can return immediately, as op1_loc == out_loc.
4439 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4440 // a copy here.
4441 DCHECK(locations->Out().Equals(op1_loc));
4442 return;
4443 }
4444
4445 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4446 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4447
4448 // (out := op1)
4449 // out <=? op2
4450 // if out is min jmp done
4451 // out := op2
4452 // done:
4453
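 // out already holds op1, so a single cmov is enough: for min, replace out with op2 when
 // out > op2; for max, when out < op2.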
4454 if (type == DataType::Type::kInt64) {
4455 __ cmpq(out, op2);
4456 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4457 } else {
4458 DCHECK_EQ(type, DataType::Type::kInt32);
4459 __ cmpl(out, op2);
4460 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4461 }
4462 }
4463
GenerateMinMaxFP(LocationSummary * locations,bool is_min,DataType::Type type)4464 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4465 bool is_min,
4466 DataType::Type type) {
4467 Location op1_loc = locations->InAt(0);
4468 Location op2_loc = locations->InAt(1);
4469 Location out_loc = locations->Out();
4470 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4471
4472 // Shortcut for same input locations.
4473 if (op1_loc.Equals(op2_loc)) {
4474 DCHECK(out_loc.Equals(op1_loc));
4475 return;
4476 }
4477
4478 // (out := op1)
4479 // out <=? op2
4480 // if Nan jmp Nan_label
4481 // if out is min jmp done
4482 // if op2 is min jmp op2_label
4483 // handle -0/+0
4484 // jmp done
4485 // Nan_label:
4486 // out := NaN
4487 // op2_label:
4488 // out := op2
4489 // done:
4490 //
4491 // This removes one jmp, but needs to copy one input (op1) to out.
4492 //
4493 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4494
4495 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4496
4497 NearLabel nan, done, op2_label;
4498 if (type == DataType::Type::kFloat64) {
4499 __ ucomisd(out, op2);
4500 } else {
4501 DCHECK_EQ(type, DataType::Type::kFloat32);
4502 __ ucomiss(out, op2);
4503 }
4504
4505 __ j(Condition::kParityEven, &nan);
4506
4507 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4508 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4509
4510 // Handle 0.0/-0.0.
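 // At this point the operands compare equal, so they can only differ in sign. OR keeps a
 // sign bit set if either operand is -0.0 (min(+0.0, -0.0) == -0.0), while AND keeps it
 // only if both are -0.0 (max(+0.0, -0.0) == +0.0).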
4511 if (is_min) {
4512 if (type == DataType::Type::kFloat64) {
4513 __ orpd(out, op2);
4514 } else {
4515 __ orps(out, op2);
4516 }
4517 } else {
4518 if (type == DataType::Type::kFloat64) {
4519 __ andpd(out, op2);
4520 } else {
4521 __ andps(out, op2);
4522 }
4523 }
4524 __ jmp(&done);
4525
4526 // NaN handling.
4527 __ Bind(&nan);
4528 if (type == DataType::Type::kFloat64) {
4529 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4530 } else {
4531 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4532 }
4533 __ jmp(&done);
4534
4535 // out := op2;
4536 __ Bind(&op2_label);
4537 if (type == DataType::Type::kFloat64) {
4538 __ movsd(out, op2);
4539 } else {
4540 __ movss(out, op2);
4541 }
4542
4543 // Done.
4544 __ Bind(&done);
4545 }
4546
GenerateMinMax(HBinaryOperation * minmax,bool is_min)4547 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4548 DataType::Type type = minmax->GetResultType();
4549 switch (type) {
4550 case DataType::Type::kInt32:
4551 case DataType::Type::kInt64:
4552 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4553 break;
4554 case DataType::Type::kFloat32:
4555 case DataType::Type::kFloat64:
4556 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4557 break;
4558 default:
4559 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4560 }
4561 }
4562
VisitMin(HMin * min)4563 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4564 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4565 }
4566
VisitMin(HMin * min)4567 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4568 GenerateMinMax(min, /*is_min*/ true);
4569 }
4570
VisitMax(HMax * max)4571 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4572 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4573 }
4574
VisitMax(HMax * max)4575 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4576 GenerateMinMax(max, /*is_min*/ false);
4577 }
4578
VisitAbs(HAbs * abs)4579 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4580 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4581 switch (abs->GetResultType()) {
4582 case DataType::Type::kInt32:
4583 case DataType::Type::kInt64:
4584 locations->SetInAt(0, Location::RequiresRegister());
4585 locations->SetOut(Location::SameAsFirstInput());
4586 locations->AddTemp(Location::RequiresRegister());
4587 break;
4588 case DataType::Type::kFloat32:
4589 case DataType::Type::kFloat64:
4590 locations->SetInAt(0, Location::RequiresFpuRegister());
4591 locations->SetOut(Location::SameAsFirstInput());
4592 locations->AddTemp(Location::RequiresFpuRegister());
4593 break;
4594 default:
4595 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4596 }
4597 }
4598
VisitAbs(HAbs * abs)4599 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4600 LocationSummary* locations = abs->GetLocations();
4601 switch (abs->GetResultType()) {
4602 case DataType::Type::kInt32: {
4603 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4604 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
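 // abs(x) = (x + mask) ^ mask with mask = x >> 31: mask is 0 for non-negative x (no-op)
 // and -1 for negative x, in which case adding -1 and flipping all bits negates x.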
4605 // Create mask.
4606 __ movl(mask, out);
4607 __ sarl(mask, Immediate(31));
4608 // Add mask.
4609 __ addl(out, mask);
4610 __ xorl(out, mask);
4611 break;
4612 }
4613 case DataType::Type::kInt64: {
4614 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4615 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4616 // Create mask.
4617 __ movq(mask, out);
4618 __ sarq(mask, Immediate(63));
4619 // Add mask.
4620 __ addq(out, mask);
4621 __ xorq(out, mask);
4622 break;
4623 }
4624 case DataType::Type::kFloat32: {
4625 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4626 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4627 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4628 __ andps(out, mask);
4629 break;
4630 }
4631 case DataType::Type::kFloat64: {
4632 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4633 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4634 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4635 __ andpd(out, mask);
4636 break;
4637 }
4638 default:
4639 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4640 }
4641 }
4642
VisitDivZeroCheck(HDivZeroCheck * instruction)4643 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4644 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4645 locations->SetInAt(0, Location::Any());
4646 }
4647
VisitDivZeroCheck(HDivZeroCheck * instruction)4648 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4649 SlowPathCode* slow_path =
4650 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4651 codegen_->AddSlowPath(slow_path);
4652
4653 LocationSummary* locations = instruction->GetLocations();
4654 Location value = locations->InAt(0);
4655
4656 switch (instruction->GetType()) {
4657 case DataType::Type::kBool:
4658 case DataType::Type::kUint8:
4659 case DataType::Type::kInt8:
4660 case DataType::Type::kUint16:
4661 case DataType::Type::kInt16:
4662 case DataType::Type::kInt32: {
4663 if (value.IsRegister()) {
4664 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4665 __ j(kEqual, slow_path->GetEntryLabel());
4666 } else if (value.IsStackSlot()) {
4667 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4668 __ j(kEqual, slow_path->GetEntryLabel());
4669 } else {
4670 DCHECK(value.IsConstant()) << value;
4671 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4672 __ jmp(slow_path->GetEntryLabel());
4673 }
4674 }
4675 break;
4676 }
4677 case DataType::Type::kInt64: {
4678 if (value.IsRegister()) {
4679 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4680 __ j(kEqual, slow_path->GetEntryLabel());
4681 } else if (value.IsDoubleStackSlot()) {
4682 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4683 __ j(kEqual, slow_path->GetEntryLabel());
4684 } else {
4685 DCHECK(value.IsConstant()) << value;
4686 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4687 __ jmp(slow_path->GetEntryLabel());
4688 }
4689 }
4690 break;
4691 }
4692 default:
4693 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4694 }
4695 }
4696
HandleShift(HBinaryOperation * op)4697 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4698 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4699
4700 LocationSummary* locations =
4701 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4702
4703 switch (op->GetResultType()) {
4704 case DataType::Type::kInt32:
4705 case DataType::Type::kInt64: {
4706 locations->SetInAt(0, Location::RequiresRegister());
4707 // The shift count needs to be in CL.
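 // Hardware shifts mask the count in CL to 5 bits (6 bits for 64-bit operands), which
 // matches Java shift semantics; constant shift counts are masked explicitly when the
 // immediate is emitted by the code generator.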
4708 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4709 locations->SetOut(Location::SameAsFirstInput());
4710 break;
4711 }
4712 default:
4713 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4714 }
4715 }
4716
HandleShift(HBinaryOperation * op)4717 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4718 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4719
4720 LocationSummary* locations = op->GetLocations();
4721 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4722 Location second = locations->InAt(1);
4723
4724 switch (op->GetResultType()) {
4725 case DataType::Type::kInt32: {
4726 if (second.IsRegister()) {
4727 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4728 if (op->IsShl()) {
4729 __ shll(first_reg, second_reg);
4730 } else if (op->IsShr()) {
4731 __ sarl(first_reg, second_reg);
4732 } else {
4733 __ shrl(first_reg, second_reg);
4734 }
4735 } else {
4736 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4737 if (op->IsShl()) {
4738 __ shll(first_reg, imm);
4739 } else if (op->IsShr()) {
4740 __ sarl(first_reg, imm);
4741 } else {
4742 __ shrl(first_reg, imm);
4743 }
4744 }
4745 break;
4746 }
4747 case DataType::Type::kInt64: {
4748 if (second.IsRegister()) {
4749 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4750 if (op->IsShl()) {
4751 __ shlq(first_reg, second_reg);
4752 } else if (op->IsShr()) {
4753 __ sarq(first_reg, second_reg);
4754 } else {
4755 __ shrq(first_reg, second_reg);
4756 }
4757 } else {
4758 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4759 if (op->IsShl()) {
4760 __ shlq(first_reg, imm);
4761 } else if (op->IsShr()) {
4762 __ sarq(first_reg, imm);
4763 } else {
4764 __ shrq(first_reg, imm);
4765 }
4766 }
4767 break;
4768 }
4769 default:
4770 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4771 UNREACHABLE();
4772 }
4773 }
4774
VisitRor(HRor * ror)4775 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
4776 LocationSummary* locations =
4777 new (GetGraph()->GetAllocator()) LocationSummary(ror, LocationSummary::kNoCall);
4778
4779 switch (ror->GetResultType()) {
4780 case DataType::Type::kInt32:
4781 case DataType::Type::kInt64: {
4782 locations->SetInAt(0, Location::RequiresRegister());
4783 // The shift count needs to be in CL (unless it is a constant).
4784 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, ror->InputAt(1)));
4785 locations->SetOut(Location::SameAsFirstInput());
4786 break;
4787 }
4788 default:
4789 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4790 UNREACHABLE();
4791 }
4792 }
4793
VisitRor(HRor * ror)4794 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
4795 LocationSummary* locations = ror->GetLocations();
4796 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4797 Location second = locations->InAt(1);
4798
4799 switch (ror->GetResultType()) {
4800 case DataType::Type::kInt32:
4801 if (second.IsRegister()) {
4802 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4803 __ rorl(first_reg, second_reg);
4804 } else {
4805 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
4806 __ rorl(first_reg, imm);
4807 }
4808 break;
4809 case DataType::Type::kInt64:
4810 if (second.IsRegister()) {
4811 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4812 __ rorq(first_reg, second_reg);
4813 } else {
4814 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
4815 __ rorq(first_reg, imm);
4816 }
4817 break;
4818 default:
4819 LOG(FATAL) << "Unexpected operation type " << ror->GetResultType();
4820 UNREACHABLE();
4821 }
4822 }
4823
VisitShl(HShl * shl)4824 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
4825 HandleShift(shl);
4826 }
4827
VisitShl(HShl * shl)4828 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
4829 HandleShift(shl);
4830 }
4831
VisitShr(HShr * shr)4832 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
4833 HandleShift(shr);
4834 }
4835
VisitShr(HShr * shr)4836 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
4837 HandleShift(shr);
4838 }
4839
VisitUShr(HUShr * ushr)4840 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
4841 HandleShift(ushr);
4842 }
4843
VisitUShr(HUShr * ushr)4844 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
4845 HandleShift(ushr);
4846 }
4847
VisitNewInstance(HNewInstance * instruction)4848 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
4849 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4850 instruction, LocationSummary::kCallOnMainOnly);
4851 InvokeRuntimeCallingConvention calling_convention;
4852 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4853 locations->SetOut(Location::RegisterLocation(RAX));
4854 }
4855
VisitNewInstance(HNewInstance * instruction)4856 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
4857 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction, instruction->GetDexPc());
4858 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
4859 DCHECK(!codegen_->IsLeafMethod());
4860 }
4861
VisitNewArray(HNewArray * instruction)4862 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
4863 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
4864 instruction, LocationSummary::kCallOnMainOnly);
4865 InvokeRuntimeCallingConvention calling_convention;
4866 locations->SetOut(Location::RegisterLocation(RAX));
4867 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
4868 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
4869 }
4870
VisitNewArray(HNewArray * instruction)4871 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
4872 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
4873 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
4874 codegen_->InvokeRuntime(entrypoint, instruction, instruction->GetDexPc());
4875 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
4876 DCHECK(!codegen_->IsLeafMethod());
4877 }
4878
VisitParameterValue(HParameterValue * instruction)4879 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
4880 LocationSummary* locations =
4881 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4882 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
4883 if (location.IsStackSlot()) {
4884 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4885 } else if (location.IsDoubleStackSlot()) {
4886 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
4887 }
4888 locations->SetOut(location);
4889 }
4890
VisitParameterValue(HParameterValue * instruction ATTRIBUTE_UNUSED)4891 void InstructionCodeGeneratorX86_64::VisitParameterValue(
4892 HParameterValue* instruction ATTRIBUTE_UNUSED) {
4893 // Nothing to do, the parameter is already at its location.
4894 }
4895
VisitCurrentMethod(HCurrentMethod * instruction)4896 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
4897 LocationSummary* locations =
4898 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4899 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
4900 }
4901
VisitCurrentMethod(HCurrentMethod * instruction ATTRIBUTE_UNUSED)4902 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
4903 HCurrentMethod* instruction ATTRIBUTE_UNUSED) {
4904 // Nothing to do, the method is already at its location.
4905 }
4906
VisitClassTableGet(HClassTableGet * instruction)4907 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4908 LocationSummary* locations =
4909 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4910 locations->SetInAt(0, Location::RequiresRegister());
4911 locations->SetOut(Location::RequiresRegister());
4912 }
4913
VisitClassTableGet(HClassTableGet * instruction)4914 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
4915 LocationSummary* locations = instruction->GetLocations();
4916 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
4917 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
4918 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
4919 __ movq(locations->Out().AsRegister<CpuRegister>(),
4920 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
4921 } else {
4922 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
4923 instruction->GetIndex(), kX86_64PointerSize));
4924 __ movq(locations->Out().AsRegister<CpuRegister>(),
4925 Address(locations->InAt(0).AsRegister<CpuRegister>(),
4926 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4927 __ movq(locations->Out().AsRegister<CpuRegister>(),
4928 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
4929 }
4930 }
4931
VisitNot(HNot * not_)4932 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
4933 LocationSummary* locations =
4934 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
4935 locations->SetInAt(0, Location::RequiresRegister());
4936 locations->SetOut(Location::SameAsFirstInput());
4937 }
4938
VisitNot(HNot * not_)4939 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
4940 LocationSummary* locations = not_->GetLocations();
4941 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4942 locations->Out().AsRegister<CpuRegister>().AsRegister());
4943 Location out = locations->Out();
4944 switch (not_->GetResultType()) {
4945 case DataType::Type::kInt32:
4946 __ notl(out.AsRegister<CpuRegister>());
4947 break;
4948
4949 case DataType::Type::kInt64:
4950 __ notq(out.AsRegister<CpuRegister>());
4951 break;
4952
4953 default:
4954 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
4955 }
4956 }
4957
VisitBooleanNot(HBooleanNot * bool_not)4958 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4959 LocationSummary* locations =
4960 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
4961 locations->SetInAt(0, Location::RequiresRegister());
4962 locations->SetOut(Location::SameAsFirstInput());
4963 }
4964
VisitBooleanNot(HBooleanNot * bool_not)4965 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
4966 LocationSummary* locations = bool_not->GetLocations();
4967 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
4968 locations->Out().AsRegister<CpuRegister>().AsRegister());
4969 Location out = locations->Out();
4970 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
4971 }
4972
VisitPhi(HPhi * instruction)4973 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
4974 LocationSummary* locations =
4975 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
4976 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
4977 locations->SetInAt(i, Location::Any());
4978 }
4979 locations->SetOut(Location::Any());
4980 }
4981
VisitPhi(HPhi * instruction ATTRIBUTE_UNUSED)4982 void InstructionCodeGeneratorX86_64::VisitPhi(HPhi* instruction ATTRIBUTE_UNUSED) {
4983 LOG(FATAL) << "Unimplemented";
4984 }
4985
GenerateMemoryBarrier(MemBarrierKind kind)4986 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
4987 /*
4988 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
4989 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
4990 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
4991 */
4992 switch (kind) {
4993 case MemBarrierKind::kAnyAny: {
4994 MemoryFence();
4995 break;
4996 }
4997 case MemBarrierKind::kAnyStore:
4998 case MemBarrierKind::kLoadAny:
4999 case MemBarrierKind::kStoreStore: {
5000 // nop
5001 break;
5002 }
5003 case MemBarrierKind::kNTStoreStore:
5004 // Non-Temporal Store/Store needs an explicit fence.
5005 MemoryFence(/* non-temporal= */ true);
5006 break;
5007 }
5008 }
5009
HandleFieldGet(HInstruction * instruction)5010 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
5011 DCHECK(instruction->IsInstanceFieldGet() ||
5012 instruction->IsStaticFieldGet() ||
5013 instruction->IsPredicatedInstanceFieldGet());
5014
5015 bool object_field_get_with_read_barrier =
5016 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5017 bool is_predicated = instruction->IsPredicatedInstanceFieldGet();
5018 LocationSummary* locations =
5019 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5020 object_field_get_with_read_barrier
5021 ? LocationSummary::kCallOnSlowPath
5022 : LocationSummary::kNoCall);
5023 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5024 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5025 }
5026 // receiver_input
5027 locations->SetInAt(is_predicated ? 1 : 0, Location::RequiresRegister());
5028 if (is_predicated) {
5029 if (DataType::IsFloatingPointType(instruction->GetType())) {
5030 locations->SetInAt(0, Location::RequiresFpuRegister());
5031 } else {
5032 locations->SetInAt(0, Location::RequiresRegister());
5033 }
5034 }
5035 if (DataType::IsFloatingPointType(instruction->GetType())) {
5036 locations->SetOut(is_predicated ? Location::SameAsFirstInput()
5037 : Location::RequiresFpuRegister());
5038 } else {
5039 // The output overlaps for an object field get when read barriers are
5040 // enabled: we do not want the move to overwrite the object's location, as
5041 // we need it to emit the read barrier. For predicated instructions we can
5042 // always overlap, since the output is SameAsFirstInput and the first input holds the default value.
5043 locations->SetOut(is_predicated ? Location::SameAsFirstInput() : Location::RequiresRegister(),
5044 object_field_get_with_read_barrier || is_predicated
5045 ? Location::kOutputOverlap
5046 : Location::kNoOutputOverlap);
5047 }
5048 }
5049
HandleFieldGet(HInstruction * instruction,const FieldInfo & field_info)5050 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
5051 const FieldInfo& field_info) {
5052 DCHECK(instruction->IsInstanceFieldGet() ||
5053 instruction->IsStaticFieldGet() ||
5054 instruction->IsPredicatedInstanceFieldGet());
5055
5056 LocationSummary* locations = instruction->GetLocations();
5057 Location base_loc = locations->InAt(instruction->IsPredicatedInstanceFieldGet() ? 1 : 0);
5058 CpuRegister base = base_loc.AsRegister<CpuRegister>();
5059 Location out = locations->Out();
5060 bool is_volatile = field_info.IsVolatile();
5061 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5062 DataType::Type load_type = instruction->GetType();
5063 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5064
5065 if (load_type == DataType::Type::kReference) {
5066 // /* HeapReference<Object> */ out = *(base + offset)
5067 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5068 // Note that a potential implicit null check is handled in this
5069 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
5070 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5071 instruction, out, base, offset, /* needs_null_check= */ true);
5072 if (is_volatile) {
5073 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5074 }
5075 } else {
5076 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
5077 codegen_->MaybeRecordImplicitNullCheck(instruction);
5078 if (is_volatile) {
5079 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5080 }
5081 // If read barriers are enabled, emit read barriers other than
5082 // Baker's using a slow path (and also unpoison the loaded
5083 // reference, if heap poisoning is enabled).
5084 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5085 }
5086 } else {
5087 codegen_->LoadFromMemoryNoReference(load_type, out, Address(base, offset));
5088 codegen_->MaybeRecordImplicitNullCheck(instruction);
5089 if (is_volatile) {
5090 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5091 }
5092 }
5093 }
5094
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info)5095 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5096 const FieldInfo& field_info) {
5097 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5098
5099 LocationSummary* locations =
5100 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5101 DataType::Type field_type = field_info.GetFieldType();
5102 bool is_volatile = field_info.IsVolatile();
5103 bool needs_write_barrier =
5104 CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(1));
5105
5106 locations->SetInAt(0, Location::RequiresRegister());
5107 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5108 if (is_volatile) {
5109 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5110 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5111 } else {
5112 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5113 }
5114 } else {
5115 if (is_volatile) {
5116 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5117 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5118 } else {
5119 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5120 }
5121 }
5122 if (needs_write_barrier) {
5123 // Temporary registers for the write barrier.
5124 locations->AddTemp(Location::RequiresRegister());
5125 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
5126 } else if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5127 // Temporary register for the reference poisoning.
5128 locations->AddTemp(Location::RequiresRegister());
5129 }
5130 }
5131
Bswap(Location value,DataType::Type type,CpuRegister * temp)5132 void InstructionCodeGeneratorX86_64::Bswap(Location value,
5133 DataType::Type type,
5134 CpuRegister* temp) {
5135 switch (type) {
5136 case DataType::Type::kInt16:
5137 // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
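 // bswapl reverses all four bytes, leaving the original 16-bit value in the upper half;
 // the arithmetic shift right by 16 moves it back down and sign-extends it.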
5138 __ bswapl(value.AsRegister<CpuRegister>());
5139 __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
5140 break;
5141 case DataType::Type::kUint16:
5142 // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
5143 __ bswapl(value.AsRegister<CpuRegister>());
5144 __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
5145 break;
5146 case DataType::Type::kInt32:
5147 case DataType::Type::kUint32:
5148 __ bswapl(value.AsRegister<CpuRegister>());
5149 break;
5150 case DataType::Type::kInt64:
5151 case DataType::Type::kUint64:
5152 __ bswapq(value.AsRegister<CpuRegister>());
5153 break;
5154 case DataType::Type::kFloat32: {
5155 DCHECK_NE(temp, nullptr);
5156 __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ false);
5157 __ bswapl(*temp);
5158 __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ false);
5159 break;
5160 }
5161 case DataType::Type::kFloat64: {
5162 DCHECK_NE(temp, nullptr);
5163 __ movd(*temp, value.AsFpuRegister<XmmRegister>(), /*is64bit=*/ true);
5164 __ bswapq(*temp);
5165 __ movd(value.AsFpuRegister<XmmRegister>(), *temp, /*is64bit=*/ true);
5166 break;
5167 }
5168 default:
5169 LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
5170 UNREACHABLE();
5171 }
5172 }
5173
HandleFieldSet(HInstruction * instruction,uint32_t value_index,uint32_t extra_temp_index,DataType::Type field_type,Address field_addr,CpuRegister base,bool is_volatile,bool is_atomic,bool value_can_be_null,bool byte_swap)5174 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5175 uint32_t value_index,
5176 uint32_t extra_temp_index,
5177 DataType::Type field_type,
5178 Address field_addr,
5179 CpuRegister base,
5180 bool is_volatile,
5181 bool is_atomic,
5182 bool value_can_be_null,
5183 bool byte_swap) {
5184 LocationSummary* locations = instruction->GetLocations();
5185 Location value = locations->InAt(value_index);
5186
5187 if (is_volatile) {
5188 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5189 }
5190
5191 bool maybe_record_implicit_null_check_done = false;
5192
5193 if (value.IsConstant()) {
5194 switch (field_type) {
5195 case DataType::Type::kBool:
5196 case DataType::Type::kUint8:
5197 case DataType::Type::kInt8:
5198 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5199 break;
5200 case DataType::Type::kUint16:
5201 case DataType::Type::kInt16: {
5202 int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
5203 if (byte_swap) {
5204 v = BSWAP(v);
5205 }
5206 __ movw(field_addr, Immediate(v));
5207 break;
5208 }
5209 case DataType::Type::kUint32:
5210 case DataType::Type::kInt32:
5211 case DataType::Type::kFloat32:
5212 case DataType::Type::kReference: {
5213 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5214 if (byte_swap) {
5215 v = BSWAP(v);
5216 }
5217 DCHECK_IMPLIES(field_type == DataType::Type::kReference, v == 0);
5218 // Note: if heap poisoning is enabled, no need to poison
5219 // (negate) `v` if it is a reference, as it would be null.
5220 __ movl(field_addr, Immediate(v));
5221 break;
5222 }
5223 case DataType::Type::kUint64:
5224 case DataType::Type::kInt64:
5225 case DataType::Type::kFloat64: {
5226 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5227 if (byte_swap) {
5228 v = BSWAP(v);
5229 }
5230 if (is_atomic) {
5231 // Move constant into a register, then atomically store the register to memory.
5232 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5233 __ movq(temp, Immediate(v));
5234 __ movq(field_addr, temp);
5235 } else {
5236 Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
5237 codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
5238 }
5239 maybe_record_implicit_null_check_done = true;
5240 break;
5241 }
5242 case DataType::Type::kVoid:
5243 LOG(FATAL) << "Unreachable type " << field_type;
5244 UNREACHABLE();
5245 }
5246 } else {
5247 if (byte_swap) {
5248 // Swap byte order in-place in the input register (we will restore it later).
5249 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5250 Bswap(value, field_type, &temp);
5251 }
5252
5253 switch (field_type) {
5254 case DataType::Type::kBool:
5255 case DataType::Type::kUint8:
5256 case DataType::Type::kInt8:
5257 __ movb(field_addr, value.AsRegister<CpuRegister>());
5258 break;
5259 case DataType::Type::kUint16:
5260 case DataType::Type::kInt16:
5261 __ movw(field_addr, value.AsRegister<CpuRegister>());
5262 break;
5263 case DataType::Type::kUint32:
5264 case DataType::Type::kInt32:
5265 case DataType::Type::kReference:
5266 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5267 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5268 __ movl(temp, value.AsRegister<CpuRegister>());
5269 __ PoisonHeapReference(temp);
5270 __ movl(field_addr, temp);
5271 } else {
5272 __ movl(field_addr, value.AsRegister<CpuRegister>());
5273 }
5274 break;
5275 case DataType::Type::kUint64:
5276 case DataType::Type::kInt64:
5277 __ movq(field_addr, value.AsRegister<CpuRegister>());
5278 break;
5279 case DataType::Type::kFloat32:
5280 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5281 break;
5282 case DataType::Type::kFloat64:
5283 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5284 break;
5285 case DataType::Type::kVoid:
5286 LOG(FATAL) << "Unreachable type " << field_type;
5287 UNREACHABLE();
5288 }
5289
5290 if (byte_swap) {
5291 // Restore byte order.
5292 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5293 Bswap(value, field_type, &temp);
5294 }
5295 }
5296
5297 if (!maybe_record_implicit_null_check_done) {
5298 codegen_->MaybeRecordImplicitNullCheck(instruction);
5299 }
5300
5301 if (CodeGenerator::StoreNeedsWriteBarrier(field_type, instruction->InputAt(value_index))) {
5302 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5303 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5304 codegen_->MarkGCCard(temp, card, base, value.AsRegister<CpuRegister>(), value_can_be_null);
5305 }
5306
5307 if (is_volatile) {
5308 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5309 }
5310 }
5311
HandleFieldSet(HInstruction * instruction,const FieldInfo & field_info,bool value_can_be_null)5312 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5313 const FieldInfo& field_info,
5314 bool value_can_be_null) {
5315 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5316
5317 LocationSummary* locations = instruction->GetLocations();
5318 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5319 bool is_volatile = field_info.IsVolatile();
5320 DataType::Type field_type = field_info.GetFieldType();
5321 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5322 bool is_predicated =
5323 instruction->IsInstanceFieldSet() && instruction->AsInstanceFieldSet()->GetIsPredicatedSet();
5324
5325 NearLabel pred_is_null;
5326 if (is_predicated) {
5327 __ testl(base, base);
5328 __ j(kZero, &pred_is_null);
5329 }
5330
5331 HandleFieldSet(instruction,
5332 /*value_index=*/ 1,
5333 /*extra_temp_index=*/ 1,
5334 field_type,
5335 Address(base, offset),
5336 base,
5337 is_volatile,
5338 /*is_atomic=*/ false,
5339 value_can_be_null);
5340
5341 if (is_predicated) {
5342 __ Bind(&pred_is_null);
5343 }
5344 }
5345
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5346 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5347 HandleFieldSet(instruction, instruction->GetFieldInfo());
5348 }
5349
VisitInstanceFieldSet(HInstanceFieldSet * instruction)5350 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5351 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5352 }
5353
VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet * instruction)5354 void LocationsBuilderX86_64::VisitPredicatedInstanceFieldGet(
5355 HPredicatedInstanceFieldGet* instruction) {
5356 HandleFieldGet(instruction);
5357 }
5358
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5359 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5360 HandleFieldGet(instruction);
5361 }
5362
VisitPredicatedInstanceFieldGet(HPredicatedInstanceFieldGet * instruction)5363 void InstructionCodeGeneratorX86_64::VisitPredicatedInstanceFieldGet(
5364 HPredicatedInstanceFieldGet* instruction) {
5365 NearLabel finish;
5366 LocationSummary* locations = instruction->GetLocations();
5367 CpuRegister target = locations->InAt(1).AsRegister<CpuRegister>();
5368 __ testl(target, target);
5369 __ j(kZero, &finish);
5370 HandleFieldGet(instruction, instruction->GetFieldInfo());
5371 __ Bind(&finish);
5372 }
5373
VisitInstanceFieldGet(HInstanceFieldGet * instruction)5374 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5375 HandleFieldGet(instruction, instruction->GetFieldInfo());
5376 }
5377
VisitStaticFieldGet(HStaticFieldGet * instruction)5378 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5379 HandleFieldGet(instruction);
5380 }
5381
VisitStaticFieldGet(HStaticFieldGet * instruction)5382 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5383 HandleFieldGet(instruction, instruction->GetFieldInfo());
5384 }
5385
VisitStaticFieldSet(HStaticFieldSet * instruction)5386 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5387 HandleFieldSet(instruction, instruction->GetFieldInfo());
5388 }
5389
VisitStaticFieldSet(HStaticFieldSet * instruction)5390 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5391 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetValueCanBeNull());
5392 }
5393
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5394 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5395 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5396 }
5397
VisitStringBuilderAppend(HStringBuilderAppend * instruction)5398 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5399 __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5400 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction, instruction->GetDexPc());
5401 }
5402
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5403 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5404 HUnresolvedInstanceFieldGet* instruction) {
5405 FieldAccessCallingConventionX86_64 calling_convention;
5406 codegen_->CreateUnresolvedFieldLocationSummary(
5407 instruction, instruction->GetFieldType(), calling_convention);
5408 }
5409
VisitUnresolvedInstanceFieldGet(HUnresolvedInstanceFieldGet * instruction)5410 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5411 HUnresolvedInstanceFieldGet* instruction) {
5412 FieldAccessCallingConventionX86_64 calling_convention;
5413 codegen_->GenerateUnresolvedFieldAccess(instruction,
5414 instruction->GetFieldType(),
5415 instruction->GetFieldIndex(),
5416 instruction->GetDexPc(),
5417 calling_convention);
5418 }
5419
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5420 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5421 HUnresolvedInstanceFieldSet* instruction) {
5422 FieldAccessCallingConventionX86_64 calling_convention;
5423 codegen_->CreateUnresolvedFieldLocationSummary(
5424 instruction, instruction->GetFieldType(), calling_convention);
5425 }
5426
VisitUnresolvedInstanceFieldSet(HUnresolvedInstanceFieldSet * instruction)5427 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5428 HUnresolvedInstanceFieldSet* instruction) {
5429 FieldAccessCallingConventionX86_64 calling_convention;
5430 codegen_->GenerateUnresolvedFieldAccess(instruction,
5431 instruction->GetFieldType(),
5432 instruction->GetFieldIndex(),
5433 instruction->GetDexPc(),
5434 calling_convention);
5435 }
5436
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5437 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5438 HUnresolvedStaticFieldGet* instruction) {
5439 FieldAccessCallingConventionX86_64 calling_convention;
5440 codegen_->CreateUnresolvedFieldLocationSummary(
5441 instruction, instruction->GetFieldType(), calling_convention);
5442 }
5443
VisitUnresolvedStaticFieldGet(HUnresolvedStaticFieldGet * instruction)5444 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5445 HUnresolvedStaticFieldGet* instruction) {
5446 FieldAccessCallingConventionX86_64 calling_convention;
5447 codegen_->GenerateUnresolvedFieldAccess(instruction,
5448 instruction->GetFieldType(),
5449 instruction->GetFieldIndex(),
5450 instruction->GetDexPc(),
5451 calling_convention);
5452 }
5453
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5454 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5455 HUnresolvedStaticFieldSet* instruction) {
5456 FieldAccessCallingConventionX86_64 calling_convention;
5457 codegen_->CreateUnresolvedFieldLocationSummary(
5458 instruction, instruction->GetFieldType(), calling_convention);
5459 }
5460
VisitUnresolvedStaticFieldSet(HUnresolvedStaticFieldSet * instruction)5461 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5462 HUnresolvedStaticFieldSet* instruction) {
5463 FieldAccessCallingConventionX86_64 calling_convention;
5464 codegen_->GenerateUnresolvedFieldAccess(instruction,
5465 instruction->GetFieldType(),
5466 instruction->GetFieldIndex(),
5467 instruction->GetDexPc(),
5468 calling_convention);
5469 }
5470
VisitNullCheck(HNullCheck * instruction)5471 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5472 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5473 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5474 ? Location::RequiresRegister()
5475 : Location::Any();
5476 locations->SetInAt(0, loc);
5477 }
5478
GenerateImplicitNullCheck(HNullCheck * instruction)5479 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5480 if (CanMoveNullCheckToUser(instruction)) {
5481 return;
5482 }
5483 LocationSummary* locations = instruction->GetLocations();
5484 Location obj = locations->InAt(0);
5485
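 // A read through the object pointer at offset 0 faults if the reference is null; the
 // runtime's fault handler turns that fault into a NullPointerException at the recorded
 // PC. testl only reads its memory operand, and RAX is an arbitrary register operand
 // whose value is not modified.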
5486 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5487 RecordPcInfo(instruction, instruction->GetDexPc());
5488 }
5489
GenerateExplicitNullCheck(HNullCheck * instruction)5490 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5491 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5492 AddSlowPath(slow_path);
5493
5494 LocationSummary* locations = instruction->GetLocations();
5495 Location obj = locations->InAt(0);
5496
5497 if (obj.IsRegister()) {
5498 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5499 } else if (obj.IsStackSlot()) {
5500 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5501 } else {
5502 DCHECK(obj.IsConstant()) << obj;
5503 DCHECK(obj.GetConstant()->IsNullConstant());
5504 __ jmp(slow_path->GetEntryLabel());
5505 return;
5506 }
5507 __ j(kEqual, slow_path->GetEntryLabel());
5508 }
5509
VisitNullCheck(HNullCheck * instruction)5510 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5511 codegen_->GenerateNullCheck(instruction);
5512 }
5513
VisitArrayGet(HArrayGet * instruction)5514 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5515 bool object_array_get_with_read_barrier =
5516 kEmitCompilerReadBarrier && (instruction->GetType() == DataType::Type::kReference);
5517 LocationSummary* locations =
5518 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5519 object_array_get_with_read_barrier
5520 ? LocationSummary::kCallOnSlowPath
5521 : LocationSummary::kNoCall);
5522 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5523 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5524 }
5525 locations->SetInAt(0, Location::RequiresRegister());
5526 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5527 if (DataType::IsFloatingPointType(instruction->GetType())) {
5528 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5529 } else {
5530 // The output overlaps for an object array get when read barriers
5531 // are enabled: we do not want the move to overwrite the array's
5532 // location, as we need it to emit the read barrier.
5533 locations->SetOut(
5534 Location::RequiresRegister(),
5535 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5536 }
5537 }
5538
VisitArrayGet(HArrayGet * instruction)5539 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5540 LocationSummary* locations = instruction->GetLocations();
5541 Location obj_loc = locations->InAt(0);
5542 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5543 Location index = locations->InAt(1);
5544 Location out_loc = locations->Out();
5545 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5546
5547 DataType::Type type = instruction->GetType();
5548 if (type == DataType::Type::kReference) {
5549 static_assert(
5550 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5551 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5552 // /* HeapReference<Object> */ out =
5553 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5554 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
5555 // Note that a potential implicit null check is handled in this
5556 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5557 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5558 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5559 } else {
5560 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5561 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5562 codegen_->MaybeRecordImplicitNullCheck(instruction);
5563 // If read barriers are enabled, emit read barriers other than
5564 // Baker's using a slow path (and also unpoison the loaded
5565 // reference, if heap poisoning is enabled).
5566 if (index.IsConstant()) {
5567 uint32_t offset =
5568 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5569 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5570 } else {
5571 codegen_->MaybeGenerateReadBarrierSlow(
5572 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5573 }
5574 }
5575 } else {
5576 if (type == DataType::Type::kUint16
5577 && mirror::kUseStringCompression
5578 && instruction->IsStringCharAt()) {
5579 // Branch cases into compressed and uncompressed for each index's type.
5580 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5581 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5582 NearLabel done, not_compressed;
5583 __ testb(Address(obj, count_offset), Immediate(1));
5584 codegen_->MaybeRecordImplicitNullCheck(instruction);
5585 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5586 "Expecting 0=compressed, 1=uncompressed");
5587 __ j(kNotZero, &not_compressed);
5588 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5589 __ jmp(&done);
5590 __ Bind(&not_compressed);
5591 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5592 __ Bind(&done);
5593 } else {
5594 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
5595 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, scale, data_offset);
5596 codegen_->LoadFromMemoryNoReference(type, out_loc, src);
5597 }
5598 codegen_->MaybeRecordImplicitNullCheck(instruction);
5599 }
5600 }
5601
5602 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5603 DataType::Type value_type = instruction->GetComponentType();
5604
5605 bool needs_write_barrier =
5606 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5607 bool needs_type_check = instruction->NeedsTypeCheck();
5608
5609 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5610 instruction,
5611 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5612
5613 locations->SetInAt(0, Location::RequiresRegister());
5614 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5615 if (DataType::IsFloatingPointType(value_type)) {
5616 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5617 } else {
5618 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5619 }
5620
5621 if (needs_write_barrier) {
5622 // Temporary registers for the write barrier.
5623 locations->AddTemp(Location::RequiresRegister()); // Possibly used for ref. poisoning too.
5624 locations->AddTemp(Location::RequiresRegister());
5625 }
5626 }
5627
5628 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5629 LocationSummary* locations = instruction->GetLocations();
5630 Location array_loc = locations->InAt(0);
5631 CpuRegister array = array_loc.AsRegister<CpuRegister>();
5632 Location index = locations->InAt(1);
5633 Location value = locations->InAt(2);
5634 DataType::Type value_type = instruction->GetComponentType();
5635 bool needs_type_check = instruction->NeedsTypeCheck();
5636 bool needs_write_barrier =
5637 CodeGenerator::StoreNeedsWriteBarrier(value_type, instruction->GetValue());
5638
5639 switch (value_type) {
5640 case DataType::Type::kBool:
5641 case DataType::Type::kUint8:
5642 case DataType::Type::kInt8: {
5643 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5644 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5645 if (value.IsRegister()) {
5646 __ movb(address, value.AsRegister<CpuRegister>());
5647 } else {
5648 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5649 }
5650 codegen_->MaybeRecordImplicitNullCheck(instruction);
5651 break;
5652 }
5653
5654 case DataType::Type::kUint16:
5655 case DataType::Type::kInt16: {
5656 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5657 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5658 if (value.IsRegister()) {
5659 __ movw(address, value.AsRegister<CpuRegister>());
5660 } else {
5661 DCHECK(value.IsConstant()) << value;
5662 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5663 }
5664 codegen_->MaybeRecordImplicitNullCheck(instruction);
5665 break;
5666 }
5667
5668 case DataType::Type::kReference: {
5669 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5670 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5671
5672 if (!value.IsRegister()) {
5673 // Just setting null.
5674 DCHECK(instruction->InputAt(2)->IsNullConstant());
5675 DCHECK(value.IsConstant()) << value;
5676 __ movl(address, Immediate(0));
5677 codegen_->MaybeRecordImplicitNullCheck(instruction);
5678 DCHECK(!needs_write_barrier);
5679 DCHECK(!needs_type_check);
5680 break;
5681 }
5682
5683 DCHECK(needs_write_barrier);
5684 CpuRegister register_value = value.AsRegister<CpuRegister>();
5685 Location temp_loc = locations->GetTemp(0);
5686 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5687
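// A null reference needs neither the type check nor the card marking below; branch past both
// straight to the store.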
5688 bool can_value_be_null = instruction->GetValueCanBeNull();
5689 NearLabel do_store;
5690 if (can_value_be_null) {
5691 __ testl(register_value, register_value);
5692 __ j(kEqual, &do_store);
5693 }
5694
5695 SlowPathCode* slow_path = nullptr;
5696 if (needs_type_check) {
5697 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
5698 codegen_->AddSlowPath(slow_path);
5699
5700 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
5701 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
5702 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
5703
5704 // Note that when Baker read barriers are enabled, the type
5705 // checks are performed without read barriers. This is fine,
5706 // even in the case where a class object is in the from-space
5707 // after the flip, as a comparison involving such a type would
5708 // not produce a false positive; it may of course produce a
5709 // false negative, in which case we would take the ArraySet
5710 // slow path.
5711
5712 // /* HeapReference<Class> */ temp = array->klass_
5713 __ movl(temp, Address(array, class_offset));
5714 codegen_->MaybeRecordImplicitNullCheck(instruction);
5715 __ MaybeUnpoisonHeapReference(temp);
5716
5717 // /* HeapReference<Class> */ temp = temp->component_type_
5718 __ movl(temp, Address(temp, component_offset));
5719 // If heap poisoning is enabled, no need to unpoison `temp`
5720 // nor the object reference in `register_value->klass`, as
5721 // we are comparing two poisoned references.
5722 __ cmpl(temp, Address(register_value, class_offset));
5723
5724 if (instruction->StaticTypeOfArrayIsObjectArray()) {
5725 NearLabel do_put;
5726 __ j(kEqual, &do_put);
5727 // If heap poisoning is enabled, the `temp` reference has
5728 // not been unpoisoned yet; unpoison it now.
5729 __ MaybeUnpoisonHeapReference(temp);
5730
5731 // If heap poisoning is enabled, no need to unpoison the
5732 // heap reference loaded below, as it is only used for a
5733 // comparison with null.
5734 __ cmpl(Address(temp, super_offset), Immediate(0));
5735 __ j(kNotEqual, slow_path->GetEntryLabel());
5736 __ Bind(&do_put);
5737 } else {
5738 __ j(kNotEqual, slow_path->GetEntryLabel());
5739 }
5740 }
5741
5742 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5743 codegen_->MarkGCCard(
5744 temp, card, array, value.AsRegister<CpuRegister>(), /* value_can_be_null= */ false);
5745
5746 if (can_value_be_null) {
5747 DCHECK(do_store.IsLinked());
5748 __ Bind(&do_store);
5749 }
5750
5751 Location source = value;
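// With heap poisoning enabled, poison a copy of the reference in `temp` instead of clobbering
// the input register.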
5752 if (kPoisonHeapReferences) {
5753 __ movl(temp, register_value);
5754 __ PoisonHeapReference(temp);
5755 source = temp_loc;
5756 }
5757
5758 __ movl(address, source.AsRegister<CpuRegister>());
5759
5760 if (can_value_be_null || !needs_type_check) {
5761 codegen_->MaybeRecordImplicitNullCheck(instruction);
5762 }
5763
5764 if (slow_path != nullptr) {
5765 __ Bind(slow_path->GetExitLabel());
5766 }
5767
5768 break;
5769 }
5770
5771 case DataType::Type::kInt32: {
5772 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5773 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5774 if (value.IsRegister()) {
5775 __ movl(address, value.AsRegister<CpuRegister>());
5776 } else {
5777 DCHECK(value.IsConstant()) << value;
5778 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5779 __ movl(address, Immediate(v));
5780 }
5781 codegen_->MaybeRecordImplicitNullCheck(instruction);
5782 break;
5783 }
5784
5785 case DataType::Type::kInt64: {
5786 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
5787 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5788 if (value.IsRegister()) {
5789 __ movq(address, value.AsRegister<CpuRegister>());
5790 codegen_->MaybeRecordImplicitNullCheck(instruction);
5791 } else {
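// The 64-bit constant may not fit in a 32-bit immediate; also compute the address of the high
// half so that MoveInt64ToAddress can store the value as two 32-bit halves if needed.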
5792 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
5793 Address address_high =
5794 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5795 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5796 }
5797 break;
5798 }
5799
5800 case DataType::Type::kFloat32: {
5801 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
5802 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5803 if (value.IsFpuRegister()) {
5804 __ movss(address, value.AsFpuRegister<XmmRegister>());
5805 } else {
5806 DCHECK(value.IsConstant());
5807 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
5808 __ movl(address, Immediate(v));
5809 }
5810 codegen_->MaybeRecordImplicitNullCheck(instruction);
5811 break;
5812 }
5813
5814 case DataType::Type::kFloat64: {
5815 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
5816 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
5817 if (value.IsFpuRegister()) {
5818 __ movsd(address, value.AsFpuRegister<XmmRegister>());
5819 codegen_->MaybeRecordImplicitNullCheck(instruction);
5820 } else {
5821 int64_t v =
5822 bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
5823 Address address_high =
5824 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
5825 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
5826 }
5827 break;
5828 }
5829
5830 case DataType::Type::kUint32:
5831 case DataType::Type::kUint64:
5832 case DataType::Type::kVoid:
5833 LOG(FATAL) << "Unreachable type " << instruction->GetType();
5834 UNREACHABLE();
5835 }
5836 }
5837
5838 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
5839 LocationSummary* locations =
5840 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5841 locations->SetInAt(0, Location::RequiresRegister());
5842 if (!instruction->IsEmittedAtUseSite()) {
5843 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
5844 }
5845 }
5846
5847 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
5848 if (instruction->IsEmittedAtUseSite()) {
5849 return;
5850 }
5851
5852 LocationSummary* locations = instruction->GetLocations();
5853 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
5854 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
5855 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
5856 __ movl(out, Address(obj, offset));
5857 codegen_->MaybeRecordImplicitNullCheck(instruction);
5858 // Mask out the compression flag (least significant bit) in case the array is String's array of char.
5859 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
5860 __ shrl(out, Immediate(1));
5861 }
5862 }
5863
5864 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5865 RegisterSet caller_saves = RegisterSet::Empty();
5866 InvokeRuntimeCallingConvention calling_convention;
5867 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5868 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5869 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
5870 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
5871 HInstruction* length = instruction->InputAt(1);
5872 if (!length->IsEmittedAtUseSite()) {
5873 locations->SetInAt(1, Location::RegisterOrConstant(length));
5874 }
5875 }
5876
5877 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
5878 LocationSummary* locations = instruction->GetLocations();
5879 Location index_loc = locations->InAt(0);
5880 Location length_loc = locations->InAt(1);
5881 SlowPathCode* slow_path =
5882 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
5883
5884 if (length_loc.IsConstant()) {
5885 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
5886 if (index_loc.IsConstant()) {
5887 // BCE will remove the bounds check if we are guaranteed to pass.
5888 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5889 if (index < 0 || index >= length) {
5890 codegen_->AddSlowPath(slow_path);
5891 __ jmp(slow_path->GetEntryLabel());
5892 } else {
5893 // Some optimization after BCE may have generated this, and we should not
5894 // generate a bounds check if it is a valid range.
5895 }
5896 return;
5897 }
5898
5899 // We have to reverse the jump condition because the length is the constant.
5900 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
5901 __ cmpl(index_reg, Immediate(length));
5902 codegen_->AddSlowPath(slow_path);
5903 __ j(kAboveEqual, slow_path->GetEntryLabel());
5904 } else {
5905 HInstruction* array_length = instruction->InputAt(1);
5906 if (array_length->IsEmittedAtUseSite()) {
5907 // Address the length field in the array.
5908 DCHECK(array_length->IsArrayLength());
5909 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
5910 Location array_loc = array_length->GetLocations()->InAt(0);
5911 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
5912 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
5913 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
5914 // the string compression flag) with the in-memory length and avoid the temporary.
5915 CpuRegister length_reg = CpuRegister(TMP);
5916 __ movl(length_reg, array_len);
5917 codegen_->MaybeRecordImplicitNullCheck(array_length);
5918 __ shrl(length_reg, Immediate(1));
5919 codegen_->GenerateIntCompare(length_reg, index_loc);
5920 } else {
5921 // Checking the bound for general case:
5922 // Array of char, or String's array when the compression feature is off.
5923 if (index_loc.IsConstant()) {
5924 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
5925 __ cmpl(array_len, Immediate(value));
5926 } else {
5927 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
5928 }
5929 codegen_->MaybeRecordImplicitNullCheck(array_length);
5930 }
5931 } else {
5932 codegen_->GenerateIntCompare(length_loc, index_loc);
5933 }
5934 codegen_->AddSlowPath(slow_path);
5935 __ j(kBelowEqual, slow_path->GetEntryLabel());
5936 }
5937 }
5938
5939 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp,
5940 CpuRegister card,
5941 CpuRegister object,
5942 CpuRegister value,
5943 bool value_can_be_null) {
5944 NearLabel is_null;
5945 if (value_can_be_null) {
5946 __ testl(value, value);
5947 __ j(kEqual, &is_null);
5948 }
5949 // Load the address of the card table into `card`.
5950 __ gs()->movq(card, Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
5951 /* no_rip= */ true));
5952 // Calculate the offset (in the card table) of the card corresponding to
5953 // `object`.
5954 __ movq(temp, object);
5955 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
5956 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
5957 // `object`'s card.
5958 //
5959 // Register `card` contains the address of the card table. Note that the card
5960 // table's base is biased during its creation so that it always starts at an
5961 // address whose least-significant byte is equal to `kCardDirty` (see
5962 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
5963 // below writes the `kCardDirty` (byte) value into the `object`'s card
5964 // (located at `card + object >> kCardShift`).
5965 //
5966 // This dual use of the value in register `card` (1. to calculate the location
5967 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
5968 // (no need to explicitly load `kCardDirty` as an immediate value).
5969 __ movb(Address(temp, card, TIMES_1, 0), card);
5970 if (value_can_be_null) {
5971 __ Bind(&is_null);
5972 }
5973 }
5974
5975 void LocationsBuilderX86_64::VisitParallelMove(HParallelMove* instruction ATTRIBUTE_UNUSED) {
5976 LOG(FATAL) << "Unimplemented";
5977 }
5978
5979 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
5980 if (instruction->GetNext()->IsSuspendCheck() &&
5981 instruction->GetBlock()->GetLoopInformation() != nullptr) {
5982 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
5983 // The back edge will generate the suspend check.
5984 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
5985 }
5986
5987 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
5988 }
5989
5990 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
5991 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5992 instruction, LocationSummary::kCallOnSlowPath);
5993 // In suspend check slow path, usually there are no caller-save registers at all.
5994 // If SIMD instructions are present, however, we force spilling all live SIMD
5995 // registers in full width (since the runtime only saves/restores lower part).
5996 locations->SetCustomSlowPathCallerSaves(
5997 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
5998 }
5999
6000 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6001 HBasicBlock* block = instruction->GetBlock();
6002 if (block->GetLoopInformation() != nullptr) {
6003 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6004 // The back edge will generate the suspend check.
6005 return;
6006 }
6007 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6008 // The goto will generate the suspend check.
6009 return;
6010 }
6011 GenerateSuspendCheck(instruction, nullptr);
6012 }
6013
6014 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
6015 HBasicBlock* successor) {
6016 SuspendCheckSlowPathX86_64* slow_path =
6017 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
6018 if (slow_path == nullptr) {
6019 slow_path =
6020 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
6021 instruction->SetSlowPath(slow_path);
6022 codegen_->AddSlowPath(slow_path);
6023 if (successor != nullptr) {
6024 DCHECK(successor->IsLoopHeader());
6025 }
6026 } else {
6027 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6028 }
6029
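// Check the thread flags (read GS-relative from the current Thread) for a pending suspend or
// checkpoint request.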
6030 __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
6031 /* no_rip= */ true),
6032 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6033 if (successor == nullptr) {
6034 __ j(kNotZero, slow_path->GetEntryLabel());
6035 __ Bind(slow_path->GetReturnLabel());
6036 } else {
6037 __ j(kZero, codegen_->GetLabelOf(successor));
6038 __ jmp(slow_path->GetEntryLabel());
6039 }
6040 }
6041
6042 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
6043 return codegen_->GetAssembler();
6044 }
6045
6046 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
6047 MoveOperands* move = moves_[index];
6048 Location source = move->GetSource();
6049 Location destination = move->GetDestination();
6050
6051 if (source.IsRegister()) {
6052 if (destination.IsRegister()) {
6053 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
6054 } else if (destination.IsStackSlot()) {
6055 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
6056 source.AsRegister<CpuRegister>());
6057 } else {
6058 DCHECK(destination.IsDoubleStackSlot());
6059 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
6060 source.AsRegister<CpuRegister>());
6061 }
6062 } else if (source.IsStackSlot()) {
6063 if (destination.IsRegister()) {
6064 __ movl(destination.AsRegister<CpuRegister>(),
6065 Address(CpuRegister(RSP), source.GetStackIndex()));
6066 } else if (destination.IsFpuRegister()) {
6067 __ movss(destination.AsFpuRegister<XmmRegister>(),
6068 Address(CpuRegister(RSP), source.GetStackIndex()));
6069 } else {
6070 DCHECK(destination.IsStackSlot());
6071 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6072 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6073 }
6074 } else if (source.IsDoubleStackSlot()) {
6075 if (destination.IsRegister()) {
6076 __ movq(destination.AsRegister<CpuRegister>(),
6077 Address(CpuRegister(RSP), source.GetStackIndex()));
6078 } else if (destination.IsFpuRegister()) {
6079 __ movsd(destination.AsFpuRegister<XmmRegister>(),
6080 Address(CpuRegister(RSP), source.GetStackIndex()));
6081 } else {
6082 DCHECK(destination.IsDoubleStackSlot()) << destination;
6083 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6084 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6085 }
6086 } else if (source.IsSIMDStackSlot()) {
6087 if (destination.IsFpuRegister()) {
6088 __ movups(destination.AsFpuRegister<XmmRegister>(),
6089 Address(CpuRegister(RSP), source.GetStackIndex()));
6090 } else {
6091 DCHECK(destination.IsSIMDStackSlot());
6092 size_t high = kX86_64WordSize;
6093 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6094 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6095 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
6096 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
6097 }
6098 } else if (source.IsConstant()) {
6099 HConstant* constant = source.GetConstant();
6100 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6101 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6102 if (destination.IsRegister()) {
6103 if (value == 0) {
6104 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6105 } else {
6106 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
6107 }
6108 } else {
6109 DCHECK(destination.IsStackSlot()) << destination;
6110 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6111 }
6112 } else if (constant->IsLongConstant()) {
6113 int64_t value = constant->AsLongConstant()->GetValue();
6114 if (destination.IsRegister()) {
6115 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6116 } else {
6117 DCHECK(destination.IsDoubleStackSlot()) << destination;
6118 codegen_->Store64BitValueToStack(destination, value);
6119 }
6120 } else if (constant->IsFloatConstant()) {
6121 float fp_value = constant->AsFloatConstant()->GetValue();
6122 if (destination.IsFpuRegister()) {
6123 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6124 codegen_->Load32BitValue(dest, fp_value);
6125 } else {
6126 DCHECK(destination.IsStackSlot()) << destination;
6127 Immediate imm(bit_cast<int32_t, float>(fp_value));
6128 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6129 }
6130 } else {
6131 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6132 double fp_value = constant->AsDoubleConstant()->GetValue();
6133 int64_t value = bit_cast<int64_t, double>(fp_value);
6134 if (destination.IsFpuRegister()) {
6135 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6136 codegen_->Load64BitValue(dest, fp_value);
6137 } else {
6138 DCHECK(destination.IsDoubleStackSlot()) << destination;
6139 codegen_->Store64BitValueToStack(destination, value);
6140 }
6141 }
6142 } else if (source.IsFpuRegister()) {
6143 if (destination.IsFpuRegister()) {
6144 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6145 } else if (destination.IsStackSlot()) {
6146 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6147 source.AsFpuRegister<XmmRegister>());
6148 } else if (destination.IsDoubleStackSlot()) {
6149 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6150 source.AsFpuRegister<XmmRegister>());
6151 } else {
6152 DCHECK(destination.IsSIMDStackSlot());
6153 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6154 source.AsFpuRegister<XmmRegister>());
6155 }
6156 }
6157 }
6158
6159 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6160 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6161 __ movl(Address(CpuRegister(RSP), mem), reg);
6162 __ movl(reg, CpuRegister(TMP));
6163 }
6164
6165 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6166 __ movq(CpuRegister(TMP), reg1);
6167 __ movq(reg1, reg2);
6168 __ movq(reg2, CpuRegister(TMP));
6169 }
6170
6171 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6172 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6173 __ movq(Address(CpuRegister(RSP), mem), reg);
6174 __ movq(reg, CpuRegister(TMP));
6175 }
6176
6177 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6178 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6179 __ movss(Address(CpuRegister(RSP), mem), reg);
6180 __ movd(reg, CpuRegister(TMP));
6181 }
6182
6183 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6184 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6185 __ movsd(Address(CpuRegister(RSP), mem), reg);
6186 __ movd(reg, CpuRegister(TMP));
6187 }
6188
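// Swap a 128-bit register with a SIMD stack slot: spill the register to a fresh 16-byte stack
// area, swap the two quadwords with the (now shifted) memory slot, then reload the register.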
6189 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
6190 size_t extra_slot = 2 * kX86_64WordSize;
6191 __ subq(CpuRegister(RSP), Immediate(extra_slot));
6192 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6193 ExchangeMemory64(0, mem + extra_slot, 2);
6194 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6195 __ addq(CpuRegister(RSP), Immediate(extra_slot));
6196 }
6197
6198 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6199 ScratchRegisterScope ensure_scratch(
6200 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6201
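// If the scratch register had to be spilled, its push moved RSP down by one word; adjust the
// stack-relative offsets accordingly.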
6202 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6203 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6204 __ movl(CpuRegister(ensure_scratch.GetRegister()),
6205 Address(CpuRegister(RSP), mem2 + stack_offset));
6206 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6207 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6208 CpuRegister(ensure_scratch.GetRegister()));
6209 }
6210
6211 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6212 ScratchRegisterScope ensure_scratch(
6213 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6214
6215 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6216
6217 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6218 for (int i = 0; i < num_of_qwords; i++) {
6219 __ movq(CpuRegister(TMP),
6220 Address(CpuRegister(RSP), mem1 + stack_offset));
6221 __ movq(CpuRegister(ensure_scratch.GetRegister()),
6222 Address(CpuRegister(RSP), mem2 + stack_offset));
6223 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6224 CpuRegister(TMP));
6225 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6226 CpuRegister(ensure_scratch.GetRegister()));
6227 stack_offset += kX86_64WordSize;
6228 }
6229 }
6230
6231 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6232 MoveOperands* move = moves_[index];
6233 Location source = move->GetSource();
6234 Location destination = move->GetDestination();
6235
6236 if (source.IsRegister() && destination.IsRegister()) {
6237 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6238 } else if (source.IsRegister() && destination.IsStackSlot()) {
6239 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6240 } else if (source.IsStackSlot() && destination.IsRegister()) {
6241 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6242 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6243 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6244 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6245 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6246 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6247 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6248 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6249 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6250 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6251 __ movd(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6252 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6253 __ movd(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6254 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6255 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6256 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6257 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6258 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6259 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6260 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6261 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6262 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6263 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6264 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6265 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6266 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6267 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6268 } else {
6269 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6270 }
6271 }
6272
6273
6274 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6275 __ pushq(CpuRegister(reg));
6276 }
6277
6278
6279 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6280 __ popq(CpuRegister(reg));
6281 }
6282
6283 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6284 SlowPathCode* slow_path, CpuRegister class_reg) {
6285 constexpr size_t status_lsb_position = SubtypeCheckBits::BitStructSizeOf();
6286 const size_t status_byte_offset =
6287 mirror::Class::StatusOffset().SizeValue() + (status_lsb_position / kBitsPerByte);
6288 constexpr uint32_t shifted_visibly_initialized_value =
6289 enum_cast<uint32_t>(ClassStatus::kVisiblyInitialized) << (status_lsb_position % kBitsPerByte);
6290
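// The class status is held in the most significant bits of the 32-bit status field, so it is
// enough to compare the byte that contains it. kVisiblyInitialized is the highest status value,
// hence `kBelow` means the class is not yet visibly initialized and the slow path is needed.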
6291 __ cmpb(Address(class_reg, status_byte_offset), Immediate(shifted_visibly_initialized_value));
6292 __ j(kBelow, slow_path->GetEntryLabel());
6293 __ Bind(slow_path->GetExitLabel());
6294 }
6295
6296 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6297 CpuRegister temp) {
6298 uint32_t path_to_root = check->GetBitstringPathToRoot();
6299 uint32_t mask = check->GetBitstringMask();
6300 DCHECK(IsPowerOfTwo(mask + 1));
6301 size_t mask_bits = WhichPowerOf2(mask + 1);
6302
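// In both branches below, the caller tests the flags set here: the zero flag ends up set iff
// the masked bits of the class's bitstring equal `path_to_root`.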
6303 if (mask_bits == 16u) {
6304 // Compare the bitstring in memory.
6305 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6306 } else {
6307 // /* uint32_t */ temp = temp->status_
6308 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6309 // Compare the bitstring bits using SUB.
6310 __ subl(temp, Immediate(path_to_root));
6311 // Shift out bits that do not contribute to the comparison.
6312 __ shll(temp, Immediate(32u - mask_bits));
6313 }
6314 }
6315
6316 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6317 HLoadClass::LoadKind desired_class_load_kind) {
6318 switch (desired_class_load_kind) {
6319 case HLoadClass::LoadKind::kInvalid:
6320 LOG(FATAL) << "UNREACHABLE";
6321 UNREACHABLE();
6322 case HLoadClass::LoadKind::kReferrersClass:
6323 break;
6324 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6325 case HLoadClass::LoadKind::kBootImageRelRo:
6326 case HLoadClass::LoadKind::kBssEntry:
6327 case HLoadClass::LoadKind::kBssEntryPublic:
6328 case HLoadClass::LoadKind::kBssEntryPackage:
6329 DCHECK(!GetCompilerOptions().IsJitCompiler());
6330 break;
6331 case HLoadClass::LoadKind::kJitBootImageAddress:
6332 case HLoadClass::LoadKind::kJitTableAddress:
6333 DCHECK(GetCompilerOptions().IsJitCompiler());
6334 break;
6335 case HLoadClass::LoadKind::kRuntimeCall:
6336 break;
6337 }
6338 return desired_class_load_kind;
6339 }
6340
6341 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6342 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6343 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6344 // Custom calling convention: RAX serves as both input and output.
6345 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6346 cls,
6347 Location::RegisterLocation(RAX),
6348 Location::RegisterLocation(RAX));
6349 return;
6350 }
6351 DCHECK_EQ(cls->NeedsAccessCheck(),
6352 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6353 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6354
6355 const bool requires_read_barrier = kEmitCompilerReadBarrier && !cls->IsInBootImage();
6356 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6357 ? LocationSummary::kCallOnSlowPath
6358 : LocationSummary::kNoCall;
6359 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6360 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6361 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6362 }
6363
6364 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6365 locations->SetInAt(0, Location::RequiresRegister());
6366 }
6367 locations->SetOut(Location::RequiresRegister());
6368 if (load_kind == HLoadClass::LoadKind::kBssEntry) {
6369 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6370 // Rely on the type resolution and/or initialization to save everything.
6371 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6372 } else {
6373 // For non-Baker read barrier we have a temp-clobbering call.
6374 }
6375 }
6376 }
6377
6378 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6379 dex::TypeIndex type_index,
6380 Handle<mirror::Class> handle) {
6381 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6382 // Add a patch entry and return the label.
6383 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6384 PatchInfo<Label>* info = &jit_class_patches_.back();
6385 return &info->label;
6386 }
6387
6388 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6389 // move.
6390 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6391 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6392 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6393 codegen_->GenerateLoadClassRuntimeCall(cls);
6394 return;
6395 }
6396 DCHECK_EQ(cls->NeedsAccessCheck(),
6397 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6398 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6399
6400 LocationSummary* locations = cls->GetLocations();
6401 Location out_loc = locations->Out();
6402 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6403
6404 const ReadBarrierOption read_barrier_option = cls->IsInBootImage()
6405 ? kWithoutReadBarrier
6406 : kCompilerReadBarrierOption;
6407 bool generate_null_check = false;
6408 switch (load_kind) {
6409 case HLoadClass::LoadKind::kReferrersClass: {
6410 DCHECK(!cls->CanCallRuntime());
6411 DCHECK(!cls->MustGenerateClinitCheck());
6412 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6413 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6414 GenerateGcRootFieldLoad(
6415 cls,
6416 out_loc,
6417 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6418 /* fixup_label= */ nullptr,
6419 read_barrier_option);
6420 break;
6421 }
6422 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6423 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6424 codegen_->GetCompilerOptions().IsBootImageExtension());
6425 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6426 __ leal(out,
6427 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6428 codegen_->RecordBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6429 break;
6430 case HLoadClass::LoadKind::kBootImageRelRo: {
6431 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6432 __ movl(out,
6433 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6434 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6435 break;
6436 }
6437 case HLoadClass::LoadKind::kBssEntry:
6438 case HLoadClass::LoadKind::kBssEntryPublic:
6439 case HLoadClass::LoadKind::kBssEntryPackage: {
6440 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6441 /* no_rip= */ false);
6442 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6443 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6444 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6445 // No need for memory fence, thanks to the x86-64 memory model.
6446 generate_null_check = true;
6447 break;
6448 }
6449 case HLoadClass::LoadKind::kJitBootImageAddress: {
6450 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6451 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6452 DCHECK_NE(address, 0u);
6453 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6454 break;
6455 }
6456 case HLoadClass::LoadKind::kJitTableAddress: {
6457 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6458 /* no_rip= */ true);
6459 Label* fixup_label =
6460 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6461 // /* GcRoot<mirror::Class> */ out = *address
6462 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6463 break;
6464 }
6465 default:
6466 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6467 UNREACHABLE();
6468 }
6469
6470 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6471 DCHECK(cls->CanCallRuntime());
6472 SlowPathCode* slow_path =
6473 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6474 codegen_->AddSlowPath(slow_path);
6475 if (generate_null_check) {
6476 __ testl(out, out);
6477 __ j(kEqual, slow_path->GetEntryLabel());
6478 }
6479 if (cls->MustGenerateClinitCheck()) {
6480 GenerateClassInitializationCheck(slow_path, out);
6481 } else {
6482 __ Bind(slow_path->GetExitLabel());
6483 }
6484 }
6485 }
6486
6487 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6488 LocationSummary* locations =
6489 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6490 locations->SetInAt(0, Location::RequiresRegister());
6491 if (check->HasUses()) {
6492 locations->SetOut(Location::SameAsFirstInput());
6493 }
6494 // Rely on the type initialization to save everything we need.
6495 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6496 }
6497
6498 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6499 // Custom calling convention: RAX serves as both input and output.
6500 Location location = Location::RegisterLocation(RAX);
6501 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6502 }
6503
6504 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6505 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6506 }
6507
6508 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6509 // Custom calling convention: RAX serves as both input and output.
6510 Location location = Location::RegisterLocation(RAX);
6511 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6512 }
6513
6514 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6515 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6516 }
6517
6518 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6519 // We assume the class is not null.
6520 SlowPathCode* slow_path =
6521 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6522 codegen_->AddSlowPath(slow_path);
6523 GenerateClassInitializationCheck(slow_path,
6524 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6525 }
6526
6527 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6528 HLoadString::LoadKind desired_string_load_kind) {
6529 switch (desired_string_load_kind) {
6530 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6531 case HLoadString::LoadKind::kBootImageRelRo:
6532 case HLoadString::LoadKind::kBssEntry:
6533 DCHECK(!GetCompilerOptions().IsJitCompiler());
6534 break;
6535 case HLoadString::LoadKind::kJitBootImageAddress:
6536 case HLoadString::LoadKind::kJitTableAddress:
6537 DCHECK(GetCompilerOptions().IsJitCompiler());
6538 break;
6539 case HLoadString::LoadKind::kRuntimeCall:
6540 break;
6541 }
6542 return desired_string_load_kind;
6543 }
6544
6545 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6546 LocationSummary::CallKind call_kind = CodeGenerator::GetLoadStringCallKind(load);
6547 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6548 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6549 locations->SetOut(Location::RegisterLocation(RAX));
6550 } else {
6551 locations->SetOut(Location::RequiresRegister());
6552 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6553 if (!kUseReadBarrier || kUseBakerReadBarrier) {
6554 // Rely on the pResolveString to save everything.
6555 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6556 } else {
6557 // For non-Baker read barrier we have a temp-clobbering call.
6558 }
6559 }
6560 }
6561 }
6562
6563 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6564 dex::StringIndex string_index,
6565 Handle<mirror::String> handle) {
6566 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6567 // Add a patch entry and return the label.
6568 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6569 PatchInfo<Label>* info = &jit_string_patches_.back();
6570 return &info->label;
6571 }
6572
6573 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6574 // move.
6575 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6576 LocationSummary* locations = load->GetLocations();
6577 Location out_loc = locations->Out();
6578 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6579
6580 switch (load->GetLoadKind()) {
6581 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
6582 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6583 codegen_->GetCompilerOptions().IsBootImageExtension());
6584 __ leal(out,
6585 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6586 codegen_->RecordBootImageStringPatch(load);
6587 return;
6588 }
6589 case HLoadString::LoadKind::kBootImageRelRo: {
6590 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6591 __ movl(out,
6592 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6593 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
6594 return;
6595 }
6596 case HLoadString::LoadKind::kBssEntry: {
6597 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6598 /* no_rip= */ false);
6599 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
6600 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6601 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6602 // No need for memory fence, thanks to the x86-64 memory model.
6603 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
6604 codegen_->AddSlowPath(slow_path);
6605 __ testl(out, out);
6606 __ j(kEqual, slow_path->GetEntryLabel());
6607 __ Bind(slow_path->GetExitLabel());
6608 return;
6609 }
6610 case HLoadString::LoadKind::kJitBootImageAddress: {
6611 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
6612 DCHECK_NE(address, 0u);
6613 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6614 return;
6615 }
6616 case HLoadString::LoadKind::kJitTableAddress: {
6617 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6618 /* no_rip= */ true);
6619 Label* fixup_label = codegen_->NewJitRootStringPatch(
6620 load->GetDexFile(), load->GetStringIndex(), load->GetString());
6621 // /* GcRoot<mirror::String> */ out = *address
6622 GenerateGcRootFieldLoad(load, out_loc, address, fixup_label, kCompilerReadBarrierOption);
6623 return;
6624 }
6625 default:
6626 break;
6627 }
6628
6629 // TODO: Re-add the compiler code to do string dex cache lookup again.
6630 // Custom calling convention: RAX serves as both input and output.
6631 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
6632 codegen_->InvokeRuntime(kQuickResolveString,
6633 load,
6634 load->GetDexPc());
6635 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
6636 }
6637
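// GS-relative address of the current thread's pending-exception field.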
6638 static Address GetExceptionTlsAddress() {
6639 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
6640 /* no_rip= */ true);
6641 }
6642
6643 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
6644 LocationSummary* locations =
6645 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
6646 locations->SetOut(Location::RequiresRegister());
6647 }
6648
6649 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
6650 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
6651 }
6652
6653 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
6654 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
6655 }
6656
6657 void InstructionCodeGeneratorX86_64::VisitClearException(HClearException* clear ATTRIBUTE_UNUSED) {
6658 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
6659 }
6660
6661 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
6662 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6663 instruction, LocationSummary::kCallOnMainOnly);
6664 InvokeRuntimeCallingConvention calling_convention;
6665 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6666 }
6667
6668 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
6669 codegen_->InvokeRuntime(kQuickDeliverException, instruction, instruction->GetDexPc());
6670 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
6671 }
6672
6673 // Temp is used for read barrier.
6674 static size_t NumberOfInstanceOfTemps(TypeCheckKind type_check_kind) {
6675 if (kEmitCompilerReadBarrier &&
6676 !kUseBakerReadBarrier &&
6677 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
6678 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
6679 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
6680 return 1;
6681 }
6682 return 0;
6683 }
6684
6685 // The interface check has 2 temps, one for holding the number of interfaces and one for the
6686 // current interface pointer; the current interface is compared in memory.
6687 // The other checks have one temp for loading the object's class.
6688 static size_t NumberOfCheckCastTemps(TypeCheckKind type_check_kind) {
6689 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6690 return 2;
6691 }
6692 return 1 + NumberOfInstanceOfTemps(type_check_kind);
6693 }
6694
6695 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6696 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
6697 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6698 bool baker_read_barrier_slow_path = false;
6699 switch (type_check_kind) {
6700 case TypeCheckKind::kExactCheck:
6701 case TypeCheckKind::kAbstractClassCheck:
6702 case TypeCheckKind::kClassHierarchyCheck:
6703 case TypeCheckKind::kArrayObjectCheck: {
6704 bool needs_read_barrier = CodeGenerator::InstanceOfNeedsReadBarrier(instruction);
6705 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
6706 baker_read_barrier_slow_path = kUseBakerReadBarrier && needs_read_barrier;
6707 break;
6708 }
6709 case TypeCheckKind::kArrayCheck:
6710 case TypeCheckKind::kUnresolvedCheck:
6711 case TypeCheckKind::kInterfaceCheck:
6712 call_kind = LocationSummary::kCallOnSlowPath;
6713 break;
6714 case TypeCheckKind::kBitstringCheck:
6715 break;
6716 }
6717
6718 LocationSummary* locations =
6719 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6720 if (baker_read_barrier_slow_path) {
6721 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6722 }
6723 locations->SetInAt(0, Location::RequiresRegister());
6724 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
6725 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
6726 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
6727 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
6728 } else {
6729 locations->SetInAt(1, Location::Any());
6730 }
6731 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
6732 locations->SetOut(Location::RequiresRegister());
6733 locations->AddRegisterTemps(NumberOfInstanceOfTemps(type_check_kind));
6734 }
6735
6736 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
6737 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6738 LocationSummary* locations = instruction->GetLocations();
6739 Location obj_loc = locations->InAt(0);
6740 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
6741 Location cls = locations->InAt(1);
6742 Location out_loc = locations->Out();
6743 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6744 const size_t num_temps = NumberOfInstanceOfTemps(type_check_kind);
6745 DCHECK_LE(num_temps, 1u);
6746 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
6747 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6748 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6749 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6750 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
6751 SlowPathCode* slow_path = nullptr;
6752 NearLabel done, zero;
6753
6754 // Return 0 if `obj` is null.
6755 // Avoid null check if we know obj is not null.
6756 if (instruction->MustDoNullCheck()) {
6757 __ testl(obj, obj);
6758 __ j(kEqual, &zero);
6759 }
6760
6761 switch (type_check_kind) {
6762 case TypeCheckKind::kExactCheck: {
6763 ReadBarrierOption read_barrier_option =
6764 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6765 // /* HeapReference<Class> */ out = obj->klass_
6766 GenerateReferenceLoadTwoRegisters(instruction,
6767 out_loc,
6768 obj_loc,
6769 class_offset,
6770 read_barrier_option);
6771 if (cls.IsRegister()) {
6772 __ cmpl(out, cls.AsRegister<CpuRegister>());
6773 } else {
6774 DCHECK(cls.IsStackSlot()) << cls;
6775 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6776 }
6777 if (zero.IsLinked()) {
6778 // Classes must be equal for the instanceof to succeed.
6779 __ j(kNotEqual, &zero);
6780 __ movl(out, Immediate(1));
6781 __ jmp(&done);
6782 } else {
6783 __ setcc(kEqual, out);
6784 // setcc only sets the low byte.
6785 __ andl(out, Immediate(1));
6786 }
6787 break;
6788 }
6789
6790 case TypeCheckKind::kAbstractClassCheck: {
6791 ReadBarrierOption read_barrier_option =
6792 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6793 // /* HeapReference<Class> */ out = obj->klass_
6794 GenerateReferenceLoadTwoRegisters(instruction,
6795 out_loc,
6796 obj_loc,
6797 class_offset,
6798 read_barrier_option);
6799 // If the class is abstract, we eagerly fetch the super class of the
6800 // object to avoid doing a comparison we know will fail.
6801 NearLabel loop, success;
6802 __ Bind(&loop);
6803 // /* HeapReference<Class> */ out = out->super_class_
6804 GenerateReferenceLoadOneRegister(instruction,
6805 out_loc,
6806 super_offset,
6807 maybe_temp_loc,
6808 read_barrier_option);
6809 __ testl(out, out);
6810 // If `out` is null, we use it for the result, and jump to `done`.
6811 __ j(kEqual, &done);
6812 if (cls.IsRegister()) {
6813 __ cmpl(out, cls.AsRegister<CpuRegister>());
6814 } else {
6815 DCHECK(cls.IsStackSlot()) << cls;
6816 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6817 }
6818 __ j(kNotEqual, &loop);
6819 __ movl(out, Immediate(1));
6820 if (zero.IsLinked()) {
6821 __ jmp(&done);
6822 }
6823 break;
6824 }
6825
6826 case TypeCheckKind::kClassHierarchyCheck: {
6827 ReadBarrierOption read_barrier_option =
6828 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6829 // /* HeapReference<Class> */ out = obj->klass_
6830 GenerateReferenceLoadTwoRegisters(instruction,
6831 out_loc,
6832 obj_loc,
6833 class_offset,
6834 read_barrier_option);
6835 // Walk over the class hierarchy to find a match.
6836 NearLabel loop, success;
6837 __ Bind(&loop);
6838 if (cls.IsRegister()) {
6839 __ cmpl(out, cls.AsRegister<CpuRegister>());
6840 } else {
6841 DCHECK(cls.IsStackSlot()) << cls;
6842 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6843 }
6844 __ j(kEqual, &success);
6845 // /* HeapReference<Class> */ out = out->super_class_
6846 GenerateReferenceLoadOneRegister(instruction,
6847 out_loc,
6848 super_offset,
6849 maybe_temp_loc,
6850 read_barrier_option);
6851 __ testl(out, out);
6852 __ j(kNotEqual, &loop);
6853 // If `out` is null, we use it for the result, and jump to `done`.
6854 __ jmp(&done);
6855 __ Bind(&success);
6856 __ movl(out, Immediate(1));
6857 if (zero.IsLinked()) {
6858 __ jmp(&done);
6859 }
6860 break;
6861 }
6862
6863 case TypeCheckKind::kArrayObjectCheck: {
6864 ReadBarrierOption read_barrier_option =
6865 CodeGenerator::ReadBarrierOptionForInstanceOf(instruction);
6866 // /* HeapReference<Class> */ out = obj->klass_
6867 GenerateReferenceLoadTwoRegisters(instruction,
6868 out_loc,
6869 obj_loc,
6870 class_offset,
6871 read_barrier_option);
6872 // Do an exact check.
6873 NearLabel exact_check;
6874 if (cls.IsRegister()) {
6875 __ cmpl(out, cls.AsRegister<CpuRegister>());
6876 } else {
6877 DCHECK(cls.IsStackSlot()) << cls;
6878 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6879 }
6880 __ j(kEqual, &exact_check);
6881 // Otherwise, we need to check that the object's class is a non-primitive array.
6882 // /* HeapReference<Class> */ out = out->component_type_
6883 GenerateReferenceLoadOneRegister(instruction,
6884 out_loc,
6885 component_offset,
6886 maybe_temp_loc,
6887 read_barrier_option);
6888 __ testl(out, out);
6889 // If `out` is null, we use it for the result, and jump to `done`.
6890 __ j(kEqual, &done);
6891 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
6892 __ j(kNotEqual, &zero);
6893 __ Bind(&exact_check);
6894 __ movl(out, Immediate(1));
6895 __ jmp(&done);
6896 break;
6897 }
6898
6899 case TypeCheckKind::kArrayCheck: {
6900 // No read barrier since the slow path will retry upon failure.
6901 // /* HeapReference<Class> */ out = obj->klass_
6902 GenerateReferenceLoadTwoRegisters(instruction,
6903 out_loc,
6904 obj_loc,
6905 class_offset,
6906 kWithoutReadBarrier);
6907 if (cls.IsRegister()) {
6908 __ cmpl(out, cls.AsRegister<CpuRegister>());
6909 } else {
6910 DCHECK(cls.IsStackSlot()) << cls;
6911 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
6912 }
6913 DCHECK(locations->OnlyCallsOnSlowPath());
6914 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6915 instruction, /* is_fatal= */ false);
6916 codegen_->AddSlowPath(slow_path);
6917 __ j(kNotEqual, slow_path->GetEntryLabel());
6918 __ movl(out, Immediate(1));
6919 if (zero.IsLinked()) {
6920 __ jmp(&done);
6921 }
6922 break;
6923 }
6924
6925 case TypeCheckKind::kUnresolvedCheck:
6926 case TypeCheckKind::kInterfaceCheck: {
6927 // Note that we indeed only call on slow path, but we always go
6928 // into the slow path for the unresolved and interface check
6929 // cases.
6930 //
6931 // We cannot directly call the InstanceofNonTrivial runtime
6932 // entry point without resorting to a type checking slow path
6933 // here (i.e. by calling InvokeRuntime directly), as it would
6934 // require to assign fixed registers for the inputs of this
6935 // HInstanceOf instruction (following the runtime calling
6936 // convention), which might be cluttered by the potential first
6937 // read barrier emission at the beginning of this method.
6938 //
6939 // TODO: Introduce a new runtime entry point taking the object
6940 // to test (instead of its class) as argument, and let it deal
6941 // with the read barrier issues. This will let us refactor this
6942 // case of the `switch` code as it was previously (with a direct
6943 // call to the runtime not using a type checking slow path).
6944 // This should also be beneficial for the other cases above.
6945 DCHECK(locations->OnlyCallsOnSlowPath());
6946 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
6947 instruction, /* is_fatal= */ false);
6948 codegen_->AddSlowPath(slow_path);
6949 __ jmp(slow_path->GetEntryLabel());
6950 if (zero.IsLinked()) {
6951 __ jmp(&done);
6952 }
6953 break;
6954 }
6955
6956 case TypeCheckKind::kBitstringCheck: {
6957 // /* HeapReference<Class> */ out = obj->klass_
6958 GenerateReferenceLoadTwoRegisters(instruction,
6959 out_loc,
6960 obj_loc,
6961 class_offset,
6962 kWithoutReadBarrier);
6963
6964 GenerateBitstringTypeCheckCompare(instruction, out);
6965 if (zero.IsLinked()) {
6966 __ j(kNotEqual, &zero);
6967 __ movl(out, Immediate(1));
6968 __ jmp(&done);
6969 } else {
6970 __ setcc(kEqual, out);
6971 // setcc only sets the low byte.
6972 __ andl(out, Immediate(1));
6973 }
6974 break;
6975 }
6976 }
6977
6978 if (zero.IsLinked()) {
6979 __ Bind(&zero);
6980 __ xorl(out, out);
6981 }
6982
6983 if (done.IsLinked()) {
6984 __ Bind(&done);
6985 }
6986
6987 if (slow_path != nullptr) {
6988 __ Bind(slow_path->GetExitLabel());
6989 }
6990 }
6991
6992 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
6993 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
6994 LocationSummary::CallKind call_kind = CodeGenerator::GetCheckCastCallKind(instruction);
6995 LocationSummary* locations =
6996 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
6997 locations->SetInAt(0, Location::RequiresRegister());
6998 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
6999 // Require a register for the interface check since there is a loop that compares the class to
7000 // a memory address.
7001 locations->SetInAt(1, Location::RequiresRegister());
7002 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7003 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
7004 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)->AsConstant()));
7005 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)->AsConstant()));
7006 } else {
7007 locations->SetInAt(1, Location::Any());
7008 }
7009 // Add temps for read barriers and other uses. One is used by TypeCheckSlowPathX86_64.
7010 locations->AddRegisterTemps(NumberOfCheckCastTemps(type_check_kind));
7011 }
7012
7013 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
7014 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7015 LocationSummary* locations = instruction->GetLocations();
7016 Location obj_loc = locations->InAt(0);
7017 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7018 Location cls = locations->InAt(1);
7019 Location temp_loc = locations->GetTemp(0);
7020 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
7021 const size_t num_temps = NumberOfCheckCastTemps(type_check_kind);
7022 DCHECK_GE(num_temps, 1u);
7023 DCHECK_LE(num_temps, 2u);
7024 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
7025 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7026 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7027 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7028 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7029 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7030 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7031 const uint32_t object_array_data_offset =
7032 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7033
7034 bool is_type_check_slow_path_fatal = CodeGenerator::IsTypeCheckSlowPathFatal(instruction);
7035 SlowPathCode* type_check_slow_path =
7036 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7037 instruction, is_type_check_slow_path_fatal);
7038 codegen_->AddSlowPath(type_check_slow_path);
7039
7040
7041 NearLabel done;
7042 // Avoid null check if we know obj is not null.
7043 if (instruction->MustDoNullCheck()) {
7044 __ testl(obj, obj);
7045 __ j(kEqual, &done);
7046 }
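// Unlike instanceof, check-cast produces no value: each case below either reaches `done` on
// success or branches to `type_check_slow_path`, which performs the full check and throws on
// failure.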
7047
7048 switch (type_check_kind) {
7049 case TypeCheckKind::kExactCheck:
7050 case TypeCheckKind::kArrayCheck: {
7051 // /* HeapReference<Class> */ temp = obj->klass_
7052 GenerateReferenceLoadTwoRegisters(instruction,
7053 temp_loc,
7054 obj_loc,
7055 class_offset,
7056 kWithoutReadBarrier);
7057 if (cls.IsRegister()) {
7058 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7059 } else {
7060 DCHECK(cls.IsStackSlot()) << cls;
7061 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7062 }
7063 // Jump to slow path for throwing the exception or doing a
7064 // more involved array check.
7065 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7066 break;
7067 }
7068
7069 case TypeCheckKind::kAbstractClassCheck: {
7070 // /* HeapReference<Class> */ temp = obj->klass_
7071 GenerateReferenceLoadTwoRegisters(instruction,
7072 temp_loc,
7073 obj_loc,
7074 class_offset,
7075 kWithoutReadBarrier);
7076 // If the class is abstract, we eagerly fetch the super class of the
7077 // object to avoid doing a comparison we know will fail.
7078 NearLabel loop;
7079 __ Bind(&loop);
7080 // /* HeapReference<Class> */ temp = temp->super_class_
7081 GenerateReferenceLoadOneRegister(instruction,
7082 temp_loc,
7083 super_offset,
7084 maybe_temp2_loc,
7085 kWithoutReadBarrier);
7086
7087 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7088 // exception.
7089 __ testl(temp, temp);
7090 // Otherwise, compare the classes.
7091 __ j(kZero, type_check_slow_path->GetEntryLabel());
7092 if (cls.IsRegister()) {
7093 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7094 } else {
7095 DCHECK(cls.IsStackSlot()) << cls;
7096 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7097 }
7098 __ j(kNotEqual, &loop);
7099 break;
7100 }
7101
7102 case TypeCheckKind::kClassHierarchyCheck: {
7103 // /* HeapReference<Class> */ temp = obj->klass_
7104 GenerateReferenceLoadTwoRegisters(instruction,
7105 temp_loc,
7106 obj_loc,
7107 class_offset,
7108 kWithoutReadBarrier);
7109 // Walk over the class hierarchy to find a match.
7110 NearLabel loop;
7111 __ Bind(&loop);
7112 if (cls.IsRegister()) {
7113 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7114 } else {
7115 DCHECK(cls.IsStackSlot()) << cls;
7116 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7117 }
7118 __ j(kEqual, &done);
7119
7120 // /* HeapReference<Class> */ temp = temp->super_class_
7121 GenerateReferenceLoadOneRegister(instruction,
7122 temp_loc,
7123 super_offset,
7124 maybe_temp2_loc,
7125 kWithoutReadBarrier);
7126
7127 // If the class reference currently in `temp` is not null, jump
7128 // back at the beginning of the loop.
7129 __ testl(temp, temp);
7130 __ j(kNotZero, &loop);
7131 // Otherwise, jump to the slow path to throw the exception.
7132 __ jmp(type_check_slow_path->GetEntryLabel());
7133 break;
7134 }
7135
7136 case TypeCheckKind::kArrayObjectCheck: {
7137 // /* HeapReference<Class> */ temp = obj->klass_
7138 GenerateReferenceLoadTwoRegisters(instruction,
7139 temp_loc,
7140 obj_loc,
7141 class_offset,
7142 kWithoutReadBarrier);
7143 // Do an exact check.
7144 NearLabel check_non_primitive_component_type;
7145 if (cls.IsRegister()) {
7146 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7147 } else {
7148 DCHECK(cls.IsStackSlot()) << cls;
7149 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7150 }
7151 __ j(kEqual, &done);
7152
7153 // Otherwise, we need to check that the object's class is a non-primitive array.
7154 // /* HeapReference<Class> */ temp = temp->component_type_
7155 GenerateReferenceLoadOneRegister(instruction,
7156 temp_loc,
7157 component_offset,
7158 maybe_temp2_loc,
7159 kWithoutReadBarrier);
7160
7161 // If the component type is not null (i.e. the object is indeed
7162 // an array), jump to label `check_non_primitive_component_type`
7163 // to further check that this component type is not a primitive
7164 // type.
7165 __ testl(temp, temp);
7166 // Otherwise, jump to the slow path to throw the exception.
7167 __ j(kZero, type_check_slow_path->GetEntryLabel());
7168 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7169 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7170 break;
7171 }
7172
7173 case TypeCheckKind::kUnresolvedCheck: {
7174 // We always go into the type check slow path for the unresolved case.
7175 //
7176 // We cannot directly call the CheckCast runtime entry point
7177 // without resorting to a type checking slow path here (i.e. by
7178 // calling InvokeRuntime directly), as it would require to
7179 // assign fixed registers for the inputs of this HInstanceOf
7180 // instruction (following the runtime calling convention), which
7181 // might be cluttered by the potential first read barrier
7182 // emission at the beginning of this method.
7183 __ jmp(type_check_slow_path->GetEntryLabel());
7184 break;
7185 }
7186
7187 case TypeCheckKind::kInterfaceCheck: {
7188 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7189 // We cannot get false positives by doing this.
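// The emitted fast path is, roughly:
//   temp  = obj->klass_->iftable_
//   count = temp->length_
// loop:
//   count -= 2;                        // iftable entries come in (interface, method array) pairs
//   if (count < 0) goto slow_path;     // exhausted without finding a match
//   if (cls != temp[count]) goto loop; // compare against the entry's interface class
//   // Match found: fall through.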
7190 // /* HeapReference<Class> */ temp = obj->klass_
7191 GenerateReferenceLoadTwoRegisters(instruction,
7192 temp_loc,
7193 obj_loc,
7194 class_offset,
7195 kWithoutReadBarrier);
7196
7197 // /* HeapReference<Class> */ temp = temp->iftable_
7198 GenerateReferenceLoadTwoRegisters(instruction,
7199 temp_loc,
7200 temp_loc,
7201 iftable_offset,
7202 kWithoutReadBarrier);
7203 // Iftable is never null.
7204 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
7205 // Maybe poison the `cls` for direct comparison with memory.
7206 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7207 // Loop through the iftable and check if any class matches.
7208 NearLabel start_loop;
7209 __ Bind(&start_loop);
7210 // Need to subtract first to handle the empty array case.
7211 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7212 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7213 // Go to next interface if the classes do not match.
7214 __ cmpl(cls.AsRegister<CpuRegister>(),
7215 CodeGeneratorX86_64::ArrayAddress(temp,
7216 maybe_temp2_loc,
7217 TIMES_4,
7218 object_array_data_offset));
7219 __ j(kNotEqual, &start_loop); // Fall through if the classes match.
7220 // If `cls` was poisoned above, unpoison it.
7221 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7222 break;
7223 }
7224
7225 case TypeCheckKind::kBitstringCheck: {
7226 // /* HeapReference<Class> */ temp = obj->klass_
7227 GenerateReferenceLoadTwoRegisters(instruction,
7228 temp_loc,
7229 obj_loc,
7230 class_offset,
7231 kWithoutReadBarrier);
7232
7233 GenerateBitstringTypeCheckCompare(instruction, temp);
7234 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7235 break;
7236 }
7237 }
7238
7239 if (done.IsLinked()) {
7240 __ Bind(&done);
7241 }
7242
7243 __ Bind(type_check_slow_path->GetExitLabel());
7244 }
7245
7246 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7247 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7248 instruction, LocationSummary::kCallOnMainOnly);
7249 InvokeRuntimeCallingConvention calling_convention;
7250 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7251 }
7252
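// This is the lowering of monitorenter/monitorexit (i.e. `synchronized` blocks and methods): the
// object is passed in the first runtime-calling-convention register and the kQuickLockObject /
// kQuickUnlockObject entrypoints do the actual locking work.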
7253 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7254 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7255 instruction,
7256 instruction->GetDexPc());
7257 if (instruction->IsEnter()) {
7258 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7259 } else {
7260 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7261 }
7262 }
7263
7264 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7265 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7266 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7267 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7268 locations->SetInAt(0, Location::RequiresRegister());
7269 // There is no immediate variant of negated bitwise and in X86.
7270 locations->SetInAt(1, Location::RequiresRegister());
7271 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7272 }
7273
7274 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7275 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7276 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7277 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7278 locations->SetInAt(0, Location::RequiresRegister());
7279 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7280 }
7281
7282 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7283 LocationSummary* locations = instruction->GetLocations();
7284 Location first = locations->InAt(0);
7285 Location second = locations->InAt(1);
7286 Location dest = locations->Out();
7287 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7288 }
7289
7290 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7291 LocationSummary* locations = instruction->GetLocations();
7292 Location src = locations->InAt(0);
7293 Location dest = locations->Out();
7294 switch (instruction->GetOpKind()) {
7295 case HInstruction::kAnd:
7296 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7297 break;
7298 case HInstruction::kXor:
7299 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7300 break;
7301 default:
7302 LOG(FATAL) << "Unreachable";
7303 }
7304 }
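// These map directly to BMI instructions:
//   andn   dst, a, b : dst = (~a) & b
//   blsr   dst, src  : dst = src & (src - 1)   (reset the lowest set bit)
//   blsmsk dst, src  : dst = src ^ (src - 1)   (mask up to and including the lowest set bit)
// None of them accept an immediate operand, which is why the builders above require registers.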
7305
7306 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7307 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7308 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7309
7310 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7311 LocationSummary* locations =
7312 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7313 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7314 || instruction->GetResultType() == DataType::Type::kInt64);
7315 locations->SetInAt(0, Location::RequiresRegister());
7316 locations->SetInAt(1, Location::Any());
7317 locations->SetOut(Location::SameAsFirstInput());
7318 }
7319
7320 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7321 HandleBitwiseOperation(instruction);
7322 }
7323
7324 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7325 HandleBitwiseOperation(instruction);
7326 }
7327
7328 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7329 HandleBitwiseOperation(instruction);
7330 }
7331
7332 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7333 LocationSummary* locations = instruction->GetLocations();
7334 Location first = locations->InAt(0);
7335 Location second = locations->InAt(1);
7336 DCHECK(first.Equals(locations->Out()));
7337
7338 if (instruction->GetResultType() == DataType::Type::kInt32) {
7339 if (second.IsRegister()) {
7340 if (instruction->IsAnd()) {
7341 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7342 } else if (instruction->IsOr()) {
7343 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7344 } else {
7345 DCHECK(instruction->IsXor());
7346 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7347 }
7348 } else if (second.IsConstant()) {
7349 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7350 if (instruction->IsAnd()) {
7351 __ andl(first.AsRegister<CpuRegister>(), imm);
7352 } else if (instruction->IsOr()) {
7353 __ orl(first.AsRegister<CpuRegister>(), imm);
7354 } else {
7355 DCHECK(instruction->IsXor());
7356 __ xorl(first.AsRegister<CpuRegister>(), imm);
7357 }
7358 } else {
7359 Address address(CpuRegister(RSP), second.GetStackIndex());
7360 if (instruction->IsAnd()) {
7361 __ andl(first.AsRegister<CpuRegister>(), address);
7362 } else if (instruction->IsOr()) {
7363 __ orl(first.AsRegister<CpuRegister>(), address);
7364 } else {
7365 DCHECK(instruction->IsXor());
7366 __ xorl(first.AsRegister<CpuRegister>(), address);
7367 }
7368 }
7369 } else {
7370 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7371 CpuRegister first_reg = first.AsRegister<CpuRegister>();
7372 bool second_is_constant = false;
7373 int64_t value = 0;
7374 if (second.IsConstant()) {
7375 second_is_constant = true;
7376 value = second.GetConstant()->AsLongConstant()->GetValue();
7377 }
7378 bool is_int32_value = IsInt<32>(value);
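// x86-64 and/or/xor accept at most a sign-extended 32-bit immediate, so a 64-bit constant that
// does not fit is placed in the RIP-relative constant area and used as a memory operand instead.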
7379
7380 if (instruction->IsAnd()) {
7381 if (second_is_constant) {
7382 if (is_int32_value) {
7383 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7384 } else {
7385 __ andq(first_reg, codegen_->LiteralInt64Address(value));
7386 }
7387 } else if (second.IsDoubleStackSlot()) {
7388 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7389 } else {
7390 __ andq(first_reg, second.AsRegister<CpuRegister>());
7391 }
7392 } else if (instruction->IsOr()) {
7393 if (second_is_constant) {
7394 if (is_int32_value) {
7395 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7396 } else {
7397 __ orq(first_reg, codegen_->LiteralInt64Address(value));
7398 }
7399 } else if (second.IsDoubleStackSlot()) {
7400 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7401 } else {
7402 __ orq(first_reg, second.AsRegister<CpuRegister>());
7403 }
7404 } else {
7405 DCHECK(instruction->IsXor());
7406 if (second_is_constant) {
7407 if (is_int32_value) {
7408 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7409 } else {
7410 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7411 }
7412 } else if (second.IsDoubleStackSlot()) {
7413 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7414 } else {
7415 __ xorq(first_reg, second.AsRegister<CpuRegister>());
7416 }
7417 }
7418 }
7419 }
7420
7421 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7422 HInstruction* instruction,
7423 Location out,
7424 uint32_t offset,
7425 Location maybe_temp,
7426 ReadBarrierOption read_barrier_option) {
7427 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7428 if (read_barrier_option == kWithReadBarrier) {
7429 CHECK(kEmitCompilerReadBarrier);
7430 if (kUseBakerReadBarrier) {
7431 // Load with fast path based Baker's read barrier.
7432 // /* HeapReference<Object> */ out = *(out + offset)
7433 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7434 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7435 } else {
7436 // Load with slow path based read barrier.
7437 // Save the value of `out` into `maybe_temp` before overwriting it
7438 // in the following move operation, as we will need it for the
7439 // read barrier below.
7440 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7441 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7442 // /* HeapReference<Object> */ out = *(out + offset)
7443 __ movl(out_reg, Address(out_reg, offset));
7444 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7445 }
7446 } else {
7447 // Plain load with no read barrier.
7448 // /* HeapReference<Object> */ out = *(out + offset)
7449 __ movl(out_reg, Address(out_reg, offset));
7450 __ MaybeUnpoisonHeapReference(out_reg);
7451 }
7452 }
7453
7454 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7455 HInstruction* instruction,
7456 Location out,
7457 Location obj,
7458 uint32_t offset,
7459 ReadBarrierOption read_barrier_option) {
7460 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7461 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7462 if (read_barrier_option == kWithReadBarrier) {
7463 CHECK(kEmitCompilerReadBarrier);
7464 if (kUseBakerReadBarrier) {
7465 // Load with fast path based Baker's read barrier.
7466 // /* HeapReference<Object> */ out = *(obj + offset)
7467 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7468 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7469 } else {
7470 // Load with slow path based read barrier.
7471 // /* HeapReference<Object> */ out = *(obj + offset)
7472 __ movl(out_reg, Address(obj_reg, offset));
7473 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7474 }
7475 } else {
7476 // Plain load with no read barrier.
7477 // /* HeapReference<Object> */ out = *(obj + offset)
7478 __ movl(out_reg, Address(obj_reg, offset));
7479 __ MaybeUnpoisonHeapReference(out_reg);
7480 }
7481 }
7482
7483 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7484 HInstruction* instruction,
7485 Location root,
7486 const Address& address,
7487 Label* fixup_label,
7488 ReadBarrierOption read_barrier_option) {
7489 CpuRegister root_reg = root.AsRegister<CpuRegister>();
7490 if (read_barrier_option == kWithReadBarrier) {
7491 DCHECK(kEmitCompilerReadBarrier);
7492 if (kUseBakerReadBarrier) {
7493 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7494 // Baker's read barriers are used:
7495 //
7496 // root = obj.field;
7497 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7498 // if (temp != null) {
7499 // root = temp(root)
7500 // }
7501
7502 // /* GcRoot<mirror::Object> */ root = *address
7503 __ movl(root_reg, address);
7504 if (fixup_label != nullptr) {
7505 __ Bind(fixup_label);
7506 }
7507 static_assert(
7508 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7509 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7510 "have different sizes.");
7511 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7512 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7513 "have different sizes.");
7514
7515 // Slow path marking the GC root `root`.
7516 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7517 instruction, root, /* unpoison_ref_before_marking= */ false);
7518 codegen_->AddSlowPath(slow_path);
7519
7520 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
7521 const int32_t entry_point_offset =
7522 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
7523 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
7524 // The entrypoint is null when the GC is not marking.
7525 __ j(kNotEqual, slow_path->GetEntryLabel());
7526 __ Bind(slow_path->GetExitLabel());
7527 } else {
7528 // GC root loaded through a slow path for read barriers other
7529 // than Baker's.
7530 // /* GcRoot<mirror::Object>* */ root = address
7531 __ leaq(root_reg, address);
7532 if (fixup_label != nullptr) {
7533 __ Bind(fixup_label);
7534 }
7535 // /* mirror::Object* */ root = root->Read()
7536 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
7537 }
7538 } else {
7539 // Plain GC root load with no read barrier.
7540 // /* GcRoot<mirror::Object> */ root = *address
7541 __ movl(root_reg, address);
7542 if (fixup_label != nullptr) {
7543 __ Bind(fixup_label);
7544 }
7545 // Note that GC roots are not affected by heap poisoning, thus we
7546 // do not have to unpoison `root_reg` here.
7547 }
7548 }
7549
7550 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
7551 Location ref,
7552 CpuRegister obj,
7553 uint32_t offset,
7554 bool needs_null_check) {
7555 DCHECK(kEmitCompilerReadBarrier);
7556 DCHECK(kUseBakerReadBarrier);
7557
7558 // /* HeapReference<Object> */ ref = *(obj + offset)
7559 Address src(obj, offset);
7560 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7561 }
7562
7563 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
7564 Location ref,
7565 CpuRegister obj,
7566 uint32_t data_offset,
7567 Location index,
7568 bool needs_null_check) {
7569 DCHECK(kEmitCompilerReadBarrier);
7570 DCHECK(kUseBakerReadBarrier);
7571
7572 static_assert(
7573 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
7574 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
7575 // /* HeapReference<Object> */ ref =
7576 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
7577 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
7578 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
7579 }
7580
7581 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
7582 Location ref,
7583 CpuRegister obj,
7584 const Address& src,
7585 bool needs_null_check,
7586 bool always_update_field,
7587 CpuRegister* temp1,
7588 CpuRegister* temp2) {
7589 DCHECK(kEmitCompilerReadBarrier);
7590 DCHECK(kUseBakerReadBarrier);
7591
7592 // In slow path based read barriers, the read barrier call is
7593 // inserted after the original load. However, in fast path based
7594 // Baker's read barriers, we need to perform the load of
7595 // mirror::Object::monitor_ *before* the original reference load.
7596 // This load-load ordering is required by the read barrier.
7597 // The fast path/slow path (for Baker's algorithm) should look like:
7598 //
7599 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
7600 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
7601 // HeapReference<Object> ref = *src; // Original reference load.
7602 // bool is_gray = (rb_state == ReadBarrier::GrayState());
7603 // if (is_gray) {
7604 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
7605 // }
7606 //
7607 // Note: the original implementation in ReadBarrier::Barrier is
7608 // slightly more complex as:
7609 // - it implements the load-load fence using a data dependency on
7610 // the high-bits of rb_state, which are expected to be all zeroes
7611 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
7612 // here, which is a no-op thanks to the x86-64 memory model);
7613 // - it performs additional checks that we do not do here for
7614 // performance reasons.
7615
7616 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
7617 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
7618
7619 // Given the numeric representation, it's enough to check the low bit of the rb_state.
7620 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
7621 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
7622 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
7623 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
7624 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
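// The testb below probes only the byte of the lock word that contains the read barrier state bit
// (gray_byte_position selects the byte, test_value the bit within it), so the gray check costs a
// single byte-sized memory test.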
7625
7626 // if (rb_state == ReadBarrier::GrayState())
7627 // ref = ReadBarrier::Mark(ref);
7628 // At this point, just do the "if" and make sure that flags are preserved until the branch.
7629 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
7630 if (needs_null_check) {
7631 MaybeRecordImplicitNullCheck(instruction);
7632 }
7633
7634 // Load fence to prevent load-load reordering.
7635 // Note that this is a no-op, thanks to the x86-64 memory model.
7636 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
7637
7638 // The actual reference load.
7639 // /* HeapReference<Object> */ ref = *src
7640 __ movl(ref_reg, src); // Flags are unaffected.
7641
7642 // Note: Reference unpoisoning modifies the flags, so we need to delay it after the branch.
7643 // Slow path marking the object `ref` when it is gray.
7644 SlowPathCode* slow_path;
7645 if (always_update_field) {
7646 DCHECK(temp1 != nullptr);
7647 DCHECK(temp2 != nullptr);
7648 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
7649 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
7650 } else {
7651 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7652 instruction, ref, /* unpoison_ref_before_marking= */ true);
7653 }
7654 AddSlowPath(slow_path);
7655
7656 // We have done the "if" of the gray bit check above, now branch based on the flags.
7657 __ j(kNotZero, slow_path->GetEntryLabel());
7658
7659 // Object* ref = ref_addr->AsMirrorPtr()
7660 __ MaybeUnpoisonHeapReference(ref_reg);
7661
7662 __ Bind(slow_path->GetExitLabel());
7663 }
7664
7665 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
7666 Location out,
7667 Location ref,
7668 Location obj,
7669 uint32_t offset,
7670 Location index) {
7671 DCHECK(kEmitCompilerReadBarrier);
7672
7673 // Insert a slow path based read barrier *after* the reference load.
7674 //
7675 // If heap poisoning is enabled, the unpoisoning of the loaded
7676 // reference will be carried out by the runtime within the slow
7677 // path.
7678 //
7679 // Note that `ref` currently does not get unpoisoned (when heap
7680 // poisoning is enabled), which is alright as the `ref` argument is
7681 // not used by the artReadBarrierSlow entry point.
7682 //
7683 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
7684 SlowPathCode* slow_path = new (GetScopedAllocator())
7685 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
7686 AddSlowPath(slow_path);
7687
7688 __ jmp(slow_path->GetEntryLabel());
7689 __ Bind(slow_path->GetExitLabel());
7690 }
7691
7692 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
7693 Location out,
7694 Location ref,
7695 Location obj,
7696 uint32_t offset,
7697 Location index) {
7698 if (kEmitCompilerReadBarrier) {
7699 // Baker's read barriers shall be handled by the fast path
7700 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
7701 DCHECK(!kUseBakerReadBarrier);
7702 // If heap poisoning is enabled, unpoisoning will be taken care of
7703 // by the runtime within the slow path.
7704 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
7705 } else if (kPoisonHeapReferences) {
7706 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
7707 }
7708 }
7709
7710 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
7711 Location out,
7712 Location root) {
7713 DCHECK(kEmitCompilerReadBarrier);
7714
7715 // Insert a slow path based read barrier *after* the GC root load.
7716 //
7717 // Note that GC roots are not affected by heap poisoning, so we do
7718 // not need to do anything special for this here.
7719 SlowPathCode* slow_path =
7720 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
7721 AddSlowPath(slow_path);
7722
7723 __ jmp(slow_path->GetEntryLabel());
7724 __ Bind(slow_path->GetExitLabel());
7725 }
7726
7727 void LocationsBuilderX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7728 // Nothing to do, this should be removed during prepare for register allocator.
7729 LOG(FATAL) << "Unreachable";
7730 }
7731
7732 void InstructionCodeGeneratorX86_64::VisitBoundType(HBoundType* instruction ATTRIBUTE_UNUSED) {
7733 // Nothing to do, this should be removed during prepare for register allocator.
7734 LOG(FATAL) << "Unreachable";
7735 }
7736
7737 // Simple implementation of packed switch - generate cascaded compare/jumps.
7738 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7739 LocationSummary* locations =
7740 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
7741 locations->SetInAt(0, Location::RequiresRegister());
7742 locations->AddTemp(Location::RequiresRegister());
7743 locations->AddTemp(Location::RequiresRegister());
7744 }
7745
7746 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
7747 int32_t lower_bound = switch_instr->GetStartValue();
7748 uint32_t num_entries = switch_instr->GetNumEntries();
7749 LocationSummary* locations = switch_instr->GetLocations();
7750 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
7751 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
7752 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
7753 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
7754
7755 // Should we generate smaller inline compare/jumps?
7756 if (num_entries <= kPackedSwitchJumpTableThreshold) {
7757 // Figure out the correct compare values and jump conditions.
7758 // Handle the first compare/branch as a special case because it might
7759 // jump to the default case.
7760 DCHECK_GT(num_entries, 2u);
7761 Condition first_condition;
7762 uint32_t index;
7763 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
7764 if (lower_bound != 0) {
7765 first_condition = kLess;
7766 __ cmpl(value_reg_in, Immediate(lower_bound));
7767 __ j(first_condition, codegen_->GetLabelOf(default_block));
7768 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
7769
7770 index = 1;
7771 } else {
7772 // Handle all the compare/jumps below.
7773 first_condition = kBelow;
7774 index = 0;
7775 }
7776
7777 // Handle the rest of the compare/jumps.
7778 for (; index + 1 < num_entries; index += 2) {
7779 int32_t compare_to_value = lower_bound + index + 1;
7780 __ cmpl(value_reg_in, Immediate(compare_to_value));
7781 // Jump to successors[index] if value < case_value[index].
7782 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
7783 // Jump to successors[index + 1] if value == case_value[index + 1].
7784 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
7785 }
7786
7787 if (index != num_entries) {
7788 // There is an odd number of entries. Handle the last one.
7789 DCHECK_EQ(index + 1, num_entries);
7790 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
7791 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
7792 }
7793
7794 // And the default for any other value.
7795 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
7796 __ jmp(codegen_->GetLabelOf(default_block));
7797 }
7798 return;
7799 }
7800
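// Jump table path. The emitted code is, roughly:
//   leal    temp, [value - lower_bound]       ; only when the switch is biased
//   cmpl    temp, num_entries - 1
//   ja      default_block
//   leaq    base, [rip + jump_table]          ; the table lives in the constant area
//   movsxd  temp, dword ptr [base + temp*4]   ; signed 32-bit offset, relative to the table start
//   addq    temp, base
//   jmp     temp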
7801 // Remove the bias, if needed.
7802 Register value_reg_out = value_reg_in.AsRegister();
7803 if (lower_bound != 0) {
7804 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
7805 value_reg_out = temp_reg.AsRegister();
7806 }
7807 CpuRegister value_reg(value_reg_out);
7808
7809 // Is the value in range?
7810 __ cmpl(value_reg, Immediate(num_entries - 1));
7811 __ j(kAbove, codegen_->GetLabelOf(default_block));
7812
7813 // We are in the range of the table.
7814 // Load the address of the jump table in the constant area.
7815 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
7816
7817 // Load the (signed) offset from the jump table.
7818 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
7819
7820 // Add the offset to the address of the table base.
7821 __ addq(temp_reg, base_reg);
7822
7823 // And jump.
7824 __ jmp(temp_reg);
7825 }
7826
7827 void LocationsBuilderX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7828 ATTRIBUTE_UNUSED) {
7829 LOG(FATAL) << "Unreachable";
7830 }
7831
7832 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(HIntermediateAddress* instruction
7833 ATTRIBUTE_UNUSED) {
7834 LOG(FATAL) << "Unreachable";
7835 }
7836
7837 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
7838 if (value == 0) {
7839 __ xorl(dest, dest);
7840 } else {
7841 __ movl(dest, Immediate(value));
7842 }
7843 }
7844
7845 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
7846 if (value == 0) {
7847 // Clears upper bits too.
7848 __ xorl(dest, dest);
7849 } else if (IsUint<32>(value)) {
7850 // We can use a 32 bit move, as it will zero-extend and is shorter.
7851 __ movl(dest, Immediate(static_cast<int32_t>(value)));
7852 } else {
7853 __ movq(dest, Immediate(value));
7854 }
7855 }
7856
7857 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
7858 if (value == 0) {
7859 __ xorps(dest, dest);
7860 } else {
7861 __ movss(dest, LiteralInt32Address(value));
7862 }
7863 }
7864
7865 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
7866 if (value == 0) {
7867 __ xorpd(dest, dest);
7868 } else {
7869 __ movsd(dest, LiteralInt64Address(value));
7870 }
7871 }
7872
7873 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
7874 Load32BitValue(dest, bit_cast<int32_t, float>(value));
7875 }
7876
7877 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
7878 Load64BitValue(dest, bit_cast<int64_t, double>(value));
7879 }
7880
7881 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
7882 if (value == 0) {
7883 __ testl(dest, dest);
7884 } else {
7885 __ cmpl(dest, Immediate(value));
7886 }
7887 }
7888
7889 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
7890 if (IsInt<32>(value)) {
7891 if (value == 0) {
7892 __ testq(dest, dest);
7893 } else {
7894 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
7895 }
7896 } else {
7897 // Value won't fit in an int.
7898 __ cmpq(dest, LiteralInt64Address(value));
7899 }
7900 }
7901
7902 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
7903 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7904 GenerateIntCompare(lhs_reg, rhs);
7905 }
7906
7907 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
7908 if (rhs.IsConstant()) {
7909 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
7910 Compare32BitValue(lhs, value);
7911 } else if (rhs.IsStackSlot()) {
7912 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7913 } else {
7914 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
7915 }
7916 }
7917
7918 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
7919 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
7920 if (rhs.IsConstant()) {
7921 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
7922 Compare64BitValue(lhs_reg, value);
7923 } else if (rhs.IsDoubleStackSlot()) {
7924 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
7925 } else {
7926 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
7927 }
7928 }
7929
7930 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
7931 Location index,
7932 ScaleFactor scale,
7933 uint32_t data_offset) {
7934 return index.IsConstant() ?
7935 Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset) :
7936 Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
7937 }
7938
7939 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
7940 DCHECK(dest.IsDoubleStackSlot());
7941 if (IsInt<32>(value)) {
7942 // Can move directly as an int32 constant.
7943 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
7944 Immediate(static_cast<int32_t>(value)));
7945 } else {
7946 Load64BitValue(CpuRegister(TMP), value);
7947 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
7948 }
7949 }
7950
7951 /**
7952 * Class to handle late fixup of offsets into constant area.
7953 */
7954 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
7955 public:
7956 RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
7957 : codegen_(&codegen), offset_into_constant_area_(offset) {}
7958
7959 protected:
7960 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
7961
7962 CodeGeneratorX86_64* codegen_;
7963
7964 private:
7965 void Process(const MemoryRegion& region, int pos) override {
7966 // Patch the correct offset for the instruction. We use the address of the
7967 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
7968 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
7969 int32_t relative_position = constant_offset - pos;
7970
7971 // Patch in the right value.
7972 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
7973 }
7974
7975 // Location in constant area that the fixup refers to.
7976 size_t offset_into_constant_area_;
7977 };
7978
7979 /**
7980 * Class to handle late fixup of offsets to a jump table that will be created in the
7981 * constant area.
7982 */
7983 class JumpTableRIPFixup : public RIPFixup {
7984 public:
7985 JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
7986 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
7987
7988 void CreateJumpTable() {
7989 X86_64Assembler* assembler = codegen_->GetAssembler();
7990
7991 // Ensure that the reference to the jump table has the correct offset.
7992 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
7993 SetOffset(offset_in_constant_table);
7994
7995 // Compute the offset from the start of the function to this jump table.
7996 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
7997
7998 // Populate the jump table with the correct values for the jump table.
7999 int32_t num_entries = switch_instr_->GetNumEntries();
8000 HBasicBlock* block = switch_instr_->GetBlock();
8001 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8002 // The value that we want is the target offset - the position of the table.
8003 for (int32_t i = 0; i < num_entries; i++) {
8004 HBasicBlock* b = successors[i];
8005 Label* l = codegen_->GetLabelOf(b);
8006 DCHECK(l->IsBound());
8007 int32_t offset_to_block = l->Position() - current_table_offset;
8008 assembler->AppendInt32(offset_to_block);
8009 }
8010 }
8011
8012 private:
8013 const HPackedSwitch* switch_instr_;
8014 };
8015
8016 void CodeGeneratorX86_64::Finalize(CodeAllocator* allocator) {
8017 // Generate the constant area if needed.
8018 X86_64Assembler* assembler = GetAssembler();
8019 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8020 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
8021 assembler->Align(4, 0);
8022 constant_area_start_ = assembler->CodeSize();
8023
8024 // Populate any jump tables.
8025 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8026 jump_table->CreateJumpTable();
8027 }
8028
8029 // And now add the constant area to the generated code.
8030 assembler->AddConstantArea();
8031 }
8032
8033 // And finish up.
8034 CodeGenerator::Finalize(allocator);
8035 }
8036
8037 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
8038 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
8039 return Address::RIP(fixup);
8040 }
8041
8042 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
8043 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
8044 return Address::RIP(fixup);
8045 }
8046
8047 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
8048 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
8049 return Address::RIP(fixup);
8050 }
8051
8052 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
8053 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
8054 return Address::RIP(fixup);
8055 }
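// Typical use of the Literal*Address() helpers above: Load64BitValue() emits
// `movsd(dest, LiteralInt64Address(value))`, producing a RIP-relative operand whose 32-bit
// displacement is patched by RIPFixup::Process() once the constant area's final position is
// known (see Finalize()).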
8056
8057 // TODO: trg as memory.
8058 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
8059 if (!trg.IsValid()) {
8060 DCHECK_EQ(type, DataType::Type::kVoid);
8061 return;
8062 }
8063
8064 DCHECK_NE(type, DataType::Type::kVoid);
8065
8066 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
8067 if (trg.Equals(return_loc)) {
8068 return;
8069 }
8070
8071 // Let the parallel move resolver take care of all of this.
8072 HParallelMove parallel_move(GetGraph()->GetAllocator());
8073 parallel_move.AddMove(return_loc, trg, type, nullptr);
8074 GetMoveResolver()->EmitNativeCode(&parallel_move);
8075 }
8076
8077 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
8078 // Create a fixup to be used to create and address the jump table.
8079 JumpTableRIPFixup* table_fixup =
8080 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8081
8082 // We have to populate the jump tables.
8083 fixups_to_jump_tables_.push_back(table_fixup);
8084 return Address::RIP(table_fixup);
8085 }
8086
8087 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
8088 const Address& addr_high,
8089 int64_t v,
8090 HInstruction* instruction) {
8091 if (IsInt<32>(v)) {
8092 int32_t v_32 = v;
8093 __ movq(addr_low, Immediate(v_32));
8094 MaybeRecordImplicitNullCheck(instruction);
8095 } else {
8096 // Didn't fit in a register. Do it in pieces.
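// (x86-64 has no 64-bit immediate store, so the value is written as two 32-bit halves; only the
// first store can carry the implicit null check, as it is the access that faults first.)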
8097 int32_t low_v = Low32Bits(v);
8098 int32_t high_v = High32Bits(v);
8099 __ movl(addr_low, Immediate(low_v));
8100 MaybeRecordImplicitNullCheck(instruction);
8101 __ movl(addr_high, Immediate(high_v));
8102 }
8103 }
8104
8105 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
8106 const uint8_t* roots_data,
8107 const PatchInfo<Label>& info,
8108 uint64_t index_in_table) const {
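// Backpatch the 32-bit literal of a previously emitted root load (its patch label was recorded
// when the load was generated) so that it holds the address of the root's slot in the JIT roots
// table at `roots_data`.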
8109 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8110 uintptr_t address =
8111 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
8112 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8113 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8114 dchecked_integral_cast<uint32_t>(address);
8115 }
8116
8117 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8118 for (const PatchInfo<Label>& info : jit_string_patches_) {
8119 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8120 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8121 PatchJitRootUse(code, roots_data, info, index_in_table);
8122 }
8123
8124 for (const PatchInfo<Label>& info : jit_class_patches_) {
8125 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8126 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8127 PatchJitRootUse(code, roots_data, info, index_in_table);
8128 }
8129 }
8130
8131 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
8132 return codegen_->GetInstructionSetFeatures().HasAVX();
8133 }
8134
8135 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
8136 return codegen_->GetInstructionSetFeatures().HasAVX2();
8137 }
8138
8139 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
8140 return codegen_->GetInstructionSetFeatures().HasAVX();
8141 }
8142
8143 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
8144 return codegen_->GetInstructionSetFeatures().HasAVX2();
8145 }
8146
8147 #undef __
8148
8149 } // namespace x86_64
8150 } // namespace art
8151