/*
 * Copyright (C) 2014 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_x86_64.h"

#include "arch/x86_64/jni_frame_x86_64.h"
#include "art_method-inl.h"
#include "class_root-inl.h"
#include "class_table.h"
#include "code_generator_utils.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "gc/accounting/card_table.h"
#include "gc/space/image_space.h"
#include "heap_poisoning.h"
#include "interpreter/mterp/nterp.h"
#include "intrinsics.h"
#include "intrinsics_list.h"
#include "intrinsics_utils.h"
#include "intrinsics_x86_64.h"
#include "jit/profiling_info.h"
#include "linker/linker_patch.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/class-inl.h"
#include "mirror/method_type.h"
#include "mirror/object_reference.h"
#include "mirror/var_handle.h"
#include "optimizing/nodes.h"
#include "profiling_info_builder.h"
#include "scoped_thread_state_change-inl.h"
#include "thread.h"
#include "trace.h"
#include "utils/assembler.h"
#include "utils/stack_checks.h"
#include "utils/x86_64/assembler_x86_64.h"
#include "utils/x86_64/constants_x86_64.h"
#include "utils/x86_64/managed_register_x86_64.h"

namespace art HIDDEN {

template<class MirrorType>
class GcRoot;

namespace x86_64 {

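// Offset of the current ArtMethod* within the managed frame: it is spilled at the bottom of
// the frame, i.e. at RSP once the frame has been set up.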
static constexpr int kCurrentMethodStackOffset = 0;
// The compare/jump sequence will generate about (1.5 * num_entries) instructions, while the
// jump table version generates 7 instructions and num_entries literals. The compare/jump
// sequence therefore generates less code/data for a small num_entries.
static constexpr uint32_t kPackedSwitchJumpTableThreshold = 5;

static constexpr Register kCoreCalleeSaves[] = { RBX, RBP, R12, R13, R14, R15 };
static constexpr FloatRegister kFpuCalleeSaves[] = { XMM12, XMM13, XMM14, XMM15 };

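// Mask for the C2 condition flag (bit 10) of the x87 FPU status word, checked to detect an
// incomplete fprem reduction when computing floating-point remainders.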
static constexpr int kC2ConditionMask = 0x400;

static RegisterSet OneRegInReferenceOutSaveEverythingCallerSaves() {
  // Custom calling convention: RAX serves as both input and output.
  RegisterSet caller_saves = RegisterSet::Empty();
  caller_saves.Add(Location::RegisterLocation(RAX));
  return caller_saves;
}

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(codegen->GetAssembler())->  // NOLINT
#define QUICK_ENTRY_POINT(x) QUICK_ENTRYPOINT_OFFSET(kX86_64PointerSize, x).Int32Value()

class NullCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit NullCheckSlowPathX86_64(HNullCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->InvokeRuntime(kQuickThrowNullPointer, instruction_, this);
    CheckEntrypointTypes<kQuickThrowNullPointer, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "NullCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(NullCheckSlowPathX86_64);
};

class DivZeroCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DivZeroCheckSlowPathX86_64(HDivZeroCheck* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    x86_64_codegen->InvokeRuntime(kQuickThrowDivZero, instruction_, this);
    CheckEntrypointTypes<kQuickThrowDivZero, void, void>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "DivZeroCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DivZeroCheckSlowPathX86_64);
};

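// Slow path for integer division/remainder when the divisor is -1: `idiv` would raise a #DE
// exception for kMinInt / -1 (or kMinLong / -1), so the result is computed directly instead
// (negation of the dividend for a division, zero for a remainder).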
class DivRemMinusOneSlowPathX86_64 : public SlowPathCode {
 public:
  DivRemMinusOneSlowPathX86_64(HInstruction* at, Register reg, DataType::Type type, bool is_div)
      : SlowPathCode(at), cpu_reg_(CpuRegister(reg)), type_(type), is_div_(is_div) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    __ Bind(GetEntryLabel());
    if (type_ == DataType::Type::kInt32) {
      if (is_div_) {
        __ negl(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }

    } else {
      DCHECK_EQ(DataType::Type::kInt64, type_);
      if (is_div_) {
        __ negq(cpu_reg_);
      } else {
        __ xorl(cpu_reg_, cpu_reg_);
      }
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "DivRemMinusOneSlowPathX86_64"; }

 private:
  const CpuRegister cpu_reg_;
  const DataType::Type type_;
  const bool is_div_;
  DISALLOW_COPY_AND_ASSIGN(DivRemMinusOneSlowPathX86_64);
};

class SuspendCheckSlowPathX86_64 : public SlowPathCode {
 public:
  SuspendCheckSlowPathX86_64(HSuspendCheck* instruction, HBasicBlock* successor)
      : SlowPathCode(instruction), successor_(successor) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);  // Only saves full width XMM for SIMD.
    x86_64_codegen->InvokeRuntime(kQuickTestSuspend, instruction_, this);
    CheckEntrypointTypes<kQuickTestSuspend, void, void>();
    RestoreLiveRegisters(codegen, locations);  // Only restores full width XMM for SIMD.
    if (successor_ == nullptr) {
      __ jmp(GetReturnLabel());
    } else {
      __ jmp(x86_64_codegen->GetLabelOf(successor_));
    }
  }

  Label* GetReturnLabel() {
    DCHECK(successor_ == nullptr);
    return &return_label_;
  }

  HBasicBlock* GetSuccessor() const {
    return successor_;
  }

  const char* GetDescription() const override { return "SuspendCheckSlowPathX86_64"; }

 private:
  HBasicBlock* const successor_;
  Label return_label_;

  DISALLOW_COPY_AND_ASSIGN(SuspendCheckSlowPathX86_64);
};

class BoundsCheckSlowPathX86_64 : public SlowPathCode {
 public:
  explicit BoundsCheckSlowPathX86_64(HBoundsCheck* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    if (instruction_->CanThrowIntoCatchBlock()) {
      // Live registers will be restored in the catch block if caught.
      SaveLiveRegisters(codegen, locations);
    }

    Location index_loc = locations->InAt(0);
    Location length_loc = locations->InAt(1);
    InvokeRuntimeCallingConvention calling_convention;
    Location index_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(0));
    Location length_arg = Location::RegisterLocation(calling_convention.GetRegisterAt(1));

    // Are we using an array length from memory?
    if (!length_loc.IsValid()) {
      DCHECK(instruction_->InputAt(1)->IsArrayLength());
      HArrayLength* array_length = instruction_->InputAt(1)->AsArrayLength();
      DCHECK(array_length->IsEmittedAtUseSite());
      uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length);
      Location array_loc = array_length->GetLocations()->InAt(0);
      Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
      if (!index_loc.Equals(length_arg)) {
        // The index is not clobbered by loading the length directly to `length_arg`.
        __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
        x86_64_codegen->Move(index_arg, index_loc);
      } else if (!array_loc.Equals(index_arg)) {
        // The array reference is not clobbered by the index move.
        x86_64_codegen->Move(index_arg, index_loc);
        __ movl(length_arg.AsRegister<CpuRegister>(), array_len);
      } else {
        // Load the array length into `TMP`.
        DCHECK(codegen->IsBlockedCoreRegister(TMP));
        __ movl(CpuRegister(TMP), array_len);
        // Single move to CPU register does not clobber `TMP`.
        x86_64_codegen->Move(index_arg, index_loc);
        __ movl(length_arg.AsRegister<CpuRegister>(), CpuRegister(TMP));
      }
      if (mirror::kUseStringCompression && array_length->IsStringLength()) {
        __ shrl(length_arg.AsRegister<CpuRegister>(), Immediate(1));
      }
    } else {
      // We're moving two locations to locations that could overlap,
      // so we need a parallel move resolver.
      codegen->EmitParallelMoves(
          index_loc,
          index_arg,
          DataType::Type::kInt32,
          length_loc,
          length_arg,
          DataType::Type::kInt32);
    }

    QuickEntrypointEnum entrypoint = instruction_->AsBoundsCheck()->IsStringCharAt()
        ? kQuickThrowStringBounds
        : kQuickThrowArrayBounds;
    x86_64_codegen->InvokeRuntime(entrypoint, instruction_, this);
    CheckEntrypointTypes<kQuickThrowStringBounds, void, int32_t, int32_t>();
    CheckEntrypointTypes<kQuickThrowArrayBounds, void, int32_t, int32_t>();
  }

  bool IsFatal() const override { return true; }

  const char* GetDescription() const override { return "BoundsCheckSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(BoundsCheckSlowPathX86_64);
};

class LoadMethodTypeSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadMethodTypeSlowPathX86_64(HLoadMethodType* mt) : SlowPathCode(mt) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::ProtoIndex proto_index = instruction_->AsLoadMethodType()->GetProtoIndex();
    // Custom calling convention: RAX serves as both input and output.
    __ movl(CpuRegister(RAX), Immediate(proto_index.index_));
    x86_64_codegen->InvokeRuntime(kQuickResolveMethodType, instruction_, this);
    CheckEntrypointTypes<kQuickResolveMethodType, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadMethodTypeSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadMethodTypeSlowPathX86_64);
};

class LoadClassSlowPathX86_64 : public SlowPathCode {
 public:
  LoadClassSlowPathX86_64(HLoadClass* cls, HInstruction* at)
      : SlowPathCode(at), cls_(cls) {
    DCHECK(at->IsLoadClass() || at->IsClinitCheck());
    DCHECK_EQ(instruction_->IsLoadClass(), cls_ == instruction_);
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    Location out = locations->Out();
    bool must_resolve_type = instruction_->IsLoadClass() && cls_->MustResolveTypeOnSlowPath();
    bool must_do_clinit = instruction_->IsClinitCheck() || cls_->MustGenerateClinitCheck();

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // Custom calling convention: RAX serves as both input and output.
    if (must_resolve_type) {
      DCHECK(IsSameDexFile(cls_->GetDexFile(), x86_64_codegen->GetGraph()->GetDexFile()) ||
             x86_64_codegen->GetCompilerOptions().WithinOatFile(&cls_->GetDexFile()) ||
             ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
                             &cls_->GetDexFile()));
      dex::TypeIndex type_index = cls_->GetTypeIndex();
      __ movl(CpuRegister(RAX), Immediate(type_index.index_));
      if (cls_->NeedsAccessCheck()) {
        CheckEntrypointTypes<kQuickResolveTypeAndVerifyAccess, void*, uint32_t>();
        x86_64_codegen->InvokeRuntime(kQuickResolveTypeAndVerifyAccess, instruction_, this);
      } else {
        CheckEntrypointTypes<kQuickResolveType, void*, uint32_t>();
        x86_64_codegen->InvokeRuntime(kQuickResolveType, instruction_, this);
      }
      // If we also must_do_clinit, the resolved type is now in the correct register.
    } else {
      DCHECK(must_do_clinit);
      Location source = instruction_->IsLoadClass() ? out : locations->InAt(0);
      x86_64_codegen->Move(Location::RegisterLocation(RAX), source);
    }
    if (must_do_clinit) {
      x86_64_codegen->InvokeRuntime(kQuickInitializeStaticStorage, instruction_, this);
      CheckEntrypointTypes<kQuickInitializeStaticStorage, void*, mirror::Class*>();
    }

    // Move the class to the desired location.
    if (out.IsValid()) {
      DCHECK(out.IsRegister() && !locations->GetLiveRegisters()->ContainsCoreRegister(out.reg()));
      x86_64_codegen->Move(out, Location::RegisterLocation(RAX));
    }

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadClassSlowPathX86_64"; }

 private:
  // The class this slow path will load.
  HLoadClass* const cls_;

  DISALLOW_COPY_AND_ASSIGN(LoadClassSlowPathX86_64);
};

class LoadStringSlowPathX86_64 : public SlowPathCode {
 public:
  explicit LoadStringSlowPathX86_64(HLoadString* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    const dex::StringIndex string_index = instruction_->AsLoadString()->GetStringIndex();
    // Custom calling convention: RAX serves as both input and output.
    __ movl(CpuRegister(RAX), Immediate(string_index.index_));
    x86_64_codegen->InvokeRuntime(kQuickResolveString, instruction_, this);
    CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
    x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
    RestoreLiveRegisters(codegen, locations);

    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "LoadStringSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(LoadStringSlowPathX86_64);
};

class TypeCheckSlowPathX86_64 : public SlowPathCode {
 public:
  TypeCheckSlowPathX86_64(HInstruction* instruction, bool is_fatal)
      : SlowPathCode(instruction), is_fatal_(is_fatal) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(instruction_->IsCheckCast()
           || !locations->GetLiveRegisters()->ContainsCoreRegister(locations->Out().reg()));

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());

    if (kPoisonHeapReferences &&
        instruction_->IsCheckCast() &&
        instruction_->AsCheckCast()->GetTypeCheckKind() == TypeCheckKind::kInterfaceCheck) {
      // First, unpoison the `cls` reference that was poisoned for direct memory comparison.
      __ UnpoisonHeapReference(locations->InAt(1).AsRegister<CpuRegister>());
    }

    if (!is_fatal_ || instruction_->CanThrowIntoCatchBlock()) {
      SaveLiveRegisters(codegen, locations);
    }

    // We're moving two locations to locations that could overlap, so we need a parallel
    // move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    codegen->EmitParallelMoves(locations->InAt(0),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                               DataType::Type::kReference,
                               locations->InAt(1),
                               Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                               DataType::Type::kReference);
    if (instruction_->IsInstanceOf()) {
      x86_64_codegen->InvokeRuntime(kQuickInstanceofNonTrivial, instruction_, this);
      CheckEntrypointTypes<kQuickInstanceofNonTrivial, size_t, mirror::Object*, mirror::Class*>();
    } else {
      DCHECK(instruction_->IsCheckCast());
      x86_64_codegen->InvokeRuntime(kQuickCheckInstanceOf, instruction_, this);
      CheckEntrypointTypes<kQuickCheckInstanceOf, void, mirror::Object*, mirror::Class*>();
    }

    if (!is_fatal_) {
      if (instruction_->IsInstanceOf()) {
        x86_64_codegen->Move(locations->Out(), Location::RegisterLocation(RAX));
      }

      RestoreLiveRegisters(codegen, locations);
      __ jmp(GetExitLabel());
    }
  }

  const char* GetDescription() const override { return "TypeCheckSlowPathX86_64"; }

  bool IsFatal() const override { return is_fatal_; }

 private:
  const bool is_fatal_;

  DISALLOW_COPY_AND_ASSIGN(TypeCheckSlowPathX86_64);
};

class DeoptimizationSlowPathX86_64 : public SlowPathCode {
 public:
  explicit DeoptimizationSlowPathX86_64(HDeoptimize* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    LocationSummary* locations = instruction_->GetLocations();
    SaveLiveRegisters(codegen, locations);
    InvokeRuntimeCallingConvention calling_convention;
    x86_64_codegen->Load32BitValue(
        CpuRegister(calling_convention.GetRegisterAt(0)),
        static_cast<uint32_t>(instruction_->AsDeoptimize()->GetDeoptimizationKind()));
    x86_64_codegen->InvokeRuntime(kQuickDeoptimize, instruction_, this);
    CheckEntrypointTypes<kQuickDeoptimize, void, DeoptimizationKind>();
  }

  const char* GetDescription() const override { return "DeoptimizationSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(DeoptimizationSlowPathX86_64);
};

class ArraySetSlowPathX86_64 : public SlowPathCode {
 public:
  explicit ArraySetSlowPathX86_64(HInstruction* instruction) : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    LocationSummary* locations = instruction_->GetLocations();
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(
        locations->InAt(0),
        Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
        DataType::Type::kReference,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(1),
        Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
        DataType::Type::kInt32,
        nullptr);
    parallel_move.AddMove(
        locations->InAt(2),
        Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
        DataType::Type::kReference,
        nullptr);
    codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);

    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->InvokeRuntime(kQuickAputObject, instruction_, this);
    CheckEntrypointTypes<kQuickAputObject, void, mirror::Array*, int32_t, mirror::Object*>();
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ArraySetSlowPathX86_64"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ArraySetSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read
// barrier. The field `obj.field` in the object `obj` holding this
// reference does not get updated by this slow path after marking (see
// ReadBarrierMarkAndUpdateFieldSlowPathX86_64 below for that).
//
// This means that after the execution of this slow path, `ref` will
// always be up-to-date, but `obj.field` may not; i.e., after the
// flip, `ref` will be a to-space reference, but `obj.field` will
// probably still be a from-space reference (unless it gets updated by
// another thread, or if another thread installed another object
// reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkSlowPathX86_64(HInstruction* instruction,
                                Location ref,
                                bool unpoison_ref_before_marking)
      : SlowPathCode(instruction),
        ref_(ref),
        unpoison_ref_before_marking_(unpoison_ref_before_marking) {
  }

  const char* GetDescription() const override { return "ReadBarrierMarkSlowPathX86_64"; }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsArraySet() ||
           instruction_->IsLoadClass() ||
           instruction_->IsLoadMethodType() ||
           instruction_->IsLoadString() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in R0):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkSlowPathX86_64);
};

// Slow path marking an object reference `ref` during a read barrier,
// and if needed, atomically updating the field `obj.field` in the
// object `obj` holding this reference after marking (contrary to
// ReadBarrierMarkSlowPathX86_64 above, which never tries to update
// `obj.field`).
//
// This means that after the execution of this slow path, both `ref`
// and `obj.field` will be up-to-date; i.e., after the flip, both will
// hold the same to-space reference (unless another thread installed
// another object reference (different from `ref`) in `obj.field`).
class ReadBarrierMarkAndUpdateFieldSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierMarkAndUpdateFieldSlowPathX86_64(HInstruction* instruction,
                                              Location ref,
                                              CpuRegister obj,
                                              const Address& field_addr,
                                              bool unpoison_ref_before_marking,
                                              CpuRegister temp1,
                                              CpuRegister temp2)
      : SlowPathCode(instruction),
        ref_(ref),
        obj_(obj),
        field_addr_(field_addr),
        unpoison_ref_before_marking_(unpoison_ref_before_marking),
        temp1_(temp1),
        temp2_(temp2) {
  }

  const char* GetDescription() const override {
    return "ReadBarrierMarkAndUpdateFieldSlowPathX86_64";
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister ref_cpu_reg = ref_.AsRegister<CpuRegister>();
    Register ref_reg = ref_cpu_reg.AsRegister();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(ref_reg)) << ref_reg;
    DCHECK((instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier marking and field updating slow path: "
        << instruction_->DebugName();
    HInvoke* invoke = instruction_->AsInvoke();
    DCHECK(IsUnsafeCASReference(invoke) ||
           IsUnsafeGetAndSetReference(invoke) ||
           IsVarHandleCASFamily(invoke)) << invoke->GetIntrinsic();

    __ Bind(GetEntryLabel());
    if (unpoison_ref_before_marking_) {
      // Object* ref = ref_addr->AsMirrorPtr()
      __ MaybeUnpoisonHeapReference(ref_cpu_reg);
    }

    // Save the old (unpoisoned) reference.
    __ movl(temp1_, ref_cpu_reg);

    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    DCHECK_NE(ref_reg, RSP);
    DCHECK(0 <= ref_reg && ref_reg < kNumberOfCpuRegisters) << ref_reg;
    // "Compact" slow path, saving two moves.
    //
    // Instead of using the standard runtime calling convention (input
    // and output in R0):
    //
    //   RDI <- ref
    //   RAX <- ReadBarrierMark(RDI)
    //   ref <- RAX
    //
    // we just use rX (the register containing `ref`) as input and output
    // of a dedicated entrypoint:
    //
    //   rX <- ReadBarrierMarkRegX(rX)
    //
    int32_t entry_point_offset =
        Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(ref_reg);
    // This runtime call does not require a stack map.
    x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);

    // If the new reference is different from the old reference,
    // update the field in the holder (`*field_addr`).
    //
    // Note that this field could also hold a different object, if
    // another thread had concurrently changed it. In that case, the
    // LOCK CMPXCHGL instruction in the compare-and-set (CAS)
    // operation below would abort the CAS, leaving the field as-is.
    NearLabel done;
    __ cmpl(temp1_, ref_cpu_reg);
    __ j(kEqual, &done);

    // Update the holder's field atomically. This may fail if the
    // mutator updates the field before us, but it's OK. This is achieved
    // using a strong compare-and-set (CAS) operation with relaxed
    // memory synchronization ordering, where the expected value is
    // the old reference and the desired value is the new reference.
    // This operation is implemented with a 32-bit LOCK CMPXCHGL
    // instruction, which requires the expected value (the old
    // reference) to be in EAX. Save RAX beforehand, and move the
    // expected value (stored in `temp1_`) into EAX.
    __ movq(temp2_, CpuRegister(RAX));
    __ movl(CpuRegister(RAX), temp1_);

    // Convenience aliases.
    CpuRegister base = obj_;
    CpuRegister expected = CpuRegister(RAX);
    CpuRegister value = ref_cpu_reg;

    bool base_equals_value = (base.AsRegister() == value.AsRegister());
    Register value_reg = ref_reg;
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // If `base` and `value` are the same register location, move
        // `value_reg` to a temporary register. This way, poisoning
        // `value_reg` won't invalidate `base`.
        value_reg = temp1_.AsRegister();
        __ movl(CpuRegister(value_reg), base);
      }

      // Check that the register allocator did not assign the location
      // of `expected` (RAX) to `value` nor to `base`, so that heap
      // poisoning (when enabled) works as intended below.
      // - If `value` were equal to `expected`, both references would
      //   be poisoned twice, meaning they would not be poisoned at
      //   all, as heap poisoning uses address negation.
      // - If `base` were equal to `expected`, poisoning `expected`
      //   would invalidate `base`.
      DCHECK_NE(value_reg, expected.AsRegister());
      DCHECK_NE(base.AsRegister(), expected.AsRegister());

      __ PoisonHeapReference(expected);
      __ PoisonHeapReference(CpuRegister(value_reg));
    }

    __ LockCmpxchgl(field_addr_, CpuRegister(value_reg));

    // If heap poisoning is enabled, we need to unpoison the values
    // that were poisoned earlier.
    if (kPoisonHeapReferences) {
      if (base_equals_value) {
        // `value_reg` has been moved to a temporary register, no need
        // to unpoison it.
      } else {
        __ UnpoisonHeapReference(CpuRegister(value_reg));
      }
      // No need to unpoison `expected` (RAX), as it will be overwritten below.
    }

    // Restore RAX.
    __ movq(CpuRegister(RAX), temp2_);

    __ Bind(&done);
    __ jmp(GetExitLabel());
  }

 private:
  // The location (register) of the marked object reference.
  const Location ref_;
  // The register containing the object holding the marked object reference field.
  const CpuRegister obj_;
  // The address of the marked reference field. The base of this address must be `obj_`.
  const Address field_addr_;

  // Should the reference in `ref_` be unpoisoned prior to marking it?
  const bool unpoison_ref_before_marking_;

  const CpuRegister temp1_;
  const CpuRegister temp2_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierMarkAndUpdateFieldSlowPathX86_64);
};

// Slow path generating a read barrier for a heap reference.
class ReadBarrierForHeapReferenceSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForHeapReferenceSlowPathX86_64(HInstruction* instruction,
                                            Location out,
                                            Location ref,
                                            Location obj,
                                            uint32_t offset,
                                            Location index)
      : SlowPathCode(instruction),
        out_(out),
        ref_(ref),
        obj_(obj),
        offset_(offset),
        index_(index) {
    // If `obj` is equal to `out` or `ref`, it means the initial
    // object has been overwritten by (or after) the heap object
    // reference load to be instrumented, e.g.:
    //
    //   __ movl(out, Address(out, offset));
    //   codegen_->GenerateReadBarrierSlow(instruction, out_loc, out_loc, out_loc, offset);
    //
    // In that case, we have lost the information about the original
    // object, and the emitted read barrier cannot work properly.
    DCHECK(!obj.Equals(out)) << "obj=" << obj << " out=" << out;
    DCHECK(!obj.Equals(ref)) << "obj=" << obj << " ref=" << ref;
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    CpuRegister reg_out = out_.AsRegister<CpuRegister>();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(reg_out.AsRegister())) << out_;
    DCHECK(instruction_->IsInstanceFieldGet() ||
           instruction_->IsStaticFieldGet() ||
           instruction_->IsArrayGet() ||
           instruction_->IsInstanceOf() ||
           instruction_->IsCheckCast() ||
           (instruction_->IsInvoke() && instruction_->GetLocations()->Intrinsified()))
        << "Unexpected instruction in read barrier for heap reference slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    // We may have to change the index's value, but as `index_` is a
    // constant member (like other "inputs" of this slow path),
    // introduce a copy of it, `index`.
    Location index = index_;
    if (index_.IsValid()) {
      // Handle `index_` for HArrayGet and UnsafeGetObject/UnsafeGetObjectVolatile intrinsics.
      if (instruction_->IsArrayGet()) {
        // Compute real offset and store it in index_.
        Register index_reg = index_.AsRegister<CpuRegister>().AsRegister();
        DCHECK(locations->GetLiveRegisters()->ContainsCoreRegister(index_reg));
        if (codegen->IsCoreCalleeSaveRegister(index_reg)) {
          // We are about to change the value of `index_reg` (see the
          // calls to art::x86_64::X86_64Assembler::shll and
          // art::x86_64::X86_64Assembler::AddImmediate below), but it
          // has not been saved by the previous call to
          // art::SlowPathCode::SaveLiveRegisters, as it is a
          // callee-save register --
          // art::SlowPathCode::SaveLiveRegisters does not consider
          // callee-save registers, as it has been designed with the
          // assumption that callee-save registers are supposed to be
          // handled by the called function. So, as a callee-save
          // register, `index_reg` _would_ eventually be saved onto
          // the stack, but it would be too late: we would have
          // changed its value earlier. Therefore, we manually save
          // it here into another freely available register,
          // `free_reg`, chosen of course among the caller-save
          // registers (as a callee-save `free_reg` register would
          // exhibit the same problem).
          //
          // Note we could have requested a temporary register from
          // the register allocator instead; but we prefer not to, as
          // this is a slow path, and we know we can find a
          // caller-save register that is available.
          Register free_reg = FindAvailableCallerSaveRegister(codegen).AsRegister();
          __ movl(CpuRegister(free_reg), CpuRegister(index_reg));
          index_reg = free_reg;
          index = Location::RegisterLocation(index_reg);
        } else {
          // The initial register stored in `index_` has already been
          // saved in the call to art::SlowPathCode::SaveLiveRegisters
          // (as it is not a callee-save register), so we can freely
          // use it.
        }
        // Shifting the index value contained in `index_reg` by the
        // scale factor (2) cannot overflow in practice, as the
        // runtime is unable to allocate object arrays with a size
        // larger than 2^26 - 1 (that is, 2^28 - 4 bytes).
        __ shll(CpuRegister(index_reg), Immediate(TIMES_4));
        static_assert(
            sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
            "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
        __ AddImmediate(CpuRegister(index_reg), Immediate(offset_));
      } else {
        // In the case of the UnsafeGetObject/UnsafeGetObjectVolatile
        // intrinsics, `index_` is not shifted by a scale factor of 2
        // (as in the case of ArrayGet), as it is actually an offset
        // to an object field within an object.
        DCHECK(instruction_->IsInvoke()) << instruction_->DebugName();
        DCHECK(instruction_->GetLocations()->Intrinsified());
        DCHECK((instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObject) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() == Intrinsics::kJdkUnsafeGetReference) ||
               (instruction_->AsInvoke()->GetIntrinsic() ==
                    Intrinsics::kJdkUnsafeGetReferenceVolatile) ||
               (instruction_->AsInvoke()->GetIntrinsic() ==
                    Intrinsics::kJdkUnsafeGetReferenceAcquire))
            << instruction_->AsInvoke()->GetIntrinsic();
        DCHECK_EQ(offset_, 0U);
        DCHECK(index_.IsRegister());
      }
    }

    // We're moving two or three locations to locations that could
    // overlap, so we need a parallel move resolver.
    InvokeRuntimeCallingConvention calling_convention;
    HParallelMove parallel_move(codegen->GetGraph()->GetAllocator());
    parallel_move.AddMove(ref_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(0)),
                          DataType::Type::kReference,
                          nullptr);
    parallel_move.AddMove(obj_,
                          Location::RegisterLocation(calling_convention.GetRegisterAt(1)),
                          DataType::Type::kReference,
                          nullptr);
    if (index.IsValid()) {
      parallel_move.AddMove(index,
                            Location::RegisterLocation(calling_convention.GetRegisterAt(2)),
                            DataType::Type::kInt32,
                            nullptr);
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
    } else {
      codegen->GetMoveResolver()->EmitNativeCode(&parallel_move);
      __ movl(CpuRegister(calling_convention.GetRegisterAt(2)), Immediate(offset_));
    }
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierSlow, instruction_, this);
    CheckEntrypointTypes<
        kQuickReadBarrierSlow, mirror::Object*, mirror::Object*, mirror::Object*, uint32_t>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "ReadBarrierForHeapReferenceSlowPathX86_64";
  }

 private:
  CpuRegister FindAvailableCallerSaveRegister(CodeGenerator* codegen) {
    size_t ref = static_cast<int>(ref_.AsRegister<CpuRegister>().AsRegister());
    size_t obj = static_cast<int>(obj_.AsRegister<CpuRegister>().AsRegister());
    for (size_t i = 0, e = codegen->GetNumberOfCoreRegisters(); i < e; ++i) {
      if (i != ref && i != obj && !codegen->IsCoreCalleeSaveRegister(i)) {
        return static_cast<CpuRegister>(i);
      }
    }
    // We shall never fail to find a free caller-save register, as
    // there are more than two core caller-save registers on x86-64
    // (meaning it is possible to find one which is different from
    // `ref` and `obj`).
    DCHECK_GT(codegen->GetNumberOfCoreCallerSaveRegisters(), 2u);
    LOG(FATAL) << "Could not find a free caller-save register";
    UNREACHABLE();
  }

  const Location out_;
  const Location ref_;
  const Location obj_;
  const uint32_t offset_;
  // An additional location containing an index to an array.
  // Only used for HArrayGet and the UnsafeGetObject &
  // UnsafeGetObjectVolatile intrinsics.
  const Location index_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForHeapReferenceSlowPathX86_64);
};

// Slow path generating a read barrier for a GC root.
class ReadBarrierForRootSlowPathX86_64 : public SlowPathCode {
 public:
  ReadBarrierForRootSlowPathX86_64(HInstruction* instruction, Location out, Location root)
      : SlowPathCode(instruction), out_(out), root_(root) {
  }

  void EmitNativeCode(CodeGenerator* codegen) override {
    DCHECK(codegen->EmitReadBarrier());
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(!locations->GetLiveRegisters()->ContainsCoreRegister(out_.reg()));
    DCHECK(instruction_->IsLoadClass() || instruction_->IsLoadString())
        << "Unexpected instruction in read barrier for GC root slow path: "
        << instruction_->DebugName();

    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);

    InvokeRuntimeCallingConvention calling_convention;
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    x86_64_codegen->Move(Location::RegisterLocation(calling_convention.GetRegisterAt(0)), root_);
    x86_64_codegen->InvokeRuntime(kQuickReadBarrierForRootSlow, instruction_, this);
    CheckEntrypointTypes<kQuickReadBarrierForRootSlow, mirror::Object*, GcRoot<mirror::Object>*>();
    x86_64_codegen->Move(out_, Location::RegisterLocation(RAX));

    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override { return "ReadBarrierForRootSlowPathX86_64"; }

 private:
  const Location out_;
  const Location root_;

  DISALLOW_COPY_AND_ASSIGN(ReadBarrierForRootSlowPathX86_64);
};

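// Slow path calling the runtime method entry/exit hooks when instrumentation is active.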
class MethodEntryExitHooksSlowPathX86_64 : public SlowPathCode {
 public:
  explicit MethodEntryExitHooksSlowPathX86_64(HInstruction* instruction)
      : SlowPathCode(instruction) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    QuickEntrypointEnum entry_point =
        (instruction_->IsMethodEntryHook()) ? kQuickMethodEntryHook : kQuickMethodExitHook;
    __ Bind(GetEntryLabel());
    SaveLiveRegisters(codegen, locations);
    if (instruction_->IsMethodExitHook()) {
      // Load FrameSize to pass to the exit hook.
      __ movq(CpuRegister(R8), Immediate(codegen->GetFrameSize()));
    }
    x86_64_codegen->InvokeRuntime(entry_point, instruction_, this);
    RestoreLiveRegisters(codegen, locations);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "MethodEntryExitHooksSlowPath";
  }

 private:
  DISALLOW_COPY_AND_ASSIGN(MethodEntryExitHooksSlowPathX86_64);
};

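// Slow path that resets the hotness counter (located at `counter_address_`) back to the
// optimization threshold and calls the kQuickCompileOptimized entrypoint to request optimized
// compilation of the method.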
class CompileOptimizedSlowPathX86_64 : public SlowPathCode {
 public:
  CompileOptimizedSlowPathX86_64(HSuspendCheck* suspend_check, uint64_t counter_address)
      : SlowPathCode(suspend_check),
        counter_address_(counter_address) {}

  void EmitNativeCode(CodeGenerator* codegen) override {
    CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
    __ Bind(GetEntryLabel());
    __ movq(CpuRegister(TMP), Immediate(counter_address_));
    __ movw(Address(CpuRegister(TMP), 0), Immediate(ProfilingInfo::GetOptimizeThreshold()));
    if (instruction_ != nullptr) {
      // Only saves full width XMM for SIMD.
      SaveLiveRegisters(codegen, instruction_->GetLocations());
    }
    x86_64_codegen->GenerateInvokeRuntime(
        GetThreadOffset<kX86_64PointerSize>(kQuickCompileOptimized).Int32Value());
    if (instruction_ != nullptr) {
      // Only restores full width XMM for SIMD.
      RestoreLiveRegisters(codegen, instruction_->GetLocations());
    }
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const override {
    return "CompileOptimizedSlowPath";
  }

 private:
  uint64_t counter_address_;

  DISALLOW_COPY_AND_ASSIGN(CompileOptimizedSlowPathX86_64);
};

#undef __
// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86_64Assembler*>(GetAssembler())->  // NOLINT

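// Maps an integer condition to the corresponding x86_64 condition code.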
inline Condition X86_64IntegerCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kLess;
    case kCondLE: return kLessEqual;
    case kCondGT: return kGreater;
    case kCondGE: return kGreaterEqual;
    case kCondB: return kBelow;
    case kCondBE: return kBelowEqual;
    case kCondA: return kAbove;
    case kCondAE: return kAboveEqual;
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

// Maps FP condition to x86_64 name.
inline Condition X86_64FPCondition(IfCondition cond) {
  switch (cond) {
    case kCondEQ: return kEqual;
    case kCondNE: return kNotEqual;
    case kCondLT: return kBelow;
    case kCondLE: return kBelowEqual;
    case kCondGT: return kAbove;
    case kCondGE: return kAboveEqual;
    default: break;  // should not happen
  }
  LOG(FATAL) << "Unreachable";
  UNREACHABLE();
}

void CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(LocationSummary* locations) {
  // We have to ensure that the native code we call directly (such as @CriticalNative
  // or some intrinsic helpers, say Math.sin()) doesn't clobber the XMM registers
  // which are non-volatile for ART, but volatile for Native calls. This will ensure
  // that they are saved in the prologue and properly restored.
  for (FloatRegister fp_reg : non_volatile_xmm_regs) {
    locations->AddTemp(Location::FpuRegisterLocation(fp_reg));
  }
}

HInvokeStaticOrDirect::DispatchInfo CodeGeneratorX86_64::GetSupportedInvokeStaticOrDirectDispatch(
    const HInvokeStaticOrDirect::DispatchInfo& desired_dispatch_info,
    [[maybe_unused]] ArtMethod* method) {
  return desired_dispatch_info;
}

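// Loads the ArtMethod* (or a patchable placeholder reference to it) into `temp` according to
// `load_kind`.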
void CodeGeneratorX86_64::LoadMethod(MethodLoadKind load_kind, Location temp, HInvoke* invoke) {
  switch (load_kind) {
    case MethodLoadKind::kBootImageLinkTimePcRelative:
      DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
      __ leal(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordBootImageMethodPatch(invoke);
      break;
    case MethodLoadKind::kBootImageRelRo: {
      // Note: Boot image is in the low 4GiB and the entry is 32-bit, so emit a 32-bit load.
      __ movl(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordBootImageRelRoPatch(GetBootImageOffset(invoke));
      break;
    }
    case MethodLoadKind::kAppImageRelRo: {
      DCHECK(GetCompilerOptions().IsAppImage());
      __ movl(temp.AsRegister<CpuRegister>(),
              Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordAppImageMethodPatch(invoke);
      break;
    }
    case MethodLoadKind::kBssEntry: {
      __ movq(temp.AsRegister<CpuRegister>(),
              Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
      RecordMethodBssEntryPatch(invoke);
      // No need for memory fence, thanks to the x86-64 memory model.
      break;
    }
    case MethodLoadKind::kJitDirectAddress: {
      Load64BitValue(temp.AsRegister<CpuRegister>(),
                     reinterpret_cast<int64_t>(invoke->GetResolvedMethod()));
      break;
    }
    case MethodLoadKind::kRuntimeCall: {
      // Test situation, don't do anything.
      break;
    }
    default: {
      LOG(FATAL) << "Load kind should have already been handled " << load_kind;
      UNREACHABLE();
    }
  }
}

void CodeGeneratorX86_64::GenerateStaticOrDirectCall(
    HInvokeStaticOrDirect* invoke, Location temp, SlowPathCode* slow_path) {
  // All registers are assumed to be correctly set up.

  Location callee_method = temp;  // For all kinds except kRecursive, callee will be in temp.
  switch (invoke->GetMethodLoadKind()) {
    case MethodLoadKind::kStringInit: {
      // temp = thread->string_init_entrypoint
      uint32_t offset =
          GetThreadOffset<kX86_64PointerSize>(invoke->GetStringInitEntryPoint()).Int32Value();
      __ gs()->movq(temp.AsRegister<CpuRegister>(), Address::Absolute(offset, /* no_rip= */ true));
      break;
    }
    case MethodLoadKind::kRecursive: {
      callee_method = invoke->GetLocations()->InAt(invoke->GetCurrentMethodIndex());
      break;
    }
    case MethodLoadKind::kRuntimeCall: {
      GenerateInvokeStaticOrDirectRuntimeCall(invoke, temp, slow_path);
      return;  // No code pointer retrieval; the runtime performs the call directly.
    }
    case MethodLoadKind::kBootImageLinkTimePcRelative:
      // For kCallCriticalNative we skip loading the method and do the call directly.
      if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
        break;
      }
      FALLTHROUGH_INTENDED;
    default: {
      LoadMethod(invoke->GetMethodLoadKind(), temp, invoke);
      break;
    }
  }

  switch (invoke->GetCodePtrLocation()) {
    case CodePtrLocation::kCallSelf:
      DCHECK(!GetGraph()->HasShouldDeoptimizeFlag());
      __ call(&frame_entry_label_);
      RecordPcInfo(invoke, slow_path);
      break;
    case CodePtrLocation::kCallCriticalNative: {
      size_t out_frame_size =
          PrepareCriticalNativeCall<CriticalNativeCallingConventionVisitorX86_64,
                                    kNativeStackAlignment,
                                    GetCriticalNativeDirectCallFrameSize>(invoke);
      if (invoke->GetMethodLoadKind() == MethodLoadKind::kBootImageLinkTimePcRelative) {
        DCHECK(GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension());
        __ call(Address::Absolute(kPlaceholder32BitOffset, /* no_rip= */ false));
        RecordBootImageJniEntrypointPatch(invoke);
      } else {
        // (callee_method + offset_of_jni_entry_point)()
        __ call(Address(callee_method.AsRegister<CpuRegister>(),
                        ArtMethod::EntryPointFromJniOffset(kX86_64PointerSize).SizeValue()));
      }
      RecordPcInfo(invoke, slow_path);
      // Zero-/sign-extend the result when needed due to native and managed ABI mismatch.
      switch (invoke->GetType()) {
        case DataType::Type::kBool:
          __ movzxb(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kInt8:
          __ movsxb(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kUint16:
          __ movzxw(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kInt16:
          __ movsxw(CpuRegister(RAX), CpuRegister(RAX));
          break;
        case DataType::Type::kInt32:
        case DataType::Type::kInt64:
        case DataType::Type::kFloat32:
        case DataType::Type::kFloat64:
        case DataType::Type::kVoid:
          break;
        default:
          DCHECK(false) << invoke->GetType();
          break;
      }
      if (out_frame_size != 0u) {
        DecreaseFrame(out_frame_size);
      }
      break;
    }
    case CodePtrLocation::kCallArtMethod:
      // (callee_method + offset_of_quick_compiled_code)()
      __ call(Address(callee_method.AsRegister<CpuRegister>(),
                      ArtMethod::EntryPointFromQuickCompiledCodeOffset(
                          kX86_64PointerSize).SizeValue()));
      RecordPcInfo(invoke, slow_path);
      break;
  }

  DCHECK(!IsLeafMethod());
}

void CodeGeneratorX86_64::GenerateVirtualCall(
    HInvokeVirtual* invoke, Location temp_in, SlowPathCode* slow_path) {
  CpuRegister temp = temp_in.AsRegister<CpuRegister>();
  size_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
      invoke->GetVTableIndex(), kX86_64PointerSize).SizeValue();

  // Use the calling convention instead of the location of the receiver, as
  // intrinsics may have put the receiver in a different register. In the intrinsics
  // slow path, the arguments have been moved to the right place, so here we are
  // guaranteed that the receiver is the first register of the calling convention.
  InvokeDexCallingConvention calling_convention;
  Register receiver = calling_convention.GetRegisterAt(0);

  size_t class_offset = mirror::Object::ClassOffset().SizeValue();
  // /* HeapReference<Class> */ temp = receiver->klass_
  __ movl(temp, Address(CpuRegister(receiver), class_offset));
  MaybeRecordImplicitNullCheck(invoke);
  // Instead of simply (possibly) unpoisoning `temp` here, we should
  // emit a read barrier for the previous class reference load.
  // However this is not required in practice, as this is an
  // intermediate/temporary reference and because the current
  // concurrent copying collector keeps the from-space memory
  // intact/accessible until the end of the marking phase (though the
  // concurrent copying collector may not do so in the future).
  __ MaybeUnpoisonHeapReference(temp);

  MaybeGenerateInlineCacheCheck(invoke, temp);

  // temp = temp->GetMethodAt(method_offset);
  __ movq(temp, Address(temp, method_offset));
  // call temp->GetEntryPoint();
  __ call(Address(temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(
      kX86_64PointerSize).SizeValue()));
  RecordPcInfo(invoke, slow_path);
}

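// The Record*Patch() and New*Patch() helpers below register placeholder linker patches: each
// one appends a patch entry and binds (or returns) its label at the current assembler position
// so that the immediate of the corresponding instruction can be fixed up later.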
RecordBootImageIntrinsicPatch(uint32_t intrinsic_data)1289 void CodeGeneratorX86_64::RecordBootImageIntrinsicPatch(uint32_t intrinsic_data) {
1290 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, intrinsic_data);
1291 __ Bind(&boot_image_other_patches_.back().label);
1292 }
1293
RecordBootImageRelRoPatch(uint32_t boot_image_offset)1294 void CodeGeneratorX86_64::RecordBootImageRelRoPatch(uint32_t boot_image_offset) {
1295 boot_image_other_patches_.emplace_back(/* target_dex_file= */ nullptr, boot_image_offset);
1296 __ Bind(&boot_image_other_patches_.back().label);
1297 }
1298
RecordBootImageMethodPatch(HInvoke * invoke)1299 void CodeGeneratorX86_64::RecordBootImageMethodPatch(HInvoke* invoke) {
1300 boot_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1301 invoke->GetResolvedMethodReference().index);
1302 __ Bind(&boot_image_method_patches_.back().label);
1303 }
1304
RecordAppImageMethodPatch(HInvoke * invoke)1305 void CodeGeneratorX86_64::RecordAppImageMethodPatch(HInvoke* invoke) {
1306 app_image_method_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1307 invoke->GetResolvedMethodReference().index);
1308 __ Bind(&app_image_method_patches_.back().label);
1309 }
1310
RecordMethodBssEntryPatch(HInvoke * invoke)1311 void CodeGeneratorX86_64::RecordMethodBssEntryPatch(HInvoke* invoke) {
1312 DCHECK(IsSameDexFile(GetGraph()->GetDexFile(), *invoke->GetMethodReference().dex_file) ||
1313 GetCompilerOptions().WithinOatFile(invoke->GetMethodReference().dex_file) ||
1314 ContainsElement(Runtime::Current()->GetClassLinker()->GetBootClassPath(),
1315 invoke->GetMethodReference().dex_file));
1316 method_bss_entry_patches_.emplace_back(invoke->GetMethodReference().dex_file,
1317 invoke->GetMethodReference().index);
1318 __ Bind(&method_bss_entry_patches_.back().label);
1319 }
1320
RecordBootImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index)1321 void CodeGeneratorX86_64::RecordBootImageTypePatch(const DexFile& dex_file,
1322 dex::TypeIndex type_index) {
1323 boot_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1324 __ Bind(&boot_image_type_patches_.back().label);
1325 }
1326
RecordAppImageTypePatch(const DexFile & dex_file,dex::TypeIndex type_index)1327 void CodeGeneratorX86_64::RecordAppImageTypePatch(const DexFile& dex_file,
1328 dex::TypeIndex type_index) {
1329 app_image_type_patches_.emplace_back(&dex_file, type_index.index_);
1330 __ Bind(&app_image_type_patches_.back().label);
1331 }
1332
NewTypeBssEntryPatch(HLoadClass * load_class)1333 Label* CodeGeneratorX86_64::NewTypeBssEntryPatch(HLoadClass* load_class) {
1334 ArenaDeque<PatchInfo<Label>>* patches = nullptr;
1335 switch (load_class->GetLoadKind()) {
1336 case HLoadClass::LoadKind::kBssEntry:
1337 patches = &type_bss_entry_patches_;
1338 break;
1339 case HLoadClass::LoadKind::kBssEntryPublic:
1340 patches = &public_type_bss_entry_patches_;
1341 break;
1342 case HLoadClass::LoadKind::kBssEntryPackage:
1343 patches = &package_type_bss_entry_patches_;
1344 break;
1345 default:
1346 LOG(FATAL) << "Unexpected load kind: " << load_class->GetLoadKind();
1347 UNREACHABLE();
1348 }
1349 patches->emplace_back(&load_class->GetDexFile(), load_class->GetTypeIndex().index_);
1350 return &patches->back().label;
1351 }
1352
RecordBootImageStringPatch(HLoadString * load_string)1353 void CodeGeneratorX86_64::RecordBootImageStringPatch(HLoadString* load_string) {
1354 boot_image_string_patches_.emplace_back(
1355 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1356 __ Bind(&boot_image_string_patches_.back().label);
1357 }
1358
NewStringBssEntryPatch(HLoadString * load_string)1359 Label* CodeGeneratorX86_64::NewStringBssEntryPatch(HLoadString* load_string) {
1360 string_bss_entry_patches_.emplace_back(
1361 &load_string->GetDexFile(), load_string->GetStringIndex().index_);
1362 return &string_bss_entry_patches_.back().label;
1363 }
1364
NewMethodTypeBssEntryPatch(HLoadMethodType * load_method_type)1365 Label* CodeGeneratorX86_64::NewMethodTypeBssEntryPatch(HLoadMethodType* load_method_type) {
1366 method_type_bss_entry_patches_.emplace_back(
1367 &load_method_type->GetDexFile(), load_method_type->GetProtoIndex().index_);
1368 return &method_type_bss_entry_patches_.back().label;
1369 }
1370
RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect * invoke)1371 void CodeGeneratorX86_64::RecordBootImageJniEntrypointPatch(HInvokeStaticOrDirect* invoke) {
1372 boot_image_jni_entrypoint_patches_.emplace_back(invoke->GetResolvedMethodReference().dex_file,
1373 invoke->GetResolvedMethodReference().index);
1374 __ Bind(&boot_image_jni_entrypoint_patches_.back().label);
1375 }
1376
LoadBootImageAddress(CpuRegister reg,uint32_t boot_image_reference)1377 void CodeGeneratorX86_64::LoadBootImageAddress(CpuRegister reg, uint32_t boot_image_reference) {
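       // Depending on the compilation mode, this either (1) emits a PC-relative `leal` against a
       // placeholder that is patched at link time (boot image compilation), (2) loads the address
       // through a boot image relocation (RelRo) entry (PIC AOT compilation), or (3) embeds the
       // absolute 32-bit address of the object in the already-mapped boot image (JIT compilation).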
1378 if (GetCompilerOptions().IsBootImage()) {
1379 __ leal(reg,
1380 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1381 RecordBootImageIntrinsicPatch(boot_image_reference);
1382 } else if (GetCompilerOptions().GetCompilePic()) {
1383 __ movl(reg,
1384 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1385 RecordBootImageRelRoPatch(boot_image_reference);
1386 } else {
1387 DCHECK(GetCompilerOptions().IsJitCompiler());
1388 gc::Heap* heap = Runtime::Current()->GetHeap();
1389 DCHECK(!heap->GetBootImageSpaces().empty());
1390 const uint8_t* address = heap->GetBootImageSpaces()[0]->Begin() + boot_image_reference;
1391 __ movl(reg, Immediate(dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(address))));
1392 }
1393 }
1394
LoadIntrinsicDeclaringClass(CpuRegister reg,HInvoke * invoke)1395 void CodeGeneratorX86_64::LoadIntrinsicDeclaringClass(CpuRegister reg, HInvoke* invoke) {
1396 DCHECK_NE(invoke->GetIntrinsic(), Intrinsics::kNone);
1397 if (GetCompilerOptions().IsBootImage()) {
1398 // Load the type the same way as for HLoadClass::LoadKind::kBootImageLinkTimePcRelative.
1399 __ leal(reg,
1400 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
1401 MethodReference target_method = invoke->GetResolvedMethodReference();
1402 dex::TypeIndex type_idx = target_method.dex_file->GetMethodId(target_method.index).class_idx_;
1403 boot_image_type_patches_.emplace_back(target_method.dex_file, type_idx.index_);
1404 __ Bind(&boot_image_type_patches_.back().label);
1405 } else {
1406 uint32_t boot_image_offset = GetBootImageOffsetOfIntrinsicDeclaringClass(invoke);
1407 LoadBootImageAddress(reg, boot_image_offset);
1408 }
1409 }
1410
LoadClassRootForIntrinsic(CpuRegister reg,ClassRoot class_root)1411 void CodeGeneratorX86_64::LoadClassRootForIntrinsic(CpuRegister reg, ClassRoot class_root) {
1412 if (GetCompilerOptions().IsBootImage()) {
1413 ScopedObjectAccess soa(Thread::Current());
1414 ObjPtr<mirror::Class> klass = GetClassRoot(class_root);
1415 boot_image_type_patches_.emplace_back(&klass->GetDexFile(), klass->GetDexTypeIndex().index_);
1416 __ Bind(&boot_image_type_patches_.back().label);
1417 } else {
1418 uint32_t boot_image_offset = GetBootImageOffset(class_root);
1419 LoadBootImageAddress(reg, boot_image_offset);
1420 }
1421 }
1422
1423 // The label points to the end of the "movl" (or other) instruction, but the literal offset
1424 // for the patch needs to point to the embedded constant, which occupies the last 4 bytes.
1425 constexpr uint32_t kLabelPositionToLiteralOffsetAdjustment = 4u;
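     // For example, a RIP-relative `movl reg, [rip + <32-bit placeholder>]` ends with the 4-byte
     // displacement and the label is bound right after the instruction, so the literal offset to
     // patch is the label position minus 4.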
1426
1427 template <linker::LinkerPatch (*Factory)(size_t, const DexFile*, uint32_t, uint32_t)>
EmitPcRelativeLinkerPatches(const ArenaDeque<PatchInfo<Label>> & infos,ArenaVector<linker::LinkerPatch> * linker_patches)1428 inline void CodeGeneratorX86_64::EmitPcRelativeLinkerPatches(
1429 const ArenaDeque<PatchInfo<Label>>& infos,
1430 ArenaVector<linker::LinkerPatch>* linker_patches) {
1431 for (const PatchInfo<Label>& info : infos) {
1432 uint32_t literal_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
1433 linker_patches->push_back(
1434 Factory(literal_offset, info.target_dex_file, info.label.Position(), info.offset_or_index));
1435 }
1436 }
1437
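     // Adapter so that patch factories that take no dex file (boot image offset and intrinsic
     // data patches) can be used with EmitPcRelativeLinkerPatches(), which always passes a
     // target dex file pointer (null for these patches).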
1438 template <linker::LinkerPatch (*Factory)(size_t, uint32_t, uint32_t)>
NoDexFileAdapter(size_t literal_offset,const DexFile * target_dex_file,uint32_t pc_insn_offset,uint32_t boot_image_offset)1439 linker::LinkerPatch NoDexFileAdapter(size_t literal_offset,
1440 const DexFile* target_dex_file,
1441 uint32_t pc_insn_offset,
1442 uint32_t boot_image_offset) {
1443 DCHECK(target_dex_file == nullptr); // Unused for these patches, should be null.
1444 return Factory(literal_offset, pc_insn_offset, boot_image_offset);
1445 }
1446
EmitLinkerPatches(ArenaVector<linker::LinkerPatch> * linker_patches)1447 void CodeGeneratorX86_64::EmitLinkerPatches(ArenaVector<linker::LinkerPatch>* linker_patches) {
1448 DCHECK(linker_patches->empty());
1449 size_t size =
1450 boot_image_method_patches_.size() +
1451 app_image_method_patches_.size() +
1452 method_bss_entry_patches_.size() +
1453 boot_image_type_patches_.size() +
1454 app_image_type_patches_.size() +
1455 type_bss_entry_patches_.size() +
1456 public_type_bss_entry_patches_.size() +
1457 package_type_bss_entry_patches_.size() +
1458 boot_image_string_patches_.size() +
1459 string_bss_entry_patches_.size() +
1460 method_type_bss_entry_patches_.size() +
1461 boot_image_jni_entrypoint_patches_.size() +
1462 boot_image_other_patches_.size();
1463 linker_patches->reserve(size);
1464 if (GetCompilerOptions().IsBootImage() || GetCompilerOptions().IsBootImageExtension()) {
1465 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeMethodPatch>(
1466 boot_image_method_patches_, linker_patches);
1467 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeTypePatch>(
1468 boot_image_type_patches_, linker_patches);
1469 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeStringPatch>(
1470 boot_image_string_patches_, linker_patches);
1471 } else {
1472 DCHECK(boot_image_method_patches_.empty());
1473 DCHECK(boot_image_type_patches_.empty());
1474 DCHECK(boot_image_string_patches_.empty());
1475 }
1476 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_method_patches_.empty());
1477 DCHECK_IMPLIES(!GetCompilerOptions().IsAppImage(), app_image_type_patches_.empty());
1478 if (GetCompilerOptions().IsBootImage()) {
1479 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::IntrinsicReferencePatch>>(
1480 boot_image_other_patches_, linker_patches);
1481 } else {
1482 EmitPcRelativeLinkerPatches<NoDexFileAdapter<linker::LinkerPatch::BootImageRelRoPatch>>(
1483 boot_image_other_patches_, linker_patches);
1484 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodAppImageRelRoPatch>(
1485 app_image_method_patches_, linker_patches);
1486 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeAppImageRelRoPatch>(
1487 app_image_type_patches_, linker_patches);
1488 }
1489 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodBssEntryPatch>(
1490 method_bss_entry_patches_, linker_patches);
1491 EmitPcRelativeLinkerPatches<linker::LinkerPatch::TypeBssEntryPatch>(
1492 type_bss_entry_patches_, linker_patches);
1493 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PublicTypeBssEntryPatch>(
1494 public_type_bss_entry_patches_, linker_patches);
1495 EmitPcRelativeLinkerPatches<linker::LinkerPatch::PackageTypeBssEntryPatch>(
1496 package_type_bss_entry_patches_, linker_patches);
1497 EmitPcRelativeLinkerPatches<linker::LinkerPatch::StringBssEntryPatch>(
1498 string_bss_entry_patches_, linker_patches);
1499 EmitPcRelativeLinkerPatches<linker::LinkerPatch::MethodTypeBssEntryPatch>(
1500 method_type_bss_entry_patches_, linker_patches);
1501 EmitPcRelativeLinkerPatches<linker::LinkerPatch::RelativeJniEntrypointPatch>(
1502 boot_image_jni_entrypoint_patches_, linker_patches);
1503 DCHECK_EQ(size, linker_patches->size());
1504 }
1505
DumpCoreRegister(std::ostream & stream,int reg) const1506 void CodeGeneratorX86_64::DumpCoreRegister(std::ostream& stream, int reg) const {
1507 stream << Register(reg);
1508 }
1509
DumpFloatingPointRegister(std::ostream & stream,int reg) const1510 void CodeGeneratorX86_64::DumpFloatingPointRegister(std::ostream& stream, int reg) const {
1511 stream << FloatRegister(reg);
1512 }
1513
GetInstructionSetFeatures() const1514 const X86_64InstructionSetFeatures& CodeGeneratorX86_64::GetInstructionSetFeatures() const {
1515 return *GetCompilerOptions().GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures();
1516 }
1517
SaveCoreRegister(size_t stack_index,uint32_t reg_id)1518 size_t CodeGeneratorX86_64::SaveCoreRegister(size_t stack_index, uint32_t reg_id) {
1519 __ movq(Address(CpuRegister(RSP), stack_index), CpuRegister(reg_id));
1520 return kX86_64WordSize;
1521 }
1522
RestoreCoreRegister(size_t stack_index,uint32_t reg_id)1523 size_t CodeGeneratorX86_64::RestoreCoreRegister(size_t stack_index, uint32_t reg_id) {
1524 __ movq(CpuRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1525 return kX86_64WordSize;
1526 }
1527
SaveFloatingPointRegister(size_t stack_index,uint32_t reg_id)1528 size_t CodeGeneratorX86_64::SaveFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1529 if (GetGraph()->HasSIMD()) {
1530 __ movups(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1531 } else {
1532 __ movsd(Address(CpuRegister(RSP), stack_index), XmmRegister(reg_id));
1533 }
1534 return GetSlowPathFPWidth();
1535 }
1536
RestoreFloatingPointRegister(size_t stack_index,uint32_t reg_id)1537 size_t CodeGeneratorX86_64::RestoreFloatingPointRegister(size_t stack_index, uint32_t reg_id) {
1538 if (GetGraph()->HasSIMD()) {
1539 __ movups(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1540 } else {
1541 __ movsd(XmmRegister(reg_id), Address(CpuRegister(RSP), stack_index));
1542 }
1543 return GetSlowPathFPWidth();
1544 }
1545
InvokeRuntime(QuickEntrypointEnum entrypoint,HInstruction * instruction,SlowPathCode * slow_path)1546 void CodeGeneratorX86_64::InvokeRuntime(QuickEntrypointEnum entrypoint,
1547 HInstruction* instruction,
1548 SlowPathCode* slow_path) {
1549 ValidateInvokeRuntime(entrypoint, instruction, slow_path);
1550 GenerateInvokeRuntime(GetThreadOffset<kX86_64PointerSize>(entrypoint).Int32Value());
1551 if (EntrypointRequiresStackMap(entrypoint)) {
1552 RecordPcInfo(instruction, slow_path);
1553 }
1554 }
1555
InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,HInstruction * instruction,SlowPathCode * slow_path)1556 void CodeGeneratorX86_64::InvokeRuntimeWithoutRecordingPcInfo(int32_t entry_point_offset,
1557 HInstruction* instruction,
1558 SlowPathCode* slow_path) {
1559 ValidateInvokeRuntimeWithoutRecordingPcInfo(instruction, slow_path);
1560 GenerateInvokeRuntime(entry_point_offset);
1561 }
1562
GenerateInvokeRuntime(int32_t entry_point_offset)1563 void CodeGeneratorX86_64::GenerateInvokeRuntime(int32_t entry_point_offset) {
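       // Runtime entry points live in the Thread object, which is addressed through the GS
       // segment register on x86-64, so the call goes through a GS-relative absolute address.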
1564 __ gs()->call(Address::Absolute(entry_point_offset, /* no_rip= */ true));
1565 }
1566
1567 namespace detail {
1568
1569 // Mark which intrinsics we don't have handcrafted code for.
1570 template <Intrinsics T>
1571 struct IsUnimplemented {
1572 bool is_unimplemented = false;
1573 };
1574
1575 #define TRUE_OVERRIDE(Name) \
1576 template <> \
1577 struct IsUnimplemented<Intrinsics::k##Name> { \
1578 bool is_unimplemented = true; \
1579 };
1580 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(TRUE_OVERRIDE)
1581 #undef TRUE_OVERRIDE
1582
1583 static constexpr bool kIsIntrinsicUnimplemented[] = {
1584 false, // kNone
1585 #define IS_UNIMPLEMENTED(Intrinsic, ...) \
1586 IsUnimplemented<Intrinsics::k##Intrinsic>().is_unimplemented,
1587 ART_INTRINSICS_LIST(IS_UNIMPLEMENTED)
1588 #undef IS_UNIMPLEMENTED
1589 };
1590
1591 } // namespace detail
1592
1593 static constexpr int kNumberOfCpuRegisterPairs = 0;
1594 // Use a fake return address register to mimic Quick.
1595 static constexpr Register kFakeReturnRegister = Register(kLastCpuRegister + 1);
CodeGeneratorX86_64(HGraph * graph,const CompilerOptions & compiler_options,OptimizingCompilerStats * stats)1596 CodeGeneratorX86_64::CodeGeneratorX86_64(HGraph* graph,
1597 const CompilerOptions& compiler_options,
1598 OptimizingCompilerStats* stats)
1599 : CodeGenerator(graph,
1600 kNumberOfCpuRegisters,
1601 kNumberOfFloatRegisters,
1602 kNumberOfCpuRegisterPairs,
1603 ComputeRegisterMask(kCoreCalleeSaves, arraysize(kCoreCalleeSaves))
1604 | (1 << kFakeReturnRegister),
1605 ComputeRegisterMask(kFpuCalleeSaves, arraysize(kFpuCalleeSaves)),
1606 compiler_options,
1607 stats,
1608 ArrayRef<const bool>(detail::kIsIntrinsicUnimplemented)),
1609 block_labels_(nullptr),
1610 location_builder_(graph, this),
1611 instruction_visitor_(graph, this),
1612 move_resolver_(graph->GetAllocator(), this),
1613 assembler_(graph->GetAllocator(),
1614 compiler_options.GetInstructionSetFeatures()->AsX86_64InstructionSetFeatures()),
1615 constant_area_start_(0),
1616 boot_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1617 app_image_method_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1618 method_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1619 boot_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1620 app_image_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1621 type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1622 public_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1623 package_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1624 boot_image_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1625 string_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1626 method_type_bss_entry_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1627 boot_image_jni_entrypoint_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1628 boot_image_other_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1629 jit_string_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1630 jit_class_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1631 jit_method_type_patches_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)),
1632 fixups_to_jump_tables_(graph->GetAllocator()->Adapter(kArenaAllocCodeGenerator)) {
1633 AddAllocatedRegister(Location::RegisterLocation(kFakeReturnRegister));
1634 }
1635
InstructionCodeGeneratorX86_64(HGraph * graph,CodeGeneratorX86_64 * codegen)1636 InstructionCodeGeneratorX86_64::InstructionCodeGeneratorX86_64(HGraph* graph,
1637 CodeGeneratorX86_64* codegen)
1638 : InstructionCodeGenerator(graph, codegen),
1639 assembler_(codegen->GetAssembler()),
1640 codegen_(codegen) {}
1641
SetupBlockedRegisters() const1642 void CodeGeneratorX86_64::SetupBlockedRegisters() const {
1643 // Stack register is always reserved.
1644 blocked_core_registers_[RSP] = true;
1645
1646 // Block the register used as TMP.
1647 blocked_core_registers_[TMP] = true;
1648 }
1649
DWARFReg(Register reg)1650 static dwarf::Reg DWARFReg(Register reg) {
1651 return dwarf::Reg::X86_64Core(static_cast<int>(reg));
1652 }
1653
DWARFReg(FloatRegister reg)1654 static dwarf::Reg DWARFReg(FloatRegister reg) {
1655 return dwarf::Reg::X86_64Fp(static_cast<int>(reg));
1656 }
1657
VisitMethodEntryHook(HMethodEntryHook * method_hook)1658 void LocationsBuilderX86_64::VisitMethodEntryHook(HMethodEntryHook* method_hook) {
1659 LocationSummary* locations = new (GetGraph()->GetAllocator())
1660 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1661   // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1662   // the timestamp as two 32-bit halves in EDX:EAX, even on 64-bit architectures.
1663 locations->AddTemp(Location::RegisterLocation(RAX));
1664 locations->AddTemp(Location::RegisterLocation(RDX));
1665 }
1666
GenerateMethodEntryExitHook(HInstruction * instruction)1667 void InstructionCodeGeneratorX86_64::GenerateMethodEntryExitHook(HInstruction* instruction) {
1668 SlowPathCode* slow_path =
1669 new (codegen_->GetScopedAllocator()) MethodEntryExitHooksSlowPathX86_64(instruction);
1670 LocationSummary* locations = instruction->GetLocations();
1671 codegen_->AddSlowPath(slow_path);
1672
1673 if (instruction->IsMethodExitHook()) {
1674   // Check whether the caller needs a deoptimization. Strictly speaking it would be sufficient
1675   // to test only the CheckCallerForDeopt bit, but it is faster to test the whole flag for a
1676   // non-zero value. The kCHA bit is never set in debuggable runtimes because CHA optimization
1677   // is disabled there, and the other bit is set when this method itself requires a
1678   // deoptimization due to redefinition, so it is safe to just test for a non-zero value here.
1679 __ cmpl(Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()),
1680 Immediate(0));
1681 __ j(kNotEqual, slow_path->GetEntryLabel());
1682 }
1683
1684 uint64_t address = reinterpret_cast64<uint64_t>(Runtime::Current()->GetInstrumentation());
1685 MemberOffset offset = instruction->IsMethodExitHook() ?
1686 instrumentation::Instrumentation::HaveMethodExitListenersOffset()
1687 : instrumentation::Instrumentation::HaveMethodEntryListenersOffset();
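       // The listeners flag byte is 0 when no listeners are installed, kFastTraceListeners when
       // only fast trace listeners are installed, and a larger value when slow listeners are
       // installed, so the single cmpb below dispatches all three cases.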
1688 __ movq(CpuRegister(TMP), Immediate(address + offset.Int32Value()));
1689 __ cmpb(Address(CpuRegister(TMP), 0),
1690 Immediate(instrumentation::Instrumentation::kFastTraceListeners));
1691   // Check if there are any method entry / exit listeners. If not, continue with execution.
1692   __ j(kLess, slow_path->GetExitLabel());
1693   // Check if there are any slow method entry / exit listeners. If so, take the slow path.
1694   __ j(kGreater, slow_path->GetEntryLabel());
1695
1696   // Check if there is space in the buffer for a new entry; if not, take the slow path.
1697   CpuRegister init_entry = locations->GetTemp(0).AsRegister<CpuRegister>();
1698   // Use a register different from RAX and RDX for `curr_entry`: rdtsc returns its result in
1699   // RAX and RDX, and `curr_entry` is still needed to store that result into the buffer.
1700 CpuRegister curr_entry = CpuRegister(TMP);
1701 DCHECK(curr_entry.AsRegister() != RAX);
1702 DCHECK(curr_entry.AsRegister() != RDX);
1703 uint64_t trace_buffer_curr_entry_offset =
1704 Thread::TraceBufferCurrPtrOffset<kX86_64PointerSize>().SizeValue();
1705 __ gs()->movq(CpuRegister(curr_entry),
1706 Address::Absolute(trace_buffer_curr_entry_offset, /* no_rip= */ true));
1707 __ subq(CpuRegister(curr_entry), Immediate(kNumEntriesForWallClock * sizeof(void*)));
1708 __ gs()->movq(init_entry,
1709 Address::Absolute(Thread::TraceBufferPtrOffset<kX86_64PointerSize>().SizeValue(),
1710 /* no_rip= */ true));
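       // init_entry now holds the start of the trace buffer. If the reserved entry would fall
       // below it, the buffer has no space left and the slow path handles the entry instead.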
1711 __ cmpq(curr_entry, init_entry);
1712 __ j(kLess, slow_path->GetEntryLabel());
1713
1714   // Update the current entry pointer in the `Thread`.
1715 __ gs()->movq(Address::Absolute(trace_buffer_curr_entry_offset, /* no_rip= */ true),
1716 CpuRegister(curr_entry));
1717
1718 // Record method pointer and action.
1719 CpuRegister method = init_entry;
1720 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1721   // Use the last two bits of the method pointer to encode the trace action. For MethodEntry
1722   // the action is 0, so there is no need to set the bits since they are already 0.
1723 if (instruction->IsMethodExitHook()) {
1724 DCHECK_GE(ArtMethod::Alignment(kRuntimePointerSize), static_cast<size_t>(4));
1725 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodEnter) == 0);
1726 static_assert(enum_cast<int32_t>(TraceAction::kTraceMethodExit) == 1);
1727 __ orq(method, Immediate(enum_cast<int32_t>(TraceAction::kTraceMethodExit)));
1728 }
1729 __ movq(Address(curr_entry, kMethodOffsetInBytes), CpuRegister(method));
1730   // Get the timestamp. rdtsc returns the timestamp in RAX and RDX even on 64-bit architectures.
1731 __ rdtsc();
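       // Combine the two 32-bit halves in RDX:RAX into a single 64-bit timestamp in RAX.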
1732 __ shlq(CpuRegister(RDX), Immediate(32));
1733 __ orq(CpuRegister(RAX), CpuRegister(RDX));
1734 __ movq(Address(curr_entry, kTimestampOffsetInBytes), CpuRegister(RAX));
1735 __ Bind(slow_path->GetExitLabel());
1736 }
1737
VisitMethodEntryHook(HMethodEntryHook * instruction)1738 void InstructionCodeGeneratorX86_64::VisitMethodEntryHook(HMethodEntryHook* instruction) {
1739 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1740 DCHECK(codegen_->RequiresCurrentMethod());
1741 GenerateMethodEntryExitHook(instruction);
1742 }
1743
SetInForReturnValue(HInstruction * instr,LocationSummary * locations)1744 void SetInForReturnValue(HInstruction* instr, LocationSummary* locations) {
1745 switch (instr->InputAt(0)->GetType()) {
1746 case DataType::Type::kReference:
1747 case DataType::Type::kBool:
1748 case DataType::Type::kUint8:
1749 case DataType::Type::kInt8:
1750 case DataType::Type::kUint16:
1751 case DataType::Type::kInt16:
1752 case DataType::Type::kInt32:
1753 case DataType::Type::kInt64:
1754 locations->SetInAt(0, Location::RegisterLocation(RAX));
1755 break;
1756
1757 case DataType::Type::kFloat32:
1758 case DataType::Type::kFloat64:
1759 locations->SetInAt(0, Location::FpuRegisterLocation(XMM0));
1760 break;
1761
1762 case DataType::Type::kVoid:
1763 locations->SetInAt(0, Location::NoLocation());
1764 break;
1765
1766 default:
1767 LOG(FATAL) << "Unexpected return type " << instr->InputAt(0)->GetType();
1768 }
1769 }
1770
VisitMethodExitHook(HMethodExitHook * method_hook)1771 void LocationsBuilderX86_64::VisitMethodExitHook(HMethodExitHook* method_hook) {
1772 LocationSummary* locations = new (GetGraph()->GetAllocator())
1773 LocationSummary(method_hook, LocationSummary::kCallOnSlowPath);
1774 SetInForReturnValue(method_hook, locations);
1775   // We use rdtsc to record the timestamp for method profiling. rdtsc returns
1776   // the timestamp as two 32-bit halves in EDX:EAX, even on 64-bit architectures.
1777 locations->AddTemp(Location::RegisterLocation(RAX));
1778 locations->AddTemp(Location::RegisterLocation(RDX));
1779 }
1780
VisitMethodExitHook(HMethodExitHook * instruction)1781 void InstructionCodeGeneratorX86_64::VisitMethodExitHook(HMethodExitHook* instruction) {
1782 DCHECK(codegen_->GetCompilerOptions().IsJitCompiler() && GetGraph()->IsDebuggable());
1783 DCHECK(codegen_->RequiresCurrentMethod());
1784 GenerateMethodEntryExitHook(instruction);
1785 }
1786
MaybeIncrementHotness(HSuspendCheck * suspend_check,bool is_frame_entry)1787 void CodeGeneratorX86_64::MaybeIncrementHotness(HSuspendCheck* suspend_check, bool is_frame_entry) {
1788 if (GetCompilerOptions().CountHotnessInCompiledCode()) {
1789 NearLabel overflow;
1790 Register method = kMethodRegisterArgument;
1791 if (!is_frame_entry) {
1792 CHECK(RequiresCurrentMethod());
1793 method = TMP;
1794 __ movq(CpuRegister(method), Address(CpuRegister(RSP), kCurrentMethodStackOffset));
1795 }
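         // Count down the hotness counter, skipping the decrement (via the `overflow` label)
         // once it has already reached the nterp hotness value, so it never goes past it.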
1796 __ cmpw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1797 Immediate(interpreter::kNterpHotnessValue));
1798 __ j(kEqual, &overflow);
1799 __ addw(Address(CpuRegister(method), ArtMethod::HotnessCountOffset().Int32Value()),
1800 Immediate(-1));
1801 __ Bind(&overflow);
1802 }
1803
1804 if (GetGraph()->IsCompilingBaseline() &&
1805 GetGraph()->IsUsefulOptimizing() &&
1806 !Runtime::Current()->IsAotCompiler()) {
1807 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
1808 DCHECK(info != nullptr);
1809 CHECK(!HasEmptyFrame());
1810 uint64_t address = reinterpret_cast64<uint64_t>(info) +
1811 ProfilingInfo::BaselineHotnessCountOffset().Int32Value();
1812 SlowPathCode* slow_path =
1813 new (GetScopedAllocator()) CompileOptimizedSlowPathX86_64(suspend_check, address);
1814 AddSlowPath(slow_path);
1815     // Note: if the address were in the 32-bit range, we could use
1816     // Address::Absolute and avoid this movq.
1817 __ movq(CpuRegister(TMP), Immediate(address));
1818     // With multiple threads, this counter update can overflow. This is OK: we will
1819     // eventually see the counter reach 0. Also, at this point we have no register
1820     // available to inspect the counter directly.
1821 __ addw(Address(CpuRegister(TMP), 0), Immediate(-1));
1822 __ j(kEqual, slow_path->GetEntryLabel());
1823 __ Bind(slow_path->GetExitLabel());
1824 }
1825 }
1826
GenerateFrameEntry()1827 void CodeGeneratorX86_64::GenerateFrameEntry() {
1828 __ cfi().SetCurrentCFAOffset(kX86_64WordSize); // return address
1829
1830 // Check if we need to generate the clinit check. We will jump to the
1831 // resolution stub if the class is not initialized and the executing thread is
1832 // not the thread initializing it.
1833 // We do this before constructing the frame to get the correct stack trace if
1834 // an exception is thrown.
1835 if (GetCompilerOptions().ShouldCompileWithClinitCheck(GetGraph()->GetArtMethod())) {
1836 NearLabel resolution;
1837 // Check if we're visibly initialized.
1838
1839 // We don't emit a read barrier here to save on code size. We rely on the
1840 // resolution trampoline to do a suspend check before re-entering this code.
1841 __ movl(CpuRegister(TMP),
1842 Address(CpuRegister(kMethodRegisterArgument),
1843 ArtMethod::DeclaringClassOffset().Int32Value()));
1844 __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1845 Immediate(kShiftedVisiblyInitializedValue));
1846 __ j(kAboveEqual, &frame_entry_label_);
1847
1848     // Check if the class is being initialized and the initializing thread
1849     // is the one executing this code.
1850 __ cmpb(Address(CpuRegister(TMP), kClassStatusByteOffset),
1851 Immediate(kShiftedInitializingValue));
1852 __ j(kBelow, &resolution);
1853
1854 __ movl(CpuRegister(TMP),
1855 Address(CpuRegister(TMP), mirror::Class::ClinitThreadIdOffset().Int32Value()));
1856 __ gs()->cmpl(
1857 CpuRegister(TMP),
1858 Address::Absolute(Thread::TidOffset<kX86_64PointerSize>().Int32Value(), /*no_rip=*/ true));
1859 __ j(kEqual, &frame_entry_label_);
1860 __ Bind(&resolution);
1861
1862 // Jump to the resolution stub.
1863 ThreadOffset64 entrypoint_offset =
1864 GetThreadOffset<kX86_64PointerSize>(kQuickQuickResolutionTrampoline);
1865 __ gs()->jmp(Address::Absolute(entrypoint_offset, /*no_rip=*/ true));
1866 }
1867
1868 __ Bind(&frame_entry_label_);
1869 bool skip_overflow_check = IsLeafMethod()
1870 && !FrameNeedsStackCheck(GetFrameSize(), InstructionSet::kX86_64);
1871 DCHECK(GetCompilerOptions().GetImplicitStackOverflowChecks());
1872
1873
1874 if (!skip_overflow_check) {
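         // Implicit stack overflow check: read a word `reserved_bytes` below RSP. If that
         // address falls into the stack guard region, the access faults and the fault handler
         // raises a StackOverflowError.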
1875 size_t reserved_bytes = GetStackOverflowReservedBytes(InstructionSet::kX86_64);
1876 __ testq(CpuRegister(RAX), Address(CpuRegister(RSP), -static_cast<int32_t>(reserved_bytes)));
1877 RecordPcInfoForFrameOrBlockEntry();
1878 }
1879
1880 if (!HasEmptyFrame()) {
1881 // Make sure the frame size isn't unreasonably large.
1882 DCHECK_LE(GetFrameSize(), GetMaximumFrameSize());
1883
1884 for (int i = arraysize(kCoreCalleeSaves) - 1; i >= 0; --i) {
1885 Register reg = kCoreCalleeSaves[i];
1886 if (allocated_registers_.ContainsCoreRegister(reg)) {
1887 __ pushq(CpuRegister(reg));
1888 __ cfi().AdjustCFAOffset(kX86_64WordSize);
1889 __ cfi().RelOffset(DWARFReg(reg), 0);
1890 }
1891 }
1892
1893 int adjust = GetFrameSize() - GetCoreSpillSize();
1894 IncreaseFrame(adjust);
1895 uint32_t xmm_spill_location = GetFpuSpillStart();
1896 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1897
1898 for (int i = arraysize(kFpuCalleeSaves) - 1; i >= 0; --i) {
1899 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1900 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1901 __ movsd(Address(CpuRegister(RSP), offset), XmmRegister(kFpuCalleeSaves[i]));
1902 __ cfi().RelOffset(DWARFReg(kFpuCalleeSaves[i]), offset);
1903 }
1904 }
1905
1906 // Save the current method if we need it. Note that we do not
1907 // do this in HCurrentMethod, as the instruction might have been removed
1908 // in the SSA graph.
1909 if (RequiresCurrentMethod()) {
1910 CHECK(!HasEmptyFrame());
1911 __ movq(Address(CpuRegister(RSP), kCurrentMethodStackOffset),
1912 CpuRegister(kMethodRegisterArgument));
1913 }
1914
1915 if (GetGraph()->HasShouldDeoptimizeFlag()) {
1916 CHECK(!HasEmptyFrame());
1917 // Initialize should_deoptimize flag to 0.
1918 __ movl(Address(CpuRegister(RSP), GetStackOffsetOfShouldDeoptimizeFlag()), Immediate(0));
1919 }
1920 }
1921
1922 MaybeIncrementHotness(/* suspend_check= */ nullptr, /* is_frame_entry= */ true);
1923 }
1924
GenerateFrameExit()1925 void CodeGeneratorX86_64::GenerateFrameExit() {
1926 __ cfi().RememberState();
1927 if (!HasEmptyFrame()) {
1928 uint32_t xmm_spill_location = GetFpuSpillStart();
1929 size_t xmm_spill_slot_size = GetCalleePreservedFPWidth();
1930 for (size_t i = 0; i < arraysize(kFpuCalleeSaves); ++i) {
1931 if (allocated_registers_.ContainsFloatingPointRegister(kFpuCalleeSaves[i])) {
1932 int offset = xmm_spill_location + (xmm_spill_slot_size * i);
1933 __ movsd(XmmRegister(kFpuCalleeSaves[i]), Address(CpuRegister(RSP), offset));
1934 __ cfi().Restore(DWARFReg(kFpuCalleeSaves[i]));
1935 }
1936 }
1937
1938 int adjust = GetFrameSize() - GetCoreSpillSize();
1939 DecreaseFrame(adjust);
1940
1941 for (size_t i = 0; i < arraysize(kCoreCalleeSaves); ++i) {
1942 Register reg = kCoreCalleeSaves[i];
1943 if (allocated_registers_.ContainsCoreRegister(reg)) {
1944 __ popq(CpuRegister(reg));
1945 __ cfi().AdjustCFAOffset(-static_cast<int>(kX86_64WordSize));
1946 __ cfi().Restore(DWARFReg(reg));
1947 }
1948 }
1949 }
1950 __ ret();
1951 __ cfi().RestoreState();
1952 __ cfi().DefCFAOffset(GetFrameSize());
1953 }
1954
Bind(HBasicBlock * block)1955 void CodeGeneratorX86_64::Bind(HBasicBlock* block) {
1956 __ Bind(GetLabelOf(block));
1957 }
1958
Move(Location destination,Location source)1959 void CodeGeneratorX86_64::Move(Location destination, Location source) {
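       // Generic move between arbitrary locations. The operand width is implied by the kind of
       // location: stack slots and 32-bit constants use movl, double stack slots and 64-bit
       // constants use movq, and FP registers use the SSE moves.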
1960 if (source.Equals(destination)) {
1961 return;
1962 }
1963 if (destination.IsRegister()) {
1964 CpuRegister dest = destination.AsRegister<CpuRegister>();
1965 if (source.IsRegister()) {
1966 __ movq(dest, source.AsRegister<CpuRegister>());
1967 } else if (source.IsFpuRegister()) {
1968 __ movq(dest, source.AsFpuRegister<XmmRegister>());
1969 } else if (source.IsStackSlot()) {
1970 __ movl(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1971 } else if (source.IsConstant()) {
1972 HConstant* constant = source.GetConstant();
1973 if (constant->IsLongConstant()) {
1974 Load64BitValue(dest, constant->AsLongConstant()->GetValue());
1975 } else if (constant->IsDoubleConstant()) {
1976 Load64BitValue(dest, GetInt64ValueOf(constant));
1977 } else {
1978 Load32BitValue(dest, GetInt32ValueOf(constant));
1979 }
1980 } else {
1981 DCHECK(source.IsDoubleStackSlot());
1982 __ movq(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
1983 }
1984 } else if (destination.IsFpuRegister()) {
1985 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
1986 if (source.IsRegister()) {
1987 __ movq(dest, source.AsRegister<CpuRegister>());
1988 } else if (source.IsFpuRegister()) {
1989 __ movaps(dest, source.AsFpuRegister<XmmRegister>());
1990 } else if (source.IsConstant()) {
1991 HConstant* constant = source.GetConstant();
1992 int64_t value = CodeGenerator::GetInt64ValueOf(constant);
1993 if (constant->IsFloatConstant()) {
1994 Load32BitValue(dest, static_cast<int32_t>(value));
1995 } else {
1996 Load64BitValue(dest, value);
1997 }
1998 } else if (source.IsStackSlot()) {
1999 __ movss(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2000 } else {
2001 DCHECK(source.IsDoubleStackSlot());
2002 __ movsd(dest, Address(CpuRegister(RSP), source.GetStackIndex()));
2003 }
2004 } else if (destination.IsStackSlot()) {
2005 if (source.IsRegister()) {
2006 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
2007 source.AsRegister<CpuRegister>());
2008 } else if (source.IsFpuRegister()) {
2009 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
2010 source.AsFpuRegister<XmmRegister>());
2011 } else if (source.IsConstant()) {
2012 HConstant* constant = source.GetConstant();
2013 int32_t value = GetInt32ValueOf(constant);
2014 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
2015 } else {
2016 DCHECK(source.IsStackSlot()) << source;
2017 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2018 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2019 }
2020 } else {
2021 DCHECK(destination.IsDoubleStackSlot());
2022 if (source.IsRegister()) {
2023 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
2024 source.AsRegister<CpuRegister>());
2025 } else if (source.IsFpuRegister()) {
2026 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
2027 source.AsFpuRegister<XmmRegister>());
2028 } else if (source.IsConstant()) {
2029 HConstant* constant = source.GetConstant();
2030 DCHECK(constant->IsLongConstant() || constant->IsDoubleConstant());
2031 int64_t value = GetInt64ValueOf(constant);
2032 Store64BitValueToStack(destination, value);
2033 } else {
2034 DCHECK(source.IsDoubleStackSlot());
2035 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
2036 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
2037 }
2038 }
2039 }
2040
LoadFromMemoryNoReference(DataType::Type type,Location dst,Address src)2041 void CodeGeneratorX86_64::LoadFromMemoryNoReference(DataType::Type type,
2042 Location dst,
2043 Address src) {
2044 switch (type) {
2045 case DataType::Type::kBool:
2046 case DataType::Type::kUint8:
2047 __ movzxb(dst.AsRegister<CpuRegister>(), src);
2048 break;
2049 case DataType::Type::kInt8:
2050 __ movsxb(dst.AsRegister<CpuRegister>(), src);
2051 break;
2052 case DataType::Type::kUint16:
2053 __ movzxw(dst.AsRegister<CpuRegister>(), src);
2054 break;
2055 case DataType::Type::kInt16:
2056 __ movsxw(dst.AsRegister<CpuRegister>(), src);
2057 break;
2058 case DataType::Type::kInt32:
2059 case DataType::Type::kUint32:
2060 __ movl(dst.AsRegister<CpuRegister>(), src);
2061 break;
2062 case DataType::Type::kInt64:
2063 case DataType::Type::kUint64:
2064 __ movq(dst.AsRegister<CpuRegister>(), src);
2065 break;
2066 case DataType::Type::kFloat32:
2067 __ movss(dst.AsFpuRegister<XmmRegister>(), src);
2068 break;
2069 case DataType::Type::kFloat64:
2070 __ movsd(dst.AsFpuRegister<XmmRegister>(), src);
2071 break;
2072 case DataType::Type::kVoid:
2073 case DataType::Type::kReference:
2074 LOG(FATAL) << "Unreachable type " << type;
2075 UNREACHABLE();
2076 }
2077 }
2078
MoveConstant(Location location,int32_t value)2079 void CodeGeneratorX86_64::MoveConstant(Location location, int32_t value) {
2080 DCHECK(location.IsRegister());
2081 Load64BitValue(location.AsRegister<CpuRegister>(), static_cast<int64_t>(value));
2082 }
2083
MoveLocation(Location dst,Location src,DataType::Type dst_type)2084 void CodeGeneratorX86_64::MoveLocation(Location dst,
2085 Location src,
2086 [[maybe_unused]] DataType::Type dst_type) {
2087 Move(dst, src);
2088 }
2089
AddLocationAsTemp(Location location,LocationSummary * locations)2090 void CodeGeneratorX86_64::AddLocationAsTemp(Location location, LocationSummary* locations) {
2091 if (location.IsRegister()) {
2092 locations->AddTemp(location);
2093 } else {
2094 UNIMPLEMENTED(FATAL) << "AddLocationAsTemp not implemented for location " << location;
2095 }
2096 }
2097
HandleGoto(HInstruction * got,HBasicBlock * successor)2098 void InstructionCodeGeneratorX86_64::HandleGoto(HInstruction* got, HBasicBlock* successor) {
2099 if (successor->IsExitBlock()) {
2100 DCHECK(got->GetPrevious()->AlwaysThrows());
2101 return; // no code needed
2102 }
2103
2104 HBasicBlock* block = got->GetBlock();
2105 HInstruction* previous = got->GetPrevious();
2106
2107 HLoopInformation* info = block->GetLoopInformation();
2108 if (info != nullptr && info->IsBackEdge(*block) && info->HasSuspendCheck()) {
2109 codegen_->MaybeIncrementHotness(info->GetSuspendCheck(), /* is_frame_entry= */ false);
2110 GenerateSuspendCheck(info->GetSuspendCheck(), successor);
2111 return;
2112 }
2113
2114 if (block->IsEntryBlock() && (previous != nullptr) && previous->IsSuspendCheck()) {
2115 GenerateSuspendCheck(previous->AsSuspendCheck(), nullptr);
2116 }
2117 if (!codegen_->GoesToNextBlock(got->GetBlock(), successor)) {
2118 __ jmp(codegen_->GetLabelOf(successor));
2119 }
2120 }
2121
VisitGoto(HGoto * got)2122 void LocationsBuilderX86_64::VisitGoto(HGoto* got) {
2123 got->SetLocations(nullptr);
2124 }
2125
VisitGoto(HGoto * got)2126 void InstructionCodeGeneratorX86_64::VisitGoto(HGoto* got) {
2127 HandleGoto(got, got->GetSuccessor());
2128 }
2129
VisitTryBoundary(HTryBoundary * try_boundary)2130 void LocationsBuilderX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2131 try_boundary->SetLocations(nullptr);
2132 }
2133
VisitTryBoundary(HTryBoundary * try_boundary)2134 void InstructionCodeGeneratorX86_64::VisitTryBoundary(HTryBoundary* try_boundary) {
2135 HBasicBlock* successor = try_boundary->GetNormalFlowSuccessor();
2136 if (!successor->IsExitBlock()) {
2137 HandleGoto(try_boundary, successor);
2138 }
2139 }
2140
VisitExit(HExit * exit)2141 void LocationsBuilderX86_64::VisitExit(HExit* exit) {
2142 exit->SetLocations(nullptr);
2143 }
2144
VisitExit(HExit * exit)2145 void InstructionCodeGeneratorX86_64::VisitExit([[maybe_unused]] HExit* exit) {}
2146
2147 template<class LabelType>
GenerateFPJumps(HCondition * cond,LabelType * true_label,LabelType * false_label)2148 void InstructionCodeGeneratorX86_64::GenerateFPJumps(HCondition* cond,
2149 LabelType* true_label,
2150 LabelType* false_label) {
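       // The preceding ucomiss/ucomisd sets the "unordered" flags when either operand is NaN, so
       // dispatch the NaN case first and let the ordered condition jump below handle the rest.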
2151 if (cond->IsFPConditionTrueIfNaN()) {
2152 __ j(kUnordered, true_label);
2153 } else if (cond->IsFPConditionFalseIfNaN()) {
2154 __ j(kUnordered, false_label);
2155 }
2156 __ j(X86_64FPCondition(cond->GetCondition()), true_label);
2157 }
2158
GenerateCompareTest(HCondition * condition)2159 void InstructionCodeGeneratorX86_64::GenerateCompareTest(HCondition* condition) {
2160 LocationSummary* locations = condition->GetLocations();
2161
2162 Location left = locations->InAt(0);
2163 Location right = locations->InAt(1);
2164 DataType::Type type = condition->InputAt(0)->GetType();
2165 switch (type) {
2166 case DataType::Type::kBool:
2167 case DataType::Type::kUint8:
2168 case DataType::Type::kInt8:
2169 case DataType::Type::kUint16:
2170 case DataType::Type::kInt16:
2171 case DataType::Type::kInt32:
2172 case DataType::Type::kReference: {
2173 codegen_->GenerateIntCompare(left, right);
2174 break;
2175 }
2176 case DataType::Type::kInt64: {
2177 codegen_->GenerateLongCompare(left, right);
2178 break;
2179 }
2180 case DataType::Type::kFloat32: {
2181 if (right.IsFpuRegister()) {
2182 __ ucomiss(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2183 } else if (right.IsConstant()) {
2184 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2185 codegen_->LiteralFloatAddress(
2186 right.GetConstant()->AsFloatConstant()->GetValue()));
2187 } else {
2188 DCHECK(right.IsStackSlot());
2189 __ ucomiss(left.AsFpuRegister<XmmRegister>(),
2190 Address(CpuRegister(RSP), right.GetStackIndex()));
2191 }
2192 break;
2193 }
2194 case DataType::Type::kFloat64: {
2195 if (right.IsFpuRegister()) {
2196 __ ucomisd(left.AsFpuRegister<XmmRegister>(), right.AsFpuRegister<XmmRegister>());
2197 } else if (right.IsConstant()) {
2198 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2199 codegen_->LiteralDoubleAddress(
2200 right.GetConstant()->AsDoubleConstant()->GetValue()));
2201 } else {
2202 DCHECK(right.IsDoubleStackSlot());
2203 __ ucomisd(left.AsFpuRegister<XmmRegister>(),
2204 Address(CpuRegister(RSP), right.GetStackIndex()));
2205 }
2206 break;
2207 }
2208 default:
2209 LOG(FATAL) << "Unexpected condition type " << type;
2210 }
2211 }
2212
2213 template<class LabelType>
GenerateCompareTestAndBranch(HCondition * condition,LabelType * true_target_in,LabelType * false_target_in)2214 void InstructionCodeGeneratorX86_64::GenerateCompareTestAndBranch(HCondition* condition,
2215 LabelType* true_target_in,
2216 LabelType* false_target_in) {
2217 // Generated branching requires both targets to be explicit. If either of the
2218 // targets is nullptr (fallthrough) use and bind `fallthrough_target` instead.
2219 LabelType fallthrough_target;
2220 LabelType* true_target = true_target_in == nullptr ? &fallthrough_target : true_target_in;
2221 LabelType* false_target = false_target_in == nullptr ? &fallthrough_target : false_target_in;
2222
2223 // Generate the comparison to set the CC.
2224 GenerateCompareTest(condition);
2225
2226 // Now generate the correct jump(s).
2227 DataType::Type type = condition->InputAt(0)->GetType();
2228 switch (type) {
2229 case DataType::Type::kInt64: {
2230 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2231 break;
2232 }
2233 case DataType::Type::kFloat32: {
2234 GenerateFPJumps(condition, true_target, false_target);
2235 break;
2236 }
2237 case DataType::Type::kFloat64: {
2238 GenerateFPJumps(condition, true_target, false_target);
2239 break;
2240 }
2241 default:
2242 LOG(FATAL) << "Unexpected condition type " << type;
2243 }
2244
2245 if (false_target != &fallthrough_target) {
2246 __ jmp(false_target);
2247 }
2248
2249 if (fallthrough_target.IsLinked()) {
2250 __ Bind(&fallthrough_target);
2251 }
2252 }
2253
AreEflagsSetFrom(HInstruction * cond,HInstruction * branch,const CompilerOptions & compiler_options)2254 static bool AreEflagsSetFrom(HInstruction* cond,
2255 HInstruction* branch,
2256 const CompilerOptions& compiler_options) {
2257   // Moves may affect the eflags register (a move of zero uses xorl), so the eflags set by
2258   // `cond` can only be used if `cond` comes immediately before `branch`. We can't use the
2259   // eflags for long conditions if they are materialized, due to the complex branching.
2260 return cond->IsCondition() &&
2261 cond->GetNext() == branch &&
2262 !DataType::IsFloatingPointType(cond->InputAt(0)->GetType()) &&
2263 !(cond->GetBlock()->GetGraph()->IsCompilingBaseline() &&
2264 compiler_options.ProfileBranches());
2265 }
2266
2267 template<class LabelType>
GenerateTestAndBranch(HInstruction * instruction,size_t condition_input_index,LabelType * true_target,LabelType * false_target)2268 void InstructionCodeGeneratorX86_64::GenerateTestAndBranch(HInstruction* instruction,
2269 size_t condition_input_index,
2270 LabelType* true_target,
2271 LabelType* false_target) {
2272 HInstruction* cond = instruction->InputAt(condition_input_index);
2273
2274 if (true_target == nullptr && false_target == nullptr) {
2275 // Nothing to do. The code always falls through.
2276 return;
2277 } else if (cond->IsIntConstant()) {
2278 // Constant condition, statically compared against "true" (integer value 1).
2279 if (cond->AsIntConstant()->IsTrue()) {
2280 if (true_target != nullptr) {
2281 __ jmp(true_target);
2282 }
2283 } else {
2284 DCHECK(cond->AsIntConstant()->IsFalse()) << cond->AsIntConstant()->GetValue();
2285 if (false_target != nullptr) {
2286 __ jmp(false_target);
2287 }
2288 }
2289 return;
2290 }
2291
2292 // The following code generates these patterns:
2293 // (1) true_target == nullptr && false_target != nullptr
2294 // - opposite condition true => branch to false_target
2295 // (2) true_target != nullptr && false_target == nullptr
2296 // - condition true => branch to true_target
2297 // (3) true_target != nullptr && false_target != nullptr
2298 // - condition true => branch to true_target
2299 // - branch to false_target
2300 if (IsBooleanValueOrMaterializedCondition(cond)) {
2301 if (AreEflagsSetFrom(cond, instruction, codegen_->GetCompilerOptions())) {
2302 if (true_target == nullptr) {
2303 __ j(X86_64IntegerCondition(cond->AsCondition()->GetOppositeCondition()), false_target);
2304 } else {
2305 __ j(X86_64IntegerCondition(cond->AsCondition()->GetCondition()), true_target);
2306 }
2307 } else {
2308 // Materialized condition, compare against 0.
2309 Location lhs = instruction->GetLocations()->InAt(condition_input_index);
2310 if (lhs.IsRegister()) {
2311 __ testl(lhs.AsRegister<CpuRegister>(), lhs.AsRegister<CpuRegister>());
2312 } else {
2313 __ cmpl(Address(CpuRegister(RSP), lhs.GetStackIndex()), Immediate(0));
2314 }
2315 if (true_target == nullptr) {
2316 __ j(kEqual, false_target);
2317 } else {
2318 __ j(kNotEqual, true_target);
2319 }
2320 }
2321 } else {
2322 // Condition has not been materialized, use its inputs as the
2323 // comparison and its condition as the branch condition.
2324 HCondition* condition = cond->AsCondition();
2325
2326 // If this is a long or FP comparison that has been folded into
2327 // the HCondition, generate the comparison directly.
2328 DataType::Type type = condition->InputAt(0)->GetType();
2329 if (type == DataType::Type::kInt64 || DataType::IsFloatingPointType(type)) {
2330 GenerateCompareTestAndBranch(condition, true_target, false_target);
2331 return;
2332 }
2333
2334 Location lhs = condition->GetLocations()->InAt(0);
2335 Location rhs = condition->GetLocations()->InAt(1);
2336 codegen_->GenerateIntCompare(lhs, rhs);
2337 if (true_target == nullptr) {
2338 __ j(X86_64IntegerCondition(condition->GetOppositeCondition()), false_target);
2339 } else {
2340 __ j(X86_64IntegerCondition(condition->GetCondition()), true_target);
2341 }
2342 }
2343
2344 // If neither branch falls through (case 3), the conditional branch to `true_target`
2345 // was already emitted (case 2) and we need to emit a jump to `false_target`.
2346 if (true_target != nullptr && false_target != nullptr) {
2347 __ jmp(false_target);
2348 }
2349 }
2350
VisitIf(HIf * if_instr)2351 void LocationsBuilderX86_64::VisitIf(HIf* if_instr) {
2352 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(if_instr);
2353 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2354 if (GetGraph()->IsCompilingBaseline() &&
2355 codegen_->GetCompilerOptions().ProfileBranches() &&
2356 !Runtime::Current()->IsAotCompiler()) {
2357 locations->SetInAt(0, Location::RequiresRegister());
2358 locations->AddTemp(Location::RequiresRegister());
2359 } else {
2360 locations->SetInAt(0, Location::Any());
2361 }
2362 }
2363 }
2364
VisitIf(HIf * if_instr)2365 void InstructionCodeGeneratorX86_64::VisitIf(HIf* if_instr) {
2366 HBasicBlock* true_successor = if_instr->IfTrueSuccessor();
2367 HBasicBlock* false_successor = if_instr->IfFalseSuccessor();
2368 Label* true_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), true_successor) ?
2369 nullptr : codegen_->GetLabelOf(true_successor);
2370 Label* false_target = codegen_->GoesToNextBlock(if_instr->GetBlock(), false_successor) ?
2371 nullptr : codegen_->GetLabelOf(false_successor);
2372 if (IsBooleanValueOrMaterializedCondition(if_instr->InputAt(0))) {
2373 if (GetGraph()->IsCompilingBaseline() &&
2374 codegen_->GetCompilerOptions().ProfileBranches() &&
2375 !Runtime::Current()->IsAotCompiler()) {
2376 DCHECK(if_instr->InputAt(0)->IsCondition());
2377 CpuRegister temp = if_instr->GetLocations()->GetTemp(0).AsRegister<CpuRegister>();
2378 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
2379 DCHECK(info != nullptr);
2380 BranchCache* cache = info->GetBranchCache(if_instr->GetDexPc());
2381 // Currently, not all If branches are profiled.
2382 if (cache != nullptr) {
2383 uint64_t address =
2384 reinterpret_cast64<uint64_t>(cache) + BranchCache::FalseOffset().Int32Value();
2385 static_assert(
2386 BranchCache::TrueOffset().Int32Value() - BranchCache::FalseOffset().Int32Value() == 2,
2387 "Unexpected offsets for BranchCache");
2388 NearLabel done;
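             // TMP points at the `false_` counter and the condition value (0 or 1) scaled by 2
             // selects the false or true counter. The 16-bit counters saturate: if the increment
             // wraps around to zero, the store below is skipped so the counter stays at 0xFFFF.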
2389 Location lhs = if_instr->GetLocations()->InAt(0);
2390 __ movq(CpuRegister(TMP), Immediate(address));
2391 __ movzxw(temp, Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0));
2392 __ addw(temp, Immediate(1));
2393 __ j(kZero, &done);
2394 __ movw(Address(CpuRegister(TMP), lhs.AsRegister<CpuRegister>(), TIMES_2, 0), temp);
2395 __ Bind(&done);
2396 }
2397 }
2398 }
2399 GenerateTestAndBranch(if_instr, /* condition_input_index= */ 0, true_target, false_target);
2400 }
2401
VisitDeoptimize(HDeoptimize * deoptimize)2402 void LocationsBuilderX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2403 LocationSummary* locations = new (GetGraph()->GetAllocator())
2404 LocationSummary(deoptimize, LocationSummary::kCallOnSlowPath);
2405 InvokeRuntimeCallingConvention calling_convention;
2406 RegisterSet caller_saves = RegisterSet::Empty();
2407 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
2408 locations->SetCustomSlowPathCallerSaves(caller_saves);
2409 if (IsBooleanValueOrMaterializedCondition(deoptimize->InputAt(0))) {
2410 locations->SetInAt(0, Location::Any());
2411 }
2412 }
2413
VisitDeoptimize(HDeoptimize * deoptimize)2414 void InstructionCodeGeneratorX86_64::VisitDeoptimize(HDeoptimize* deoptimize) {
2415 SlowPathCode* slow_path = deopt_slow_paths_.NewSlowPath<DeoptimizationSlowPathX86_64>(deoptimize);
2416 GenerateTestAndBranch<Label>(deoptimize,
2417 /* condition_input_index= */ 0,
2418 slow_path->GetEntryLabel(),
2419 /* false_target= */ nullptr);
2420 }
2421
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2422 void LocationsBuilderX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2423 LocationSummary* locations = new (GetGraph()->GetAllocator())
2424 LocationSummary(flag, LocationSummary::kNoCall);
2425 locations->SetOut(Location::RequiresRegister());
2426 }
2427
VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag * flag)2428 void InstructionCodeGeneratorX86_64::VisitShouldDeoptimizeFlag(HShouldDeoptimizeFlag* flag) {
2429 __ movl(flag->GetLocations()->Out().AsRegister<CpuRegister>(),
2430 Address(CpuRegister(RSP), codegen_->GetStackOffsetOfShouldDeoptimizeFlag()));
2431 }
2432
SelectCanUseCMOV(HSelect * select)2433 static bool SelectCanUseCMOV(HSelect* select) {
2434 // There are no conditional move instructions for XMMs.
2435 if (DataType::IsFloatingPointType(select->GetType())) {
2436 return false;
2437 }
2438
2439   // An FP condition doesn't generate the single CC that we need.
2440 HInstruction* condition = select->GetCondition();
2441 if (condition->IsCondition() &&
2442 DataType::IsFloatingPointType(condition->InputAt(0)->GetType())) {
2443 return false;
2444 }
2445
2446 // We can generate a CMOV for this Select.
2447 return true;
2448 }
2449
VisitSelect(HSelect * select)2450 void LocationsBuilderX86_64::VisitSelect(HSelect* select) {
2451 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(select);
2452 if (DataType::IsFloatingPointType(select->GetType())) {
2453 locations->SetInAt(0, Location::RequiresFpuRegister());
2454 locations->SetInAt(1, Location::Any());
2455 } else {
2456 locations->SetInAt(0, Location::RequiresRegister());
2457 if (SelectCanUseCMOV(select)) {
2458 if (select->InputAt(1)->IsConstant()) {
2459 locations->SetInAt(1, Location::RequiresRegister());
2460 } else {
2461 locations->SetInAt(1, Location::Any());
2462 }
2463 } else {
2464 locations->SetInAt(1, Location::Any());
2465 }
2466 }
2467 if (IsBooleanValueOrMaterializedCondition(select->GetCondition())) {
2468 locations->SetInAt(2, Location::RequiresRegister());
2469 }
2470 locations->SetOut(Location::SameAsFirstInput());
2471 }
2472
VisitSelect(HSelect * select)2473 void InstructionCodeGeneratorX86_64::VisitSelect(HSelect* select) {
2474 LocationSummary* locations = select->GetLocations();
2475 if (SelectCanUseCMOV(select)) {
2476 // If both the condition and the source types are integer, we can generate
2477 // a CMOV to implement Select.
2478 CpuRegister value_false = locations->InAt(0).AsRegister<CpuRegister>();
2479 Location value_true_loc = locations->InAt(1);
2480 DCHECK(locations->InAt(0).Equals(locations->Out()));
2481
2482 HInstruction* select_condition = select->GetCondition();
2483 Condition cond = kNotEqual;
2484
2485 // Figure out how to test the 'condition'.
2486 if (select_condition->IsCondition()) {
2487 HCondition* condition = select_condition->AsCondition();
2488 if (!condition->IsEmittedAtUseSite()) {
2489 // This was a previously materialized condition.
2490 // Can we use the existing condition code?
2491 if (AreEflagsSetFrom(condition, select, codegen_->GetCompilerOptions())) {
2492 // Materialization was the previous instruction. Condition codes are right.
2493 cond = X86_64IntegerCondition(condition->GetCondition());
2494 } else {
2495 // No, we have to recreate the condition code.
2496 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2497 __ testl(cond_reg, cond_reg);
2498 }
2499 } else {
2500 GenerateCompareTest(condition);
2501 cond = X86_64IntegerCondition(condition->GetCondition());
2502 }
2503 } else {
2504 // Must be a Boolean condition, which needs to be compared to 0.
2505 CpuRegister cond_reg = locations->InAt(2).AsRegister<CpuRegister>();
2506 __ testl(cond_reg, cond_reg);
2507 }
2508
2509 // If the condition is true, overwrite the output, which already contains false.
2510 // Generate the correct sized CMOV.
2511 bool is_64_bit = DataType::Is64BitType(select->GetType());
2512 if (value_true_loc.IsRegister()) {
2513 __ cmov(cond, value_false, value_true_loc.AsRegister<CpuRegister>(), is_64_bit);
2514 } else {
2515 __ cmov(cond,
2516 value_false,
2517 Address(CpuRegister(RSP), value_true_loc.GetStackIndex()), is_64_bit);
2518 }
2519 } else {
2520 NearLabel false_target;
2521 GenerateTestAndBranch<NearLabel>(select,
2522 /* condition_input_index= */ 2,
2523 /* true_target= */ nullptr,
2524 &false_target);
2525 codegen_->MoveLocation(locations->Out(), locations->InAt(1), select->GetType());
2526 __ Bind(&false_target);
2527 }
2528 }
2529
VisitNop(HNop * nop)2530 void LocationsBuilderX86_64::VisitNop(HNop* nop) {
2531 new (GetGraph()->GetAllocator()) LocationSummary(nop);
2532 }
2533
VisitNop(HNop *)2534 void InstructionCodeGeneratorX86_64::VisitNop(HNop*) {
2535 // The environment recording already happened in CodeGenerator::Compile.
2536 }
2537
IncreaseFrame(size_t adjustment)2538 void CodeGeneratorX86_64::IncreaseFrame(size_t adjustment) {
2539 __ subq(CpuRegister(RSP), Immediate(adjustment));
2540 __ cfi().AdjustCFAOffset(adjustment);
2541 }
2542
DecreaseFrame(size_t adjustment)2543 void CodeGeneratorX86_64::DecreaseFrame(size_t adjustment) {
2544 __ addq(CpuRegister(RSP), Immediate(adjustment));
2545 __ cfi().AdjustCFAOffset(-adjustment);
2546 }
2547
2548 void CodeGeneratorX86_64::GenerateNop() {
2549 __ nop();
2550 }
2551
2552 void LocationsBuilderX86_64::HandleCondition(HCondition* cond) {
2553 LocationSummary* locations =
2554 new (GetGraph()->GetAllocator()) LocationSummary(cond, LocationSummary::kNoCall);
2555 // Handle the long/FP comparisons made in instruction simplification.
2556 switch (cond->InputAt(0)->GetType()) {
2557 case DataType::Type::kInt64:
2558 locations->SetInAt(0, Location::RequiresRegister());
2559 locations->SetInAt(1, Location::Any());
2560 break;
2561 case DataType::Type::kFloat32:
2562 case DataType::Type::kFloat64:
2563 locations->SetInAt(0, Location::RequiresFpuRegister());
2564 locations->SetInAt(1, Location::Any());
2565 break;
2566 default:
2567 locations->SetInAt(0, Location::RequiresRegister());
2568 locations->SetInAt(1, Location::Any());
2569 break;
2570 }
2571 if (!cond->IsEmittedAtUseSite()) {
2572 locations->SetOut(Location::RequiresRegister());
2573 }
2574 }
2575
2576 void InstructionCodeGeneratorX86_64::HandleCondition(HCondition* cond) {
2577 if (cond->IsEmittedAtUseSite()) {
2578 return;
2579 }
2580
2581 LocationSummary* locations = cond->GetLocations();
2582 Location lhs = locations->InAt(0);
2583 Location rhs = locations->InAt(1);
2584 CpuRegister reg = locations->Out().AsRegister<CpuRegister>();
2585 NearLabel true_label, false_label;
2586
2587 switch (cond->InputAt(0)->GetType()) {
2588 default:
2589 // Integer case.
2590
2591 // Clear output register: setcc only sets the low byte.
2592 __ xorl(reg, reg);
2593
2594 codegen_->GenerateIntCompare(lhs, rhs);
2595 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2596 return;
2597 case DataType::Type::kInt64:
2598 // Clear output register: setcc only sets the low byte.
2599 __ xorl(reg, reg);
2600
2601 codegen_->GenerateLongCompare(lhs, rhs);
2602 __ setcc(X86_64IntegerCondition(cond->GetCondition()), reg);
2603 return;
2604 case DataType::Type::kFloat32: {
2605 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2606 if (rhs.IsConstant()) {
2607 float value = rhs.GetConstant()->AsFloatConstant()->GetValue();
2608 __ ucomiss(lhs_reg, codegen_->LiteralFloatAddress(value));
2609 } else if (rhs.IsStackSlot()) {
2610 __ ucomiss(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2611 } else {
2612 __ ucomiss(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2613 }
2614 GenerateFPJumps(cond, &true_label, &false_label);
2615 break;
2616 }
2617 case DataType::Type::kFloat64: {
2618 XmmRegister lhs_reg = lhs.AsFpuRegister<XmmRegister>();
2619 if (rhs.IsConstant()) {
2620 double value = rhs.GetConstant()->AsDoubleConstant()->GetValue();
2621 __ ucomisd(lhs_reg, codegen_->LiteralDoubleAddress(value));
2622 } else if (rhs.IsDoubleStackSlot()) {
2623 __ ucomisd(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
2624 } else {
2625 __ ucomisd(lhs_reg, rhs.AsFpuRegister<XmmRegister>());
2626 }
2627 GenerateFPJumps(cond, &true_label, &false_label);
2628 break;
2629 }
2630 }
2631
2632 // Convert the jumps into the result.
2633 NearLabel done_label;
2634
2635 // False case: result = 0.
2636 __ Bind(&false_label);
2637 __ xorl(reg, reg);
2638 __ jmp(&done_label);
2639
2640 // True case: result = 1.
2641 __ Bind(&true_label);
2642 __ movl(reg, Immediate(1));
2643 __ Bind(&done_label);
2644 }
2645
2646 void LocationsBuilderX86_64::VisitEqual(HEqual* comp) {
2647 HandleCondition(comp);
2648 }
2649
2650 void InstructionCodeGeneratorX86_64::VisitEqual(HEqual* comp) {
2651 HandleCondition(comp);
2652 }
2653
2654 void LocationsBuilderX86_64::VisitNotEqual(HNotEqual* comp) {
2655 HandleCondition(comp);
2656 }
2657
2658 void InstructionCodeGeneratorX86_64::VisitNotEqual(HNotEqual* comp) {
2659 HandleCondition(comp);
2660 }
2661
2662 void LocationsBuilderX86_64::VisitLessThan(HLessThan* comp) {
2663 HandleCondition(comp);
2664 }
2665
2666 void InstructionCodeGeneratorX86_64::VisitLessThan(HLessThan* comp) {
2667 HandleCondition(comp);
2668 }
2669
2670 void LocationsBuilderX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2671 HandleCondition(comp);
2672 }
2673
2674 void InstructionCodeGeneratorX86_64::VisitLessThanOrEqual(HLessThanOrEqual* comp) {
2675 HandleCondition(comp);
2676 }
2677
2678 void LocationsBuilderX86_64::VisitGreaterThan(HGreaterThan* comp) {
2679 HandleCondition(comp);
2680 }
2681
2682 void InstructionCodeGeneratorX86_64::VisitGreaterThan(HGreaterThan* comp) {
2683 HandleCondition(comp);
2684 }
2685
2686 void LocationsBuilderX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2687 HandleCondition(comp);
2688 }
2689
2690 void InstructionCodeGeneratorX86_64::VisitGreaterThanOrEqual(HGreaterThanOrEqual* comp) {
2691 HandleCondition(comp);
2692 }
2693
2694 void LocationsBuilderX86_64::VisitBelow(HBelow* comp) {
2695 HandleCondition(comp);
2696 }
2697
2698 void InstructionCodeGeneratorX86_64::VisitBelow(HBelow* comp) {
2699 HandleCondition(comp);
2700 }
2701
2702 void LocationsBuilderX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2703 HandleCondition(comp);
2704 }
2705
2706 void InstructionCodeGeneratorX86_64::VisitBelowOrEqual(HBelowOrEqual* comp) {
2707 HandleCondition(comp);
2708 }
2709
2710 void LocationsBuilderX86_64::VisitAbove(HAbove* comp) {
2711 HandleCondition(comp);
2712 }
2713
2714 void InstructionCodeGeneratorX86_64::VisitAbove(HAbove* comp) {
2715 HandleCondition(comp);
2716 }
2717
2718 void LocationsBuilderX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2719 HandleCondition(comp);
2720 }
2721
2722 void InstructionCodeGeneratorX86_64::VisitAboveOrEqual(HAboveOrEqual* comp) {
2723 HandleCondition(comp);
2724 }
2725
2726 void LocationsBuilderX86_64::VisitCompare(HCompare* compare) {
2727 LocationSummary* locations =
2728 new (GetGraph()->GetAllocator()) LocationSummary(compare, LocationSummary::kNoCall);
2729 switch (compare->GetComparisonType()) {
2730 case DataType::Type::kBool:
2731 case DataType::Type::kUint8:
2732 case DataType::Type::kInt8:
2733 case DataType::Type::kUint16:
2734 case DataType::Type::kInt16:
2735 case DataType::Type::kInt32:
2736 case DataType::Type::kUint32:
2737 case DataType::Type::kInt64:
2738 case DataType::Type::kUint64: {
2739 locations->SetInAt(0, Location::RequiresRegister());
2740 locations->SetInAt(1, Location::Any());
2741 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2742 break;
2743 }
2744 case DataType::Type::kFloat32:
2745 case DataType::Type::kFloat64: {
2746 locations->SetInAt(0, Location::RequiresFpuRegister());
2747 locations->SetInAt(1, Location::Any());
2748 locations->SetOut(Location::RequiresRegister());
2749 break;
2750 }
2751 default:
2752 LOG(FATAL) << "Unexpected type for compare operation " << compare->InputAt(0)->GetType();
2753 }
2754 }
2755
2756 void InstructionCodeGeneratorX86_64::VisitCompare(HCompare* compare) {
2757 LocationSummary* locations = compare->GetLocations();
2758 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
2759 Location left = locations->InAt(0);
2760 Location right = locations->InAt(1);
2761
2762 NearLabel less, greater, done;
2763 DataType::Type type = compare->GetComparisonType();
2764 Condition less_cond = kLess;
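// Unsigned and FP comparisons switch less_cond to kBelow: ucomiss/ucomisd report
// "less than" through the carry flag, and unordered inputs are branched away first.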
2765
2766 switch (type) {
2767 case DataType::Type::kUint32:
2768 less_cond = kBelow;
2769 FALLTHROUGH_INTENDED;
2770 case DataType::Type::kBool:
2771 case DataType::Type::kUint8:
2772 case DataType::Type::kInt8:
2773 case DataType::Type::kUint16:
2774 case DataType::Type::kInt16:
2775 case DataType::Type::kInt32: {
2776 codegen_->GenerateIntCompare(left, right);
2777 break;
2778 }
2779 case DataType::Type::kUint64:
2780 less_cond = kBelow;
2781 FALLTHROUGH_INTENDED;
2782 case DataType::Type::kInt64: {
2783 codegen_->GenerateLongCompare(left, right);
2784 break;
2785 }
2786 case DataType::Type::kFloat32: {
2787 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2788 if (right.IsConstant()) {
2789 float value = right.GetConstant()->AsFloatConstant()->GetValue();
2790 __ ucomiss(left_reg, codegen_->LiteralFloatAddress(value));
2791 } else if (right.IsStackSlot()) {
2792 __ ucomiss(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2793 } else {
2794 __ ucomiss(left_reg, right.AsFpuRegister<XmmRegister>());
2795 }
2796 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2797 less_cond = kBelow; // ucomis{s,d} sets CF
2798 break;
2799 }
2800 case DataType::Type::kFloat64: {
2801 XmmRegister left_reg = left.AsFpuRegister<XmmRegister>();
2802 if (right.IsConstant()) {
2803 double value = right.GetConstant()->AsDoubleConstant()->GetValue();
2804 __ ucomisd(left_reg, codegen_->LiteralDoubleAddress(value));
2805 } else if (right.IsDoubleStackSlot()) {
2806 __ ucomisd(left_reg, Address(CpuRegister(RSP), right.GetStackIndex()));
2807 } else {
2808 __ ucomisd(left_reg, right.AsFpuRegister<XmmRegister>());
2809 }
2810 __ j(kUnordered, compare->IsGtBias() ? &greater : &less);
2811 less_cond = kBelow; // ucomis{s,d} sets CF
2812 break;
2813 }
2814 default:
2815 LOG(FATAL) << "Unexpected compare type " << type;
2816 }
2817
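// Materialize the three-way result: 0 if equal, 1 if greater, -1 if less.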
2818 __ movl(out, Immediate(0));
2819 __ j(kEqual, &done);
2820 __ j(less_cond, &less);
2821
2822 __ Bind(&greater);
2823 __ movl(out, Immediate(1));
2824 __ jmp(&done);
2825
2826 __ Bind(&less);
2827 __ movl(out, Immediate(-1));
2828
2829 __ Bind(&done);
2830 }
2831
2832 void LocationsBuilderX86_64::VisitIntConstant(HIntConstant* constant) {
2833 LocationSummary* locations =
2834 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2835 locations->SetOut(Location::ConstantLocation(constant));
2836 }
2837
2838 void InstructionCodeGeneratorX86_64::VisitIntConstant([[maybe_unused]] HIntConstant* constant) {
2839 // Will be generated at use site.
2840 }
2841
2842 void LocationsBuilderX86_64::VisitNullConstant(HNullConstant* constant) {
2843 LocationSummary* locations =
2844 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2845 locations->SetOut(Location::ConstantLocation(constant));
2846 }
2847
2848 void InstructionCodeGeneratorX86_64::VisitNullConstant([[maybe_unused]] HNullConstant* constant) {
2849 // Will be generated at use site.
2850 }
2851
2852 void LocationsBuilderX86_64::VisitLongConstant(HLongConstant* constant) {
2853 LocationSummary* locations =
2854 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2855 locations->SetOut(Location::ConstantLocation(constant));
2856 }
2857
2858 void InstructionCodeGeneratorX86_64::VisitLongConstant([[maybe_unused]] HLongConstant* constant) {
2859 // Will be generated at use site.
2860 }
2861
2862 void LocationsBuilderX86_64::VisitFloatConstant(HFloatConstant* constant) {
2863 LocationSummary* locations =
2864 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2865 locations->SetOut(Location::ConstantLocation(constant));
2866 }
2867
2868 void InstructionCodeGeneratorX86_64::VisitFloatConstant([[maybe_unused]] HFloatConstant* constant) {
2869 // Will be generated at use site.
2870 }
2871
2872 void LocationsBuilderX86_64::VisitDoubleConstant(HDoubleConstant* constant) {
2873 LocationSummary* locations =
2874 new (GetGraph()->GetAllocator()) LocationSummary(constant, LocationSummary::kNoCall);
2875 locations->SetOut(Location::ConstantLocation(constant));
2876 }
2877
2878 void InstructionCodeGeneratorX86_64::VisitDoubleConstant(
2879 [[maybe_unused]] HDoubleConstant* constant) {
2880 // Will be generated at use site.
2881 }
2882
2883 void LocationsBuilderX86_64::VisitConstructorFence(HConstructorFence* constructor_fence) {
2884 constructor_fence->SetLocations(nullptr);
2885 }
2886
2887 void InstructionCodeGeneratorX86_64::VisitConstructorFence(
2888 [[maybe_unused]] HConstructorFence* constructor_fence) {
2889 codegen_->GenerateMemoryBarrier(MemBarrierKind::kStoreStore);
2890 }
2891
2892 void LocationsBuilderX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2893 memory_barrier->SetLocations(nullptr);
2894 }
2895
2896 void InstructionCodeGeneratorX86_64::VisitMemoryBarrier(HMemoryBarrier* memory_barrier) {
2897 codegen_->GenerateMemoryBarrier(memory_barrier->GetBarrierKind());
2898 }
2899
2900 void LocationsBuilderX86_64::VisitReturnVoid(HReturnVoid* ret) {
2901 ret->SetLocations(nullptr);
2902 }
2903
2904 void InstructionCodeGeneratorX86_64::VisitReturnVoid([[maybe_unused]] HReturnVoid* ret) {
2905 codegen_->GenerateFrameExit();
2906 }
2907
2908 void LocationsBuilderX86_64::VisitReturn(HReturn* ret) {
2909 LocationSummary* locations =
2910 new (GetGraph()->GetAllocator()) LocationSummary(ret, LocationSummary::kNoCall);
2911 SetInForReturnValue(ret, locations);
2912 }
2913
2914 void InstructionCodeGeneratorX86_64::VisitReturn(HReturn* ret) {
2915 switch (ret->InputAt(0)->GetType()) {
2916 case DataType::Type::kReference:
2917 case DataType::Type::kBool:
2918 case DataType::Type::kUint8:
2919 case DataType::Type::kInt8:
2920 case DataType::Type::kUint16:
2921 case DataType::Type::kInt16:
2922 case DataType::Type::kInt32:
2923 case DataType::Type::kInt64:
2924 DCHECK_EQ(ret->GetLocations()->InAt(0).AsRegister<CpuRegister>().AsRegister(), RAX);
2925 break;
2926
2927 case DataType::Type::kFloat32: {
2928 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2929 XMM0);
2930 // To simplify callers of an OSR method, we put the return value in both
2931 // a floating point and a core register.
2932 if (GetGraph()->IsCompilingOsr()) {
2933 __ movd(CpuRegister(RAX), XmmRegister(XMM0));
2934 }
2935 break;
2936 }
2937 case DataType::Type::kFloat64: {
2938 DCHECK_EQ(ret->GetLocations()->InAt(0).AsFpuRegister<XmmRegister>().AsFloatRegister(),
2939 XMM0);
2940 // To simplify callers of an OSR method, we put the return value in both
2941 // a floating point and a core register.
2942 if (GetGraph()->IsCompilingOsr()) {
2943 __ movq(CpuRegister(RAX), XmmRegister(XMM0));
2944 }
2945 break;
2946 }
2947
2948 default:
2949 LOG(FATAL) << "Unexpected return type " << ret->InputAt(0)->GetType();
2950 }
2951 codegen_->GenerateFrameExit();
2952 }
2953
2954 Location InvokeDexCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type) const {
2955 switch (type) {
2956 case DataType::Type::kReference:
2957 case DataType::Type::kBool:
2958 case DataType::Type::kUint8:
2959 case DataType::Type::kInt8:
2960 case DataType::Type::kUint16:
2961 case DataType::Type::kInt16:
2962 case DataType::Type::kUint32:
2963 case DataType::Type::kInt32:
2964 case DataType::Type::kUint64:
2965 case DataType::Type::kInt64:
2966 return Location::RegisterLocation(RAX);
2967
2968 case DataType::Type::kVoid:
2969 return Location::NoLocation();
2970
2971 case DataType::Type::kFloat64:
2972 case DataType::Type::kFloat32:
2973 return Location::FpuRegisterLocation(XMM0);
2974 }
2975 }
2976
2977 Location InvokeDexCallingConventionVisitorX86_64::GetMethodLocation() const {
2978 return Location::RegisterLocation(kMethodRegisterArgument);
2979 }
2980
2981 Location InvokeDexCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
2982 switch (type) {
2983 case DataType::Type::kReference:
2984 case DataType::Type::kBool:
2985 case DataType::Type::kUint8:
2986 case DataType::Type::kInt8:
2987 case DataType::Type::kUint16:
2988 case DataType::Type::kInt16:
2989 case DataType::Type::kInt32: {
2990 uint32_t index = gp_index_++;
2991 stack_index_++;
2992 if (index < calling_convention.GetNumberOfRegisters()) {
2993 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
2994 } else {
2995 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
2996 }
2997 }
2998
2999 case DataType::Type::kInt64: {
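// stack_index_ counts 32-bit slots, so a 64-bit argument advances it by two.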
3000 uint32_t index = gp_index_;
3001 stack_index_ += 2;
3002 if (index < calling_convention.GetNumberOfRegisters()) {
3003 gp_index_ += 1;
3004 return Location::RegisterLocation(calling_convention.GetRegisterAt(index));
3005 } else {
3006 gp_index_ += 2;
3007 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3008 }
3009 }
3010
3011 case DataType::Type::kFloat32: {
3012 uint32_t index = float_index_++;
3013 stack_index_++;
3014 if (index < calling_convention.GetNumberOfFpuRegisters()) {
3015 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3016 } else {
3017 return Location::StackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 1));
3018 }
3019 }
3020
3021 case DataType::Type::kFloat64: {
3022 uint32_t index = float_index_++;
3023 stack_index_ += 2;
3024 if (index < calling_convention.GetNumberOfFpuRegisters()) {
3025 return Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(index));
3026 } else {
3027 return Location::DoubleStackSlot(calling_convention.GetStackOffsetOf(stack_index_ - 2));
3028 }
3029 }
3030
3031 case DataType::Type::kUint32:
3032 case DataType::Type::kUint64:
3033 case DataType::Type::kVoid:
3034 LOG(FATAL) << "Unexpected parameter type " << type;
3035 UNREACHABLE();
3036 }
3037 return Location::NoLocation();
3038 }
3039
3040 Location CriticalNativeCallingConventionVisitorX86_64::GetNextLocation(DataType::Type type) {
3041 DCHECK_NE(type, DataType::Type::kReference);
3042
3043 Location location = Location::NoLocation();
3044 if (DataType::IsFloatingPointType(type)) {
3045 if (fpr_index_ < kParameterFloatRegistersLength) {
3046 location = Location::FpuRegisterLocation(kParameterFloatRegisters[fpr_index_]);
3047 ++fpr_index_;
3048 }
3049 } else {
3050 // Native ABI uses the same registers as managed, except that the method register RDI
3051 // is a normal argument.
3052 if (gpr_index_ < 1u + kParameterCoreRegistersLength) {
3053 location = Location::RegisterLocation(
3054 gpr_index_ == 0u ? RDI : kParameterCoreRegisters[gpr_index_ - 1u]);
3055 ++gpr_index_;
3056 }
3057 }
3058 if (location.IsInvalid()) {
3059 if (DataType::Is64BitType(type)) {
3060 location = Location::DoubleStackSlot(stack_offset_);
3061 } else {
3062 location = Location::StackSlot(stack_offset_);
3063 }
3064 stack_offset_ += kFramePointerSize;
3065
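// When building locations for the register allocator, leave stack arguments
// unconstrained (Any); stack_offset_ has still been advanced above so the
// outgoing argument space is accounted for.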
3066 if (for_register_allocation_) {
3067 location = Location::Any();
3068 }
3069 }
3070 return location;
3071 }
3072
3073 Location CriticalNativeCallingConventionVisitorX86_64::GetReturnLocation(DataType::Type type)
3074 const {
3075 // We perform conversion to the managed ABI return register after the call if needed.
3076 InvokeDexCallingConventionVisitorX86_64 dex_calling_convention;
3077 return dex_calling_convention.GetReturnLocation(type);
3078 }
3079
3080 Location CriticalNativeCallingConventionVisitorX86_64::GetMethodLocation() const {
3081 // Pass the method in the hidden argument RAX.
3082 return Location::RegisterLocation(RAX);
3083 }
3084
3085 void LocationsBuilderX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3086 // The trampoline uses the same calling convention as the dex calling convention,
3087 // except that instead of loading arg0/r0 with the target Method*, arg0/r0 contains
3088 // the method_idx.
3089 HandleInvoke(invoke);
3090 }
3091
3092 void InstructionCodeGeneratorX86_64::VisitInvokeUnresolved(HInvokeUnresolved* invoke) {
3093 codegen_->GenerateInvokeUnresolvedRuntimeCall(invoke);
3094 }
3095
3096 void LocationsBuilderX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3097 // Explicit clinit checks triggered by static invokes must have been pruned by
3098 // art::PrepareForRegisterAllocation.
3099 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3100
3101 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3102 if (intrinsic.TryDispatch(invoke)) {
3103 return;
3104 }
3105
3106 if (invoke->GetCodePtrLocation() == CodePtrLocation::kCallCriticalNative) {
3107 CriticalNativeCallingConventionVisitorX86_64 calling_convention_visitor(
3108 /*for_register_allocation=*/ true);
3109 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3110 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(invoke->GetLocations());
3111 } else {
3112 HandleInvoke(invoke);
3113 }
3114 }
3115
3116 static bool TryGenerateIntrinsicCode(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3117 if (invoke->GetLocations()->Intrinsified()) {
3118 IntrinsicCodeGeneratorX86_64 intrinsic(codegen);
3119 intrinsic.Dispatch(invoke);
3120 return true;
3121 }
3122 return false;
3123 }
3124
3125 void InstructionCodeGeneratorX86_64::VisitInvokeStaticOrDirect(HInvokeStaticOrDirect* invoke) {
3126 // Explicit clinit checks triggered by static invokes must have been pruned by
3127 // art::PrepareForRegisterAllocation.
3128 DCHECK(!invoke->IsStaticWithExplicitClinitCheck());
3129
3130 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3131 return;
3132 }
3133
3134 LocationSummary* locations = invoke->GetLocations();
3135 codegen_->GenerateStaticOrDirectCall(
3136 invoke, locations->HasTemps() ? locations->GetTemp(0) : Location::NoLocation());
3137 }
3138
3139 void LocationsBuilderX86_64::HandleInvoke(HInvoke* invoke) {
3140 InvokeDexCallingConventionVisitorX86_64 calling_convention_visitor;
3141 CodeGenerator::CreateCommonInvokeLocationSummary(invoke, &calling_convention_visitor);
3142 }
3143
3144 void LocationsBuilderX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3145 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3146 if (intrinsic.TryDispatch(invoke)) {
3147 return;
3148 }
3149
3150 HandleInvoke(invoke);
3151 }
3152
3153 void InstructionCodeGeneratorX86_64::VisitInvokeVirtual(HInvokeVirtual* invoke) {
3154 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3155 return;
3156 }
3157
3158 codegen_->GenerateVirtualCall(invoke, invoke->GetLocations()->GetTemp(0));
3159 DCHECK(!codegen_->IsLeafMethod());
3160 }
3161
3162 void LocationsBuilderX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3163 HandleInvoke(invoke);
3164 // Add the hidden argument.
3165 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRecursive) {
3166 invoke->GetLocations()->SetInAt(invoke->GetNumberOfArguments() - 1,
3167 Location::RegisterLocation(RAX));
3168 }
3169 invoke->GetLocations()->AddTemp(Location::RegisterLocation(RAX));
3170 }
3171
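// Update the JIT inline cache for this invoke: take the fast path when the
// receiver class already matches the first cached entry, otherwise call the
// runtime to record it.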
3172 void CodeGeneratorX86_64::MaybeGenerateInlineCacheCheck(HInstruction* instruction,
3173 CpuRegister klass) {
3174 DCHECK_EQ(RDI, klass.AsRegister());
3175 if (ProfilingInfoBuilder::IsInlineCacheUseful(instruction->AsInvoke(), this)) {
3176 ProfilingInfo* info = GetGraph()->GetProfilingInfo();
3177 DCHECK(info != nullptr);
3178 InlineCache* cache = ProfilingInfoBuilder::GetInlineCache(
3179 info, GetCompilerOptions(), instruction->AsInvoke());
3180 if (cache != nullptr) {
3181 uint64_t address = reinterpret_cast64<uint64_t>(cache);
3182 NearLabel done;
3183 __ movq(CpuRegister(TMP), Immediate(address));
3184 // Fast path for a monomorphic cache.
3185 __ cmpl(Address(CpuRegister(TMP), InlineCache::ClassesOffset().Int32Value()), klass);
3186 __ j(kEqual, &done);
3187 GenerateInvokeRuntime(
3188 GetThreadOffset<kX86_64PointerSize>(kQuickUpdateInlineCache).Int32Value());
3189 __ Bind(&done);
3190 } else {
3191 // This is unexpected, but we don't guarantee stable compilation across
3192 // JIT runs, so just warn about it.
3193 ScopedObjectAccess soa(Thread::Current());
3194 LOG(WARNING) << "Missing inline cache for " << GetGraph()->GetArtMethod()->PrettyMethod();
3195 }
3196 }
3197 }
3198
3199 void InstructionCodeGeneratorX86_64::VisitInvokeInterface(HInvokeInterface* invoke) {
3200 // TODO: b/18116999, our IMTs can miss an IncompatibleClassChangeError.
3201 LocationSummary* locations = invoke->GetLocations();
3202 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3203 Location receiver = locations->InAt(0);
3204 size_t class_offset = mirror::Object::ClassOffset().SizeValue();
3205
3206 if (receiver.IsStackSlot()) {
3207 __ movl(temp, Address(CpuRegister(RSP), receiver.GetStackIndex()));
3208 // /* HeapReference<Class> */ temp = temp->klass_
3209 __ movl(temp, Address(temp, class_offset));
3210 } else {
3211 // /* HeapReference<Class> */ temp = receiver->klass_
3212 __ movl(temp, Address(receiver.AsRegister<CpuRegister>(), class_offset));
3213 }
3214 codegen_->MaybeRecordImplicitNullCheck(invoke);
3215 // Instead of simply (possibly) unpoisoning `temp` here, we should
3216 // emit a read barrier for the previous class reference load.
3217 // However, this is not required in practice, as this is an
3218 // intermediate/temporary reference and because the current
3219 // concurrent copying collector keeps the from-space memory
3220 // intact/accessible until the end of the marking phase (though the
3221 // collector may not do so in the future).
3222 __ MaybeUnpoisonHeapReference(temp);
3223
3224 codegen_->MaybeGenerateInlineCacheCheck(invoke, temp);
3225
3226 if (invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRecursive &&
3227 invoke->GetHiddenArgumentLoadKind() != MethodLoadKind::kRuntimeCall) {
3228 Location hidden_reg = locations->GetTemp(1);
3229 // Set the hidden argument. It is safe to do this here, as RAX
3230 // won't be modified thereafter, before the `call` instruction.
3231 // We also do it after MaybeGenerateInlineCacheCheck, which may use RAX.
3232 DCHECK_EQ(RAX, hidden_reg.AsRegister<Register>());
3233 codegen_->LoadMethod(invoke->GetHiddenArgumentLoadKind(), hidden_reg, invoke);
3234 }
3235
3236 // temp = temp->GetAddressOfIMT()
3237 __ movq(temp,
3238 Address(temp, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
3240 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
3241 invoke->GetImtIndex(), kX86_64PointerSize));
3242 // temp = temp->GetImtEntryAt(method_offset);
3243 __ movq(temp, Address(temp, method_offset));
3244 if (invoke->GetHiddenArgumentLoadKind() == MethodLoadKind::kRuntimeCall) {
3245 // We pass the method from the IMT in case of a conflict. This will ensure
3246 // we go into the runtime to resolve the actual method.
3247 Location hidden_reg = locations->GetTemp(1);
3248 __ movq(hidden_reg.AsRegister<CpuRegister>(), temp);
3249 }
3250 // call temp->GetEntryPoint();
3251 __ call(Address(
3252 temp, ArtMethod::EntryPointFromQuickCompiledCodeOffset(kX86_64PointerSize).SizeValue()));
3253
3254 DCHECK(!codegen_->IsLeafMethod());
3255 codegen_->RecordPcInfo(invoke);
3256 }
3257
3258 void LocationsBuilderX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3259 IntrinsicLocationsBuilderX86_64 intrinsic(codegen_);
3260 if (intrinsic.TryDispatch(invoke)) {
3261 return;
3262 }
3263 HandleInvoke(invoke);
3264 }
3265
3266 void InstructionCodeGeneratorX86_64::VisitInvokePolymorphic(HInvokePolymorphic* invoke) {
3267 if (TryGenerateIntrinsicCode(invoke, codegen_)) {
3268 return;
3269 }
3270 codegen_->GenerateInvokePolymorphicCall(invoke);
3271 }
3272
3273 void LocationsBuilderX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3274 HandleInvoke(invoke);
3275 }
3276
3277 void InstructionCodeGeneratorX86_64::VisitInvokeCustom(HInvokeCustom* invoke) {
3278 codegen_->GenerateInvokeCustomCall(invoke);
3279 }
3280
3281 void LocationsBuilderX86_64::VisitNeg(HNeg* neg) {
3282 LocationSummary* locations =
3283 new (GetGraph()->GetAllocator()) LocationSummary(neg, LocationSummary::kNoCall);
3284 switch (neg->GetResultType()) {
3285 case DataType::Type::kInt32:
3286 case DataType::Type::kInt64:
3287 locations->SetInAt(0, Location::RequiresRegister());
3288 locations->SetOut(Location::SameAsFirstInput());
3289 break;
3290
3291 case DataType::Type::kFloat32:
3292 case DataType::Type::kFloat64:
3293 locations->SetInAt(0, Location::RequiresFpuRegister());
3294 locations->SetOut(Location::SameAsFirstInput());
3295 locations->AddTemp(Location::RequiresFpuRegister());
3296 break;
3297
3298 default:
3299 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3300 }
3301 }
3302
3303 void InstructionCodeGeneratorX86_64::VisitNeg(HNeg* neg) {
3304 LocationSummary* locations = neg->GetLocations();
3305 Location out = locations->Out();
3306 Location in = locations->InAt(0);
3307 switch (neg->GetResultType()) {
3308 case DataType::Type::kInt32:
3309 DCHECK(in.IsRegister());
3310 DCHECK(in.Equals(out));
3311 __ negl(out.AsRegister<CpuRegister>());
3312 break;
3313
3314 case DataType::Type::kInt64:
3315 DCHECK(in.IsRegister());
3316 DCHECK(in.Equals(out));
3317 __ negq(out.AsRegister<CpuRegister>());
3318 break;
3319
3320 case DataType::Type::kFloat32: {
3321 DCHECK(in.Equals(out));
3322 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3323 // Implement float negation with an exclusive or with value
3324 // 0x80000000 (mask for bit 31, representing the sign of a
3325 // single-precision floating-point number).
3326 __ movss(mask, codegen_->LiteralInt32Address(0x80000000));
3327 __ xorps(out.AsFpuRegister<XmmRegister>(), mask);
3328 break;
3329 }
3330
3331 case DataType::Type::kFloat64: {
3332 DCHECK(in.Equals(out));
3333 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
3334 // Implement double negation with an exclusive or with value
3335 // 0x8000000000000000 (mask for bit 63, representing the sign of
3336 // a double-precision floating-point number).
3337 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x8000000000000000)));
3338 __ xorpd(out.AsFpuRegister<XmmRegister>(), mask);
3339 break;
3340 }
3341
3342 default:
3343 LOG(FATAL) << "Unexpected neg type " << neg->GetResultType();
3344 }
3345 }
3346
3347 void LocationsBuilderX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3348 LocationSummary* locations =
3349 new (GetGraph()->GetAllocator()) LocationSummary(conversion, LocationSummary::kNoCall);
3350 DataType::Type result_type = conversion->GetResultType();
3351 DataType::Type input_type = conversion->GetInputType();
3352 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3353 << input_type << " -> " << result_type;
3354
3355 switch (result_type) {
3356 case DataType::Type::kUint8:
3357 case DataType::Type::kInt8:
3358 case DataType::Type::kUint16:
3359 case DataType::Type::kInt16:
3360 DCHECK(DataType::IsIntegralType(input_type)) << input_type;
3361 locations->SetInAt(0, Location::Any());
3362 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3363 break;
3364
3365 case DataType::Type::kInt32:
3366 switch (input_type) {
3367 case DataType::Type::kInt64:
3368 locations->SetInAt(0, Location::Any());
3369 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3370 break;
3371
3372 case DataType::Type::kFloat32:
3373 locations->SetInAt(0, Location::RequiresFpuRegister());
3374 locations->SetOut(Location::RequiresRegister());
3375 break;
3376
3377 case DataType::Type::kFloat64:
3378 locations->SetInAt(0, Location::RequiresFpuRegister());
3379 locations->SetOut(Location::RequiresRegister());
3380 break;
3381
3382 default:
3383 LOG(FATAL) << "Unexpected type conversion from " << input_type
3384 << " to " << result_type;
3385 }
3386 break;
3387
3388 case DataType::Type::kInt64:
3389 switch (input_type) {
3390 case DataType::Type::kBool:
3391 case DataType::Type::kUint8:
3392 case DataType::Type::kInt8:
3393 case DataType::Type::kUint16:
3394 case DataType::Type::kInt16:
3395 case DataType::Type::kInt32:
3396 // TODO: We would benefit from a (to-be-implemented)
3397 // Location::RegisterOrStackSlot requirement for this input.
3398 locations->SetInAt(0, Location::RequiresRegister());
3399 locations->SetOut(Location::RequiresRegister());
3400 break;
3401
3402 case DataType::Type::kFloat32:
3403 locations->SetInAt(0, Location::RequiresFpuRegister());
3404 locations->SetOut(Location::RequiresRegister());
3405 break;
3406
3407 case DataType::Type::kFloat64:
3408 locations->SetInAt(0, Location::RequiresFpuRegister());
3409 locations->SetOut(Location::RequiresRegister());
3410 break;
3411
3412 default:
3413 LOG(FATAL) << "Unexpected type conversion from " << input_type
3414 << " to " << result_type;
3415 }
3416 break;
3417
3418 case DataType::Type::kFloat32:
3419 switch (input_type) {
3420 case DataType::Type::kBool:
3421 case DataType::Type::kUint8:
3422 case DataType::Type::kInt8:
3423 case DataType::Type::kUint16:
3424 case DataType::Type::kInt16:
3425 case DataType::Type::kInt32:
3426 locations->SetInAt(0, Location::Any());
3427 locations->SetOut(Location::RequiresFpuRegister());
3428 break;
3429
3430 case DataType::Type::kInt64:
3431 locations->SetInAt(0, Location::Any());
3432 locations->SetOut(Location::RequiresFpuRegister());
3433 break;
3434
3435 case DataType::Type::kFloat64:
3436 locations->SetInAt(0, Location::Any());
3437 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3438 break;
3439
3440 default:
3441 LOG(FATAL) << "Unexpected type conversion from " << input_type
3442 << " to " << result_type;
3443 }
3444 break;
3445
3446 case DataType::Type::kFloat64:
3447 switch (input_type) {
3448 case DataType::Type::kBool:
3449 case DataType::Type::kUint8:
3450 case DataType::Type::kInt8:
3451 case DataType::Type::kUint16:
3452 case DataType::Type::kInt16:
3453 case DataType::Type::kInt32:
3454 locations->SetInAt(0, Location::Any());
3455 locations->SetOut(Location::RequiresFpuRegister());
3456 break;
3457
3458 case DataType::Type::kInt64:
3459 locations->SetInAt(0, Location::Any());
3460 locations->SetOut(Location::RequiresFpuRegister());
3461 break;
3462
3463 case DataType::Type::kFloat32:
3464 locations->SetInAt(0, Location::Any());
3465 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
3466 break;
3467
3468 default:
3469 LOG(FATAL) << "Unexpected type conversion from " << input_type
3470 << " to " << result_type;
3471 }
3472 break;
3473
3474 default:
3475 LOG(FATAL) << "Unexpected type conversion from " << input_type
3476 << " to " << result_type;
3477 }
3478 }
3479
3480 void InstructionCodeGeneratorX86_64::VisitTypeConversion(HTypeConversion* conversion) {
3481 LocationSummary* locations = conversion->GetLocations();
3482 Location out = locations->Out();
3483 Location in = locations->InAt(0);
3484 DataType::Type result_type = conversion->GetResultType();
3485 DataType::Type input_type = conversion->GetInputType();
3486 DCHECK(!DataType::IsTypeConversionImplicit(input_type, result_type))
3487 << input_type << " -> " << result_type;
3488 switch (result_type) {
3489 case DataType::Type::kUint8:
3490 switch (input_type) {
3491 case DataType::Type::kInt8:
3492 case DataType::Type::kUint16:
3493 case DataType::Type::kInt16:
3494 case DataType::Type::kInt32:
3495 case DataType::Type::kInt64:
3496 if (in.IsRegister()) {
3497 __ movzxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3498 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3499 __ movzxb(out.AsRegister<CpuRegister>(),
3500 Address(CpuRegister(RSP), in.GetStackIndex()));
3501 } else {
3502 __ movl(out.AsRegister<CpuRegister>(),
3503 Immediate(static_cast<uint8_t>(Int64FromConstant(in.GetConstant()))));
3504 }
3505 break;
3506
3507 default:
3508 LOG(FATAL) << "Unexpected type conversion from " << input_type
3509 << " to " << result_type;
3510 }
3511 break;
3512
3513 case DataType::Type::kInt8:
3514 switch (input_type) {
3515 case DataType::Type::kUint8:
3516 case DataType::Type::kUint16:
3517 case DataType::Type::kInt16:
3518 case DataType::Type::kInt32:
3519 case DataType::Type::kInt64:
3520 if (in.IsRegister()) {
3521 __ movsxb(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3522 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3523 __ movsxb(out.AsRegister<CpuRegister>(),
3524 Address(CpuRegister(RSP), in.GetStackIndex()));
3525 } else {
3526 __ movl(out.AsRegister<CpuRegister>(),
3527 Immediate(static_cast<int8_t>(Int64FromConstant(in.GetConstant()))));
3528 }
3529 break;
3530
3531 default:
3532 LOG(FATAL) << "Unexpected type conversion from " << input_type
3533 << " to " << result_type;
3534 }
3535 break;
3536
3537 case DataType::Type::kUint16:
3538 switch (input_type) {
3539 case DataType::Type::kInt8:
3540 case DataType::Type::kInt16:
3541 case DataType::Type::kInt32:
3542 case DataType::Type::kInt64:
3543 if (in.IsRegister()) {
3544 __ movzxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3545 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3546 __ movzxw(out.AsRegister<CpuRegister>(),
3547 Address(CpuRegister(RSP), in.GetStackIndex()));
3548 } else {
3549 __ movl(out.AsRegister<CpuRegister>(),
3550 Immediate(static_cast<uint16_t>(Int64FromConstant(in.GetConstant()))));
3551 }
3552 break;
3553
3554 default:
3555 LOG(FATAL) << "Unexpected type conversion from " << input_type
3556 << " to " << result_type;
3557 }
3558 break;
3559
3560 case DataType::Type::kInt16:
3561 switch (input_type) {
3562 case DataType::Type::kUint16:
3563 case DataType::Type::kInt32:
3564 case DataType::Type::kInt64:
3565 if (in.IsRegister()) {
3566 __ movsxw(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3567 } else if (in.IsStackSlot() || in.IsDoubleStackSlot()) {
3568 __ movsxw(out.AsRegister<CpuRegister>(),
3569 Address(CpuRegister(RSP), in.GetStackIndex()));
3570 } else {
3571 __ movl(out.AsRegister<CpuRegister>(),
3572 Immediate(static_cast<int16_t>(Int64FromConstant(in.GetConstant()))));
3573 }
3574 break;
3575
3576 default:
3577 LOG(FATAL) << "Unexpected type conversion from " << input_type
3578 << " to " << result_type;
3579 }
3580 break;
3581
3582 case DataType::Type::kInt32:
3583 switch (input_type) {
3584 case DataType::Type::kInt64:
3585 if (in.IsRegister()) {
3586 __ movl(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3587 } else if (in.IsDoubleStackSlot()) {
3588 __ movl(out.AsRegister<CpuRegister>(),
3589 Address(CpuRegister(RSP), in.GetStackIndex()));
3590 } else {
3591 DCHECK(in.IsConstant());
3592 DCHECK(in.GetConstant()->IsLongConstant());
3593 int64_t value = in.GetConstant()->AsLongConstant()->GetValue();
3594 __ movl(out.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3595 }
3596 break;
3597
3598 case DataType::Type::kFloat32: {
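// Implement Java's narrowing semantics: NaN converts to 0 and values at or
// above INT_MAX saturate to INT_MAX; a bare cvttss2si would instead produce
// the "integer indefinite" value (INT_MIN) for both.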
3599 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3600 CpuRegister output = out.AsRegister<CpuRegister>();
3601 NearLabel done, nan;
3602
3603 __ movl(output, Immediate(kPrimIntMax));
3604 // if input >= (float)INT_MAX goto done
3605 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimIntMax)));
3606 __ j(kAboveEqual, &done);
3607 // if input == NaN goto nan
3608 __ j(kUnordered, &nan);
3609 // output = float-to-int-truncate(input)
3610 __ cvttss2si(output, input, false);
3611 __ jmp(&done);
3612 __ Bind(&nan);
3613 // output = 0
3614 __ xorl(output, output);
3615 __ Bind(&done);
3616 break;
3617 }
3618
3619 case DataType::Type::kFloat64: {
3620 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3621 CpuRegister output = out.AsRegister<CpuRegister>();
3622 NearLabel done, nan;
3623
3624 __ movl(output, Immediate(kPrimIntMax));
3625 // if input >= (double)INT_MAX goto done
3626 __ comisd(input, codegen_->LiteralDoubleAddress(kPrimIntMax));
3627 __ j(kAboveEqual, &done);
3628 // if input == NaN goto nan
3629 __ j(kUnordered, &nan);
3630 // output = double-to-int-truncate(input)
3631 __ cvttsd2si(output, input);
3632 __ jmp(&done);
3633 __ Bind(&nan);
3634 // output = 0
3635 __ xorl(output, output);
3636 __ Bind(&done);
3637 break;
3638 }
3639
3640 default:
3641 LOG(FATAL) << "Unexpected type conversion from " << input_type
3642 << " to " << result_type;
3643 }
3644 break;
3645
3646 case DataType::Type::kInt64:
3647 DCHECK(out.IsRegister());
3648 switch (input_type) {
3649 case DataType::Type::kBool:
3650 case DataType::Type::kUint8:
3651 case DataType::Type::kInt8:
3652 case DataType::Type::kUint16:
3653 case DataType::Type::kInt16:
3654 case DataType::Type::kInt32:
3655 DCHECK(in.IsRegister());
3656 __ movsxd(out.AsRegister<CpuRegister>(), in.AsRegister<CpuRegister>());
3657 break;
3658
3659 case DataType::Type::kFloat32: {
3660 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3661 CpuRegister output = out.AsRegister<CpuRegister>();
3662 NearLabel done, nan;
3663
3664 codegen_->Load64BitValue(output, kPrimLongMax);
3665 // if input >= (float)LONG_MAX goto done
3666 __ comiss(input, codegen_->LiteralFloatAddress(static_cast<float>(kPrimLongMax)));
3667 __ j(kAboveEqual, &done);
3668 // if input == NaN goto nan
3669 __ j(kUnordered, &nan);
3670 // output = float-to-long-truncate(input)
3671 __ cvttss2si(output, input, true);
3672 __ jmp(&done);
3673 __ Bind(&nan);
3674 // output = 0
3675 __ xorl(output, output);
3676 __ Bind(&done);
3677 break;
3678 }
3679
3680 case DataType::Type::kFloat64: {
3681 XmmRegister input = in.AsFpuRegister<XmmRegister>();
3682 CpuRegister output = out.AsRegister<CpuRegister>();
3683 NearLabel done, nan;
3684
3685 codegen_->Load64BitValue(output, kPrimLongMax);
3686 // if input >= (double)LONG_MAX goto done
3687 __ comisd(input, codegen_->LiteralDoubleAddress(
3688 static_cast<double>(kPrimLongMax)));
3689 __ j(kAboveEqual, &done);
3690 // if input == NaN goto nan
3691 __ j(kUnordered, &nan);
3692 // output = double-to-long-truncate(input)
3693 __ cvttsd2si(output, input, true);
3694 __ jmp(&done);
3695 __ Bind(&nan);
3696 // output = 0
3697 __ xorl(output, output);
3698 __ Bind(&done);
3699 break;
3700 }
3701
3702 default:
3703 LOG(FATAL) << "Unexpected type conversion from " << input_type
3704 << " to " << result_type;
3705 }
3706 break;
3707
3708 case DataType::Type::kFloat32:
3709 switch (input_type) {
3710 case DataType::Type::kBool:
3711 case DataType::Type::kUint8:
3712 case DataType::Type::kInt8:
3713 case DataType::Type::kUint16:
3714 case DataType::Type::kInt16:
3715 case DataType::Type::kInt32:
3716 if (in.IsRegister()) {
3717 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3718 } else if (in.IsConstant()) {
3719 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3720 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3721 codegen_->Load32BitValue(dest, static_cast<float>(v));
3722 } else {
3723 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3724 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3725 }
3726 break;
3727
3728 case DataType::Type::kInt64:
3729 if (in.IsRegister()) {
3730 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3731 } else if (in.IsConstant()) {
3732 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3733 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3734 codegen_->Load32BitValue(dest, static_cast<float>(v));
3735 } else {
3736 __ cvtsi2ss(out.AsFpuRegister<XmmRegister>(),
3737 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3738 }
3739 break;
3740
3741 case DataType::Type::kFloat64:
3742 if (in.IsFpuRegister()) {
3743 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3744 } else if (in.IsConstant()) {
3745 double v = in.GetConstant()->AsDoubleConstant()->GetValue();
3746 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3747 codegen_->Load32BitValue(dest, static_cast<float>(v));
3748 } else {
3749 __ cvtsd2ss(out.AsFpuRegister<XmmRegister>(),
3750 Address(CpuRegister(RSP), in.GetStackIndex()));
3751 }
3752 break;
3753
3754 default:
3755 LOG(FATAL) << "Unexpected type conversion from " << input_type
3756 << " to " << result_type;
3757 }
3758 break;
3759
3760 case DataType::Type::kFloat64:
3761 switch (input_type) {
3762 case DataType::Type::kBool:
3763 case DataType::Type::kUint8:
3764 case DataType::Type::kInt8:
3765 case DataType::Type::kUint16:
3766 case DataType::Type::kInt16:
3767 case DataType::Type::kInt32:
3768 if (in.IsRegister()) {
3769 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), false);
3770 } else if (in.IsConstant()) {
3771 int32_t v = in.GetConstant()->AsIntConstant()->GetValue();
3772 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3773 codegen_->Load64BitValue(dest, static_cast<double>(v));
3774 } else {
3775 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3776 Address(CpuRegister(RSP), in.GetStackIndex()), false);
3777 }
3778 break;
3779
3780 case DataType::Type::kInt64:
3781 if (in.IsRegister()) {
3782 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(), in.AsRegister<CpuRegister>(), true);
3783 } else if (in.IsConstant()) {
3784 int64_t v = in.GetConstant()->AsLongConstant()->GetValue();
3785 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3786 codegen_->Load64BitValue(dest, static_cast<double>(v));
3787 } else {
3788 __ cvtsi2sd(out.AsFpuRegister<XmmRegister>(),
3789 Address(CpuRegister(RSP), in.GetStackIndex()), true);
3790 }
3791 break;
3792
3793 case DataType::Type::kFloat32:
3794 if (in.IsFpuRegister()) {
3795 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(), in.AsFpuRegister<XmmRegister>());
3796 } else if (in.IsConstant()) {
3797 float v = in.GetConstant()->AsFloatConstant()->GetValue();
3798 XmmRegister dest = out.AsFpuRegister<XmmRegister>();
3799 codegen_->Load64BitValue(dest, static_cast<double>(v));
3800 } else {
3801 __ cvtss2sd(out.AsFpuRegister<XmmRegister>(),
3802 Address(CpuRegister(RSP), in.GetStackIndex()));
3803 }
3804 break;
3805
3806 default:
3807 LOG(FATAL) << "Unexpected type conversion from " << input_type
3808 << " to " << result_type;
3809 }
3810 break;
3811
3812 default:
3813 LOG(FATAL) << "Unexpected type conversion from " << input_type
3814 << " to " << result_type;
3815 }
3816 }
3817
3818 void LocationsBuilderX86_64::VisitAdd(HAdd* add) {
3819 LocationSummary* locations =
3820 new (GetGraph()->GetAllocator()) LocationSummary(add, LocationSummary::kNoCall);
3821 switch (add->GetResultType()) {
3822 case DataType::Type::kInt32: {
3823 locations->SetInAt(0, Location::RequiresRegister());
3824 locations->SetInAt(1, Location::RegisterOrConstant(add->InputAt(1)));
3825 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3826 break;
3827 }
3828
3829 case DataType::Type::kInt64: {
3830 locations->SetInAt(0, Location::RequiresRegister());
3831 // We can use a leaq or addq if the constant can fit in an immediate.
3832 locations->SetInAt(1, Location::RegisterOrInt32Constant(add->InputAt(1)));
3833 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
3834 break;
3835 }
3836
3837 case DataType::Type::kFloat64:
3838 case DataType::Type::kFloat32: {
3839 locations->SetInAt(0, Location::RequiresFpuRegister());
3840 locations->SetInAt(1, Location::Any());
3841 locations->SetOut(Location::SameAsFirstInput());
3842 break;
3843 }
3844
3845 default:
3846 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3847 }
3848 }
3849
3850 void InstructionCodeGeneratorX86_64::VisitAdd(HAdd* add) {
3851 LocationSummary* locations = add->GetLocations();
3852 Location first = locations->InAt(0);
3853 Location second = locations->InAt(1);
3854 Location out = locations->Out();
3855
3856 switch (add->GetResultType()) {
3857 case DataType::Type::kInt32: {
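// Use addl when the output aliases one of the inputs; otherwise leal acts as a
// non-destructive three-operand add.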
3858 if (second.IsRegister()) {
3859 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3860 __ addl(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3861 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3862 __ addl(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3863 } else {
3864 __ leal(out.AsRegister<CpuRegister>(), Address(
3865 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3866 }
3867 } else if (second.IsConstant()) {
3868 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3869 __ addl(out.AsRegister<CpuRegister>(),
3870 Immediate(second.GetConstant()->AsIntConstant()->GetValue()));
3871 } else {
3872 __ leal(out.AsRegister<CpuRegister>(), Address(
3873 first.AsRegister<CpuRegister>(), second.GetConstant()->AsIntConstant()->GetValue()));
3874 }
3875 } else {
3876 DCHECK(first.Equals(locations->Out()));
3877 __ addl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3878 }
3879 break;
3880 }
3881
3882 case DataType::Type::kInt64: {
3883 if (second.IsRegister()) {
3884 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3885 __ addq(out.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3886 } else if (out.AsRegister<Register>() == second.AsRegister<Register>()) {
3887 __ addq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>());
3888 } else {
3889 __ leaq(out.AsRegister<CpuRegister>(), Address(
3890 first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>(), TIMES_1, 0));
3891 }
3892 } else {
3893 DCHECK(second.IsConstant());
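// The locations builder used Location::RegisterOrInt32Constant, so the constant
// is guaranteed to fit in a 32-bit immediate.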
3894 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3895 int32_t int32_value = Low32Bits(value);
3896 DCHECK_EQ(int32_value, value);
3897 if (out.AsRegister<Register>() == first.AsRegister<Register>()) {
3898 __ addq(out.AsRegister<CpuRegister>(), Immediate(int32_value));
3899 } else {
3900 __ leaq(out.AsRegister<CpuRegister>(), Address(
3901 first.AsRegister<CpuRegister>(), int32_value));
3902 }
3903 }
3904 break;
3905 }
3906
3907 case DataType::Type::kFloat32: {
3908 if (second.IsFpuRegister()) {
3909 __ addss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3910 } else if (second.IsConstant()) {
3911 __ addss(first.AsFpuRegister<XmmRegister>(),
3912 codegen_->LiteralFloatAddress(
3913 second.GetConstant()->AsFloatConstant()->GetValue()));
3914 } else {
3915 DCHECK(second.IsStackSlot());
3916 __ addss(first.AsFpuRegister<XmmRegister>(),
3917 Address(CpuRegister(RSP), second.GetStackIndex()));
3918 }
3919 break;
3920 }
3921
3922 case DataType::Type::kFloat64: {
3923 if (second.IsFpuRegister()) {
3924 __ addsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
3925 } else if (second.IsConstant()) {
3926 __ addsd(first.AsFpuRegister<XmmRegister>(),
3927 codegen_->LiteralDoubleAddress(
3928 second.GetConstant()->AsDoubleConstant()->GetValue()));
3929 } else {
3930 DCHECK(second.IsDoubleStackSlot());
3931 __ addsd(first.AsFpuRegister<XmmRegister>(),
3932 Address(CpuRegister(RSP), second.GetStackIndex()));
3933 }
3934 break;
3935 }
3936
3937 default:
3938 LOG(FATAL) << "Unexpected add type " << add->GetResultType();
3939 }
3940 }
3941
3942 void LocationsBuilderX86_64::VisitSub(HSub* sub) {
3943 LocationSummary* locations =
3944 new (GetGraph()->GetAllocator()) LocationSummary(sub, LocationSummary::kNoCall);
3945 switch (sub->GetResultType()) {
3946 case DataType::Type::kInt32: {
3947 locations->SetInAt(0, Location::RequiresRegister());
3948 locations->SetInAt(1, Location::Any());
3949 locations->SetOut(Location::SameAsFirstInput());
3950 break;
3951 }
3952 case DataType::Type::kInt64: {
3953 locations->SetInAt(0, Location::RequiresRegister());
3954 locations->SetInAt(1, Location::RegisterOrInt32Constant(sub->InputAt(1)));
3955 locations->SetOut(Location::SameAsFirstInput());
3956 break;
3957 }
3958 case DataType::Type::kFloat32:
3959 case DataType::Type::kFloat64: {
3960 locations->SetInAt(0, Location::RequiresFpuRegister());
3961 locations->SetInAt(1, Location::Any());
3962 locations->SetOut(Location::SameAsFirstInput());
3963 break;
3964 }
3965 default:
3966 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
3967 }
3968 }
3969
3970 void InstructionCodeGeneratorX86_64::VisitSub(HSub* sub) {
3971 LocationSummary* locations = sub->GetLocations();
3972 Location first = locations->InAt(0);
3973 Location second = locations->InAt(1);
3974 DCHECK(first.Equals(locations->Out()));
3975 switch (sub->GetResultType()) {
3976 case DataType::Type::kInt32: {
3977 if (second.IsRegister()) {
3978 __ subl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3979 } else if (second.IsConstant()) {
3980 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
3981 __ subl(first.AsRegister<CpuRegister>(), imm);
3982 } else {
3983 __ subl(first.AsRegister<CpuRegister>(), Address(CpuRegister(RSP), second.GetStackIndex()));
3984 }
3985 break;
3986 }
3987 case DataType::Type::kInt64: {
3988 if (second.IsConstant()) {
3989 int64_t value = second.GetConstant()->AsLongConstant()->GetValue();
3990 DCHECK(IsInt<32>(value));
3991 __ subq(first.AsRegister<CpuRegister>(), Immediate(static_cast<int32_t>(value)));
3992 } else {
3993 __ subq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
3994 }
3995 break;
3996 }
3997
3998 case DataType::Type::kFloat32: {
3999 if (second.IsFpuRegister()) {
4000 __ subss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4001 } else if (second.IsConstant()) {
4002 __ subss(first.AsFpuRegister<XmmRegister>(),
4003 codegen_->LiteralFloatAddress(
4004 second.GetConstant()->AsFloatConstant()->GetValue()));
4005 } else {
4006 DCHECK(second.IsStackSlot());
4007 __ subss(first.AsFpuRegister<XmmRegister>(),
4008 Address(CpuRegister(RSP), second.GetStackIndex()));
4009 }
4010 break;
4011 }
4012
4013 case DataType::Type::kFloat64: {
4014 if (second.IsFpuRegister()) {
4015 __ subsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4016 } else if (second.IsConstant()) {
4017 __ subsd(first.AsFpuRegister<XmmRegister>(),
4018 codegen_->LiteralDoubleAddress(
4019 second.GetConstant()->AsDoubleConstant()->GetValue()));
4020 } else {
4021 DCHECK(second.IsDoubleStackSlot());
4022 __ subsd(first.AsFpuRegister<XmmRegister>(),
4023 Address(CpuRegister(RSP), second.GetStackIndex()));
4024 }
4025 break;
4026 }
4027
4028 default:
4029 LOG(FATAL) << "Unexpected sub type " << sub->GetResultType();
4030 }
4031 }
4032
4033 void LocationsBuilderX86_64::VisitMul(HMul* mul) {
4034 LocationSummary* locations =
4035 new (GetGraph()->GetAllocator()) LocationSummary(mul, LocationSummary::kNoCall);
4036 switch (mul->GetResultType()) {
4037 case DataType::Type::kInt32: {
4038 locations->SetInAt(0, Location::RequiresRegister());
4039 locations->SetInAt(1, Location::Any());
4040 if (mul->InputAt(1)->IsIntConstant()) {
4041 // Can use 3 operand multiply.
4042 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4043 } else {
4044 locations->SetOut(Location::SameAsFirstInput());
4045 }
4046 break;
4047 }
4048 case DataType::Type::kInt64: {
4049 locations->SetInAt(0, Location::RequiresRegister());
4050 locations->SetInAt(1, Location::Any());
4051 if (mul->InputAt(1)->IsLongConstant() &&
4052 IsInt<32>(mul->InputAt(1)->AsLongConstant()->GetValue())) {
4053 // Can use 3 operand multiply.
4054 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
4055 } else {
4056 locations->SetOut(Location::SameAsFirstInput());
4057 }
4058 break;
4059 }
4060 case DataType::Type::kFloat32:
4061 case DataType::Type::kFloat64: {
4062 locations->SetInAt(0, Location::RequiresFpuRegister());
4063 locations->SetInAt(1, Location::Any());
4064 locations->SetOut(Location::SameAsFirstInput());
4065 break;
4066 }
4067
4068 default:
4069 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4070 }
4071 }
4072
4073 void InstructionCodeGeneratorX86_64::VisitMul(HMul* mul) {
4074 LocationSummary* locations = mul->GetLocations();
4075 Location first = locations->InAt(0);
4076 Location second = locations->InAt(1);
4077 Location out = locations->Out();
4078 switch (mul->GetResultType()) {
4079 case DataType::Type::kInt32:
4080 // The constant may have ended up in a register, so test explicitly to avoid
4081 // problems where the output may not be the same as the first operand.
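// Note: the three-operand form `imull dst, src, imm` writes dst = src * imm, so when the
// right-hand side is a constant the output register does not have to alias the first input
// (which is why the locations builder used kNoOutputOverlap for that case).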
4082 if (mul->InputAt(1)->IsIntConstant()) {
4083 Immediate imm(mul->InputAt(1)->AsIntConstant()->GetValue());
4084 __ imull(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), imm);
4085 } else if (second.IsRegister()) {
4086 DCHECK(first.Equals(out));
4087 __ imull(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4088 } else {
4089 DCHECK(first.Equals(out));
4090 DCHECK(second.IsStackSlot());
4091 __ imull(first.AsRegister<CpuRegister>(),
4092 Address(CpuRegister(RSP), second.GetStackIndex()));
4093 }
4094 break;
4095 case DataType::Type::kInt64: {
4096 // The constant may have ended up in a register, so test explicitly to avoid
4097 // problems where the output may not be the same as the first operand.
4098 if (mul->InputAt(1)->IsLongConstant()) {
4099 int64_t value = mul->InputAt(1)->AsLongConstant()->GetValue();
4100 if (IsInt<32>(value)) {
4101 __ imulq(out.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(),
4102 Immediate(static_cast<int32_t>(value)));
4103 } else {
4104 // Have to use the constant area.
4105 DCHECK(first.Equals(out));
4106 __ imulq(first.AsRegister<CpuRegister>(), codegen_->LiteralInt64Address(value));
4107 }
4108 } else if (second.IsRegister()) {
4109 DCHECK(first.Equals(out));
4110 __ imulq(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
4111 } else {
4112 DCHECK(second.IsDoubleStackSlot());
4113 DCHECK(first.Equals(out));
4114 __ imulq(first.AsRegister<CpuRegister>(),
4115 Address(CpuRegister(RSP), second.GetStackIndex()));
4116 }
4117 break;
4118 }
4119
4120 case DataType::Type::kFloat32: {
4121 DCHECK(first.Equals(out));
4122 if (second.IsFpuRegister()) {
4123 __ mulss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4124 } else if (second.IsConstant()) {
4125 __ mulss(first.AsFpuRegister<XmmRegister>(),
4126 codegen_->LiteralFloatAddress(
4127 second.GetConstant()->AsFloatConstant()->GetValue()));
4128 } else {
4129 DCHECK(second.IsStackSlot());
4130 __ mulss(first.AsFpuRegister<XmmRegister>(),
4131 Address(CpuRegister(RSP), second.GetStackIndex()));
4132 }
4133 break;
4134 }
4135
4136 case DataType::Type::kFloat64: {
4137 DCHECK(first.Equals(out));
4138 if (second.IsFpuRegister()) {
4139 __ mulsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4140 } else if (second.IsConstant()) {
4141 __ mulsd(first.AsFpuRegister<XmmRegister>(),
4142 codegen_->LiteralDoubleAddress(
4143 second.GetConstant()->AsDoubleConstant()->GetValue()));
4144 } else {
4145 DCHECK(second.IsDoubleStackSlot());
4146 __ mulsd(first.AsFpuRegister<XmmRegister>(),
4147 Address(CpuRegister(RSP), second.GetStackIndex()));
4148 }
4149 break;
4150 }
4151
4152 default:
4153 LOG(FATAL) << "Unexpected mul type " << mul->GetResultType();
4154 }
4155 }
4156
4157 void InstructionCodeGeneratorX86_64::PushOntoFPStack(Location source, uint32_t temp_offset,
4158 uint32_t stack_adjustment, bool is_float) {
4159 if (source.IsStackSlot()) {
4160 DCHECK(is_float);
4161 __ flds(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4162 } else if (source.IsDoubleStackSlot()) {
4163 DCHECK(!is_float);
4164 __ fldl(Address(CpuRegister(RSP), source.GetStackIndex() + stack_adjustment));
4165 } else {
4166 // Write the value to the temporary location on the stack and load to FP stack.
4167 if (is_float) {
4168 Location stack_temp = Location::StackSlot(temp_offset);
4169 codegen_->Move(stack_temp, source);
4170 __ flds(Address(CpuRegister(RSP), temp_offset));
4171 } else {
4172 Location stack_temp = Location::DoubleStackSlot(temp_offset);
4173 codegen_->Move(stack_temp, source);
4174 __ fldl(Address(CpuRegister(RSP), temp_offset));
4175 }
4176 }
4177 }
4178
4179 void InstructionCodeGeneratorX86_64::GenerateRemFP(HRem *rem) {
4180 DataType::Type type = rem->GetResultType();
4181 bool is_float = type == DataType::Type::kFloat32;
4182 size_t elem_size = DataType::Size(type);
4183 LocationSummary* locations = rem->GetLocations();
4184 Location first = locations->InAt(0);
4185 Location second = locations->InAt(1);
4186 Location out = locations->Out();
4187
4188 // Create stack space for 2 elements.
4189 // TODO: enhance register allocator to ask for stack temporaries.
4190 __ subq(CpuRegister(RSP), Immediate(2 * elem_size));
4191
4192 // Load the values to the FP stack in reverse order, using temporaries if needed.
4193 PushOntoFPStack(second, elem_size, 2 * elem_size, is_float);
4194 PushOntoFPStack(first, 0, 2 * elem_size, is_float);
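// After these pushes the dividend is in ST(0) and the divisor in ST(1), matching FPREM, which
// computes the partial remainder of ST(0) / ST(1) in place in ST(0).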
4195
4196 // Loop doing FPREM until we stabilize.
4197 NearLabel retry;
4198 __ Bind(&retry);
4199 __ fprem();
4200
4201 // Move FP status to AX.
4202 __ fstsw();
4203
4204 // And see if the argument reduction is complete. This is signaled by the
4205 // C2 FPU flag bit being set to 0.
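// (fstsw copies the x87 status word into AX; kC2ConditionMask == 0x400 selects the C2 bit,
// bit 10, which FPREM leaves set while the reduction is still incomplete.)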
4206 __ andl(CpuRegister(RAX), Immediate(kC2ConditionMask));
4207 __ j(kNotEqual, &retry);
4208
4209 // We have settled on the final value. Retrieve it into an XMM register.
4210 // Store FP top of stack to real stack.
4211 if (is_float) {
4212 __ fsts(Address(CpuRegister(RSP), 0));
4213 } else {
4214 __ fstl(Address(CpuRegister(RSP), 0));
4215 }
4216
4217 // Pop the 2 items from the FP stack.
4218 __ fucompp();
4219
4220 // Load the value from the stack into an XMM register.
4221 DCHECK(out.IsFpuRegister()) << out;
4222 if (is_float) {
4223 __ movss(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4224 } else {
4225 __ movsd(out.AsFpuRegister<XmmRegister>(), Address(CpuRegister(RSP), 0));
4226 }
4227
4228 // And remove the temporary stack space we allocated.
4229 __ addq(CpuRegister(RSP), Immediate(2 * elem_size));
4230 }
4231
4232 void InstructionCodeGeneratorX86_64::DivRemOneOrMinusOne(HBinaryOperation* instruction) {
4233 DCHECK(instruction->IsDiv() || instruction->IsRem());
4234
4235 LocationSummary* locations = instruction->GetLocations();
4236 Location second = locations->InAt(1);
4237 DCHECK(second.IsConstant());
4238
4239 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4240 CpuRegister input_register = locations->InAt(0).AsRegister<CpuRegister>();
4241 int64_t imm = Int64FromConstant(second.GetConstant());
4242
4243 DCHECK(imm == 1 || imm == -1);
4244
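// x / 1 == x, x / -1 == -x, and x % 1 == x % -1 == 0, so no division instruction is needed.
// Note that the remainder is cleared with xorl even in the 64-bit case: writing a 32-bit
// register implicitly zero-extends into the full 64-bit register on x86-64.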
4245 switch (instruction->GetResultType()) {
4246 case DataType::Type::kInt32: {
4247 if (instruction->IsRem()) {
4248 __ xorl(output_register, output_register);
4249 } else {
4250 __ movl(output_register, input_register);
4251 if (imm == -1) {
4252 __ negl(output_register);
4253 }
4254 }
4255 break;
4256 }
4257
4258 case DataType::Type::kInt64: {
4259 if (instruction->IsRem()) {
4260 __ xorl(output_register, output_register);
4261 } else {
4262 __ movq(output_register, input_register);
4263 if (imm == -1) {
4264 __ negq(output_register);
4265 }
4266 }
4267 break;
4268 }
4269
4270 default:
4271 LOG(FATAL) << "Unexpected type for div by (-)1 " << instruction->GetResultType();
4272 }
4273 }
4274 void InstructionCodeGeneratorX86_64::RemByPowerOfTwo(HRem* instruction) {
4275 LocationSummary* locations = instruction->GetLocations();
4276 Location second = locations->InAt(1);
4277 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4278 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4279 int64_t imm = Int64FromConstant(second.GetConstant());
4280 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4281 uint64_t abs_imm = AbsOrMin(imm);
4282 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
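// For a power-of-two divisor 2^k, `numerator & (2^k - 1)` is the remainder for non-negative
// numerators. A negative numerator with a non-zero low part must be adjusted so the result
// takes the sign of the numerator, e.g. -7 % 4: (-7 & 3) == 1, adjusted to 1 - 4 == -3.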
4283 if (instruction->GetResultType() == DataType::Type::kInt32) {
4284 NearLabel done;
4285 __ movl(out, numerator);
4286 __ andl(out, Immediate(abs_imm-1));
4287 __ j(Condition::kZero, &done);
4288 __ leal(tmp, Address(out, static_cast<int32_t>(~(abs_imm-1))));
4289 __ testl(numerator, numerator);
4290 __ cmov(Condition::kLess, out, tmp, false);
4291 __ Bind(&done);
4292
4293 } else {
4294 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4295 codegen_->Load64BitValue(tmp, abs_imm - 1);
4296 NearLabel done;
4297
4298 __ movq(out, numerator);
4299 __ andq(out, tmp);
4300 __ j(Condition::kZero, &done);
4301 __ movq(tmp, numerator);
4302 __ sarq(tmp, Immediate(63));
4303 __ shlq(tmp, Immediate(WhichPowerOf2(abs_imm)));
4304 __ orq(out, tmp);
4305 __ Bind(&done);
4306 }
4307 }
4308 void InstructionCodeGeneratorX86_64::DivByPowerOfTwo(HDiv* instruction) {
4309 LocationSummary* locations = instruction->GetLocations();
4310 Location second = locations->InAt(1);
4311
4312 CpuRegister output_register = locations->Out().AsRegister<CpuRegister>();
4313 CpuRegister numerator = locations->InAt(0).AsRegister<CpuRegister>();
4314
4315 int64_t imm = Int64FromConstant(second.GetConstant());
4316 DCHECK(IsPowerOfTwo(AbsOrMin(imm)));
4317 uint64_t abs_imm = AbsOrMin(imm);
4318
4319 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
4320
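// For a power-of-two divisor 2^k the idiv is replaced by an arithmetic shift. Since sar rounds
// toward negative infinity while Java division rounds toward zero, (2^k - 1) is added to
// negative numerators first, e.g. -7 / 4: (-7 + 3) >> 2 == -1; the result is then negated if
// the divisor itself is negative.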
4321 if (instruction->GetResultType() == DataType::Type::kInt32) {
4322 // When the denominator is equal to 2, we can add the sign bit and the numerator to tmp.
4323 // Below we use the addl instruction instead of cmov, which gives us a 1 cycle benefit.
4324 if (abs_imm == 2) {
4325 __ leal(tmp, Address(numerator, 0));
4326 __ shrl(tmp, Immediate(31));
4327 __ addl(tmp, numerator);
4328 } else {
4329 __ leal(tmp, Address(numerator, abs_imm - 1));
4330 __ testl(numerator, numerator);
4331 __ cmov(kGreaterEqual, tmp, numerator);
4332 }
4333 int shift = CTZ(imm);
4334 __ sarl(tmp, Immediate(shift));
4335
4336 if (imm < 0) {
4337 __ negl(tmp);
4338 }
4339
4340 __ movl(output_register, tmp);
4341 } else {
4342 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4343 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
4344 if (abs_imm == 2) {
4345 __ movq(rdx, numerator);
4346 __ shrq(rdx, Immediate(63));
4347 __ addq(rdx, numerator);
4348 } else {
4349 codegen_->Load64BitValue(rdx, abs_imm - 1);
4350 __ addq(rdx, numerator);
4351 __ testq(numerator, numerator);
4352 __ cmov(kGreaterEqual, rdx, numerator);
4353 }
4354 int shift = CTZ(imm);
4355 __ sarq(rdx, Immediate(shift));
4356
4357 if (imm < 0) {
4358 __ negq(rdx);
4359 }
4360
4361 __ movq(output_register, rdx);
4362 }
4363 }
4364
4365 void InstructionCodeGeneratorX86_64::GenerateDivRemWithAnyConstant(HBinaryOperation* instruction) {
4366 DCHECK(instruction->IsDiv() || instruction->IsRem());
4367
4368 LocationSummary* locations = instruction->GetLocations();
4369 Location second = locations->InAt(1);
4370
4371 CpuRegister numerator = instruction->IsDiv() ? locations->GetTemp(1).AsRegister<CpuRegister>()
4372 : locations->GetTemp(0).AsRegister<CpuRegister>();
4373 CpuRegister eax = locations->InAt(0).AsRegister<CpuRegister>();
4374 CpuRegister edx = instruction->IsDiv() ? locations->GetTemp(0).AsRegister<CpuRegister>()
4375 : locations->Out().AsRegister<CpuRegister>();
4376 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4377
4378 DCHECK_EQ(RAX, eax.AsRegister());
4379 DCHECK_EQ(RDX, edx.AsRegister());
4380 if (instruction->IsDiv()) {
4381 DCHECK_EQ(RAX, out.AsRegister());
4382 } else {
4383 DCHECK_EQ(RDX, out.AsRegister());
4384 }
4385
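// For other constants the division is strength-reduced to a multiplication by a precomputed
// fixed-point reciprocal ("magic number") followed by shifts and a sign correction, in the
// style of Hacker's Delight; CalculateMagicAndShiftForDivRem supplies magic and shift.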
4386 int64_t magic;
4387 int shift;
4388
4389 // TODO: can these branches be written as one?
4390 if (instruction->GetResultType() == DataType::Type::kInt32) {
4391 int imm = second.GetConstant()->AsIntConstant()->GetValue();
4392
4393 CalculateMagicAndShiftForDivRem(imm, false /* is_long= */, &magic, &shift);
4394
4395 __ movl(numerator, eax);
4396
4397 __ movl(eax, Immediate(magic));
4398 __ imull(numerator);
4399
4400 if (imm > 0 && magic < 0) {
4401 __ addl(edx, numerator);
4402 } else if (imm < 0 && magic > 0) {
4403 __ subl(edx, numerator);
4404 }
4405
4406 if (shift != 0) {
4407 __ sarl(edx, Immediate(shift));
4408 }
4409
4410 __ movl(eax, edx);
4411 __ shrl(edx, Immediate(31));
4412 __ addl(edx, eax);
4413
4414 if (instruction->IsRem()) {
4415 __ movl(eax, numerator);
4416 __ imull(edx, Immediate(imm));
4417 __ subl(eax, edx);
4418 __ movl(edx, eax);
4419 } else {
4420 __ movl(eax, edx);
4421 }
4422 } else {
4423 int64_t imm = second.GetConstant()->AsLongConstant()->GetValue();
4424
4425 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
4426
4427 CpuRegister rax = eax;
4428 CpuRegister rdx = edx;
4429
4430 CalculateMagicAndShiftForDivRem(imm, true /* is_long= */, &magic, &shift);
4431
4432 // Save the numerator.
4433 __ movq(numerator, rax);
4434
4435 // RAX = magic
4436 codegen_->Load64BitValue(rax, magic);
4437
4438 // RDX:RAX = magic * numerator
4439 __ imulq(numerator);
4440
4441 if (imm > 0 && magic < 0) {
4442 // RDX += numerator
4443 __ addq(rdx, numerator);
4444 } else if (imm < 0 && magic > 0) {
4445 // RDX -= numerator
4446 __ subq(rdx, numerator);
4447 }
4448
4449 // Shift if needed.
4450 if (shift != 0) {
4451 __ sarq(rdx, Immediate(shift));
4452 }
4453
4454 // RDX += 1 if RDX < 0
4455 __ movq(rax, rdx);
4456 __ shrq(rdx, Immediate(63));
4457 __ addq(rdx, rax);
4458
4459 if (instruction->IsRem()) {
4460 __ movq(rax, numerator);
4461
4462 if (IsInt<32>(imm)) {
4463 __ imulq(rdx, Immediate(static_cast<int32_t>(imm)));
4464 } else {
4465 __ imulq(rdx, codegen_->LiteralInt64Address(imm));
4466 }
4467
4468 __ subq(rax, rdx);
4469 __ movq(rdx, rax);
4470 } else {
4471 __ movq(rax, rdx);
4472 }
4473 }
4474 }
4475
4476 void InstructionCodeGeneratorX86_64::GenerateDivRemIntegral(HBinaryOperation* instruction) {
4477 DCHECK(instruction->IsDiv() || instruction->IsRem());
4478 DataType::Type type = instruction->GetResultType();
4479 DCHECK(type == DataType::Type::kInt32 || type == DataType::Type::kInt64);
4480
4481 bool is_div = instruction->IsDiv();
4482 LocationSummary* locations = instruction->GetLocations();
4483
4484 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4485 Location second = locations->InAt(1);
4486
4487 DCHECK_EQ(RAX, locations->InAt(0).AsRegister<CpuRegister>().AsRegister());
4488 DCHECK_EQ(is_div ? RAX : RDX, out.AsRegister());
4489
4490 if (second.IsConstant()) {
4491 int64_t imm = Int64FromConstant(second.GetConstant());
4492
4493 if (imm == 0) {
4494 // Do not generate anything. DivZeroCheck would prevent any code to be executed.
4495 } else if (imm == 1 || imm == -1) {
4496 DivRemOneOrMinusOne(instruction);
4497 } else if (IsPowerOfTwo(AbsOrMin(imm))) {
4498 if (is_div) {
4499 DivByPowerOfTwo(instruction->AsDiv());
4500 } else {
4501 RemByPowerOfTwo(instruction->AsRem());
4502 }
4503 } else {
4504 DCHECK(imm <= -2 || imm >= 2);
4505 GenerateDivRemWithAnyConstant(instruction);
4506 }
4507 } else {
4508 SlowPathCode* slow_path =
4509 new (codegen_->GetScopedAllocator()) DivRemMinusOneSlowPathX86_64(
4510 instruction, out.AsRegister(), type, is_div);
4511 codegen_->AddSlowPath(slow_path);
4512
4513 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4514 // 0x80000000(00000000)/-1 triggers an arithmetic exception!
4515 // Dividing by -1 is actually negation, and -0x80000000(00000000) = 0x80000000(00000000),
4516 // so it is safe to just use negl/negq instead of more complex comparisons.
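// Dividing by -1 is therefore diverted to DivRemMinusOneSlowPathX86_64, which produces the
// result without executing idiv (idiv would raise a #DE fault for the overflowing
// 0x80000000(00000000) / -1 case).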
4517 if (type == DataType::Type::kInt32) {
4518 __ cmpl(second_reg, Immediate(-1));
4519 __ j(kEqual, slow_path->GetEntryLabel());
4520 // edx:eax <- sign extension of eax
4521 __ cdq();
4522 // eax = quotient, edx = remainder
4523 __ idivl(second_reg);
4524 } else {
4525 __ cmpq(second_reg, Immediate(-1));
4526 __ j(kEqual, slow_path->GetEntryLabel());
4527 // rdx:rax <- sign extension of rax
4528 __ cqo();
4529 // rax = quotient, rdx = remainder
4530 __ idivq(second_reg);
4531 }
4532 __ Bind(slow_path->GetExitLabel());
4533 }
4534 }
4535
4536 void LocationsBuilderX86_64::VisitDiv(HDiv* div) {
4537 LocationSummary* locations =
4538 new (GetGraph()->GetAllocator()) LocationSummary(div, LocationSummary::kNoCall);
4539 switch (div->GetResultType()) {
4540 case DataType::Type::kInt32:
4541 case DataType::Type::kInt64: {
4542 locations->SetInAt(0, Location::RegisterLocation(RAX));
4543 locations->SetInAt(1, Location::RegisterOrConstant(div->InputAt(1)));
4544 locations->SetOut(Location::SameAsFirstInput());
4545 // Intel uses edx:eax (rdx:rax for 64-bit) as the dividend.
4546 locations->AddTemp(Location::RegisterLocation(RDX));
4547 // We need to save the numerator while we tweak rax and rdx. As we are using imul in a way
4548 // which enforces results to be in RAX and RDX, things are simpler if we use RDX also as
4549 // output and request another temp.
4550 if (div->InputAt(1)->IsConstant()) {
4551 locations->AddTemp(Location::RequiresRegister());
4552 }
4553 break;
4554 }
4555
4556 case DataType::Type::kFloat32:
4557 case DataType::Type::kFloat64: {
4558 locations->SetInAt(0, Location::RequiresFpuRegister());
4559 locations->SetInAt(1, Location::Any());
4560 locations->SetOut(Location::SameAsFirstInput());
4561 break;
4562 }
4563
4564 default:
4565 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4566 }
4567 }
4568
4569 void InstructionCodeGeneratorX86_64::VisitDiv(HDiv* div) {
4570 LocationSummary* locations = div->GetLocations();
4571 Location first = locations->InAt(0);
4572 Location second = locations->InAt(1);
4573 DCHECK(first.Equals(locations->Out()));
4574
4575 DataType::Type type = div->GetResultType();
4576 switch (type) {
4577 case DataType::Type::kInt32:
4578 case DataType::Type::kInt64: {
4579 GenerateDivRemIntegral(div);
4580 break;
4581 }
4582
4583 case DataType::Type::kFloat32: {
4584 if (second.IsFpuRegister()) {
4585 __ divss(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4586 } else if (second.IsConstant()) {
4587 __ divss(first.AsFpuRegister<XmmRegister>(),
4588 codegen_->LiteralFloatAddress(
4589 second.GetConstant()->AsFloatConstant()->GetValue()));
4590 } else {
4591 DCHECK(second.IsStackSlot());
4592 __ divss(first.AsFpuRegister<XmmRegister>(),
4593 Address(CpuRegister(RSP), second.GetStackIndex()));
4594 }
4595 break;
4596 }
4597
4598 case DataType::Type::kFloat64: {
4599 if (second.IsFpuRegister()) {
4600 __ divsd(first.AsFpuRegister<XmmRegister>(), second.AsFpuRegister<XmmRegister>());
4601 } else if (second.IsConstant()) {
4602 __ divsd(first.AsFpuRegister<XmmRegister>(),
4603 codegen_->LiteralDoubleAddress(
4604 second.GetConstant()->AsDoubleConstant()->GetValue()));
4605 } else {
4606 DCHECK(second.IsDoubleStackSlot());
4607 __ divsd(first.AsFpuRegister<XmmRegister>(),
4608 Address(CpuRegister(RSP), second.GetStackIndex()));
4609 }
4610 break;
4611 }
4612
4613 default:
4614 LOG(FATAL) << "Unexpected div type " << div->GetResultType();
4615 }
4616 }
4617
4618 void LocationsBuilderX86_64::VisitRem(HRem* rem) {
4619 DataType::Type type = rem->GetResultType();
4620 LocationSummary* locations =
4621 new (GetGraph()->GetAllocator()) LocationSummary(rem, LocationSummary::kNoCall);
4622
4623 switch (type) {
4624 case DataType::Type::kInt32:
4625 case DataType::Type::kInt64: {
4626 locations->SetInAt(0, Location::RegisterLocation(RAX));
4627 locations->SetInAt(1, Location::RegisterOrConstant(rem->InputAt(1)));
4628 // Intel uses rdx:rax as the dividend and puts the remainder in rdx
4629 locations->SetOut(Location::RegisterLocation(RDX));
4630 // We need to save the numerator while we tweak eax and edx. As we are using imul in a way
4631 // which enforces results to be in RAX and RDX, things are simpler if we use EAX also as
4632 // output and request another temp.
4633 if (rem->InputAt(1)->IsConstant()) {
4634 locations->AddTemp(Location::RequiresRegister());
4635 }
4636 break;
4637 }
4638
4639 case DataType::Type::kFloat32:
4640 case DataType::Type::kFloat64: {
4641 locations->SetInAt(0, Location::Any());
4642 locations->SetInAt(1, Location::Any());
4643 locations->SetOut(Location::RequiresFpuRegister());
4644 locations->AddTemp(Location::RegisterLocation(RAX));
4645 break;
4646 }
4647
4648 default:
4649 LOG(FATAL) << "Unexpected rem type " << type;
4650 }
4651 }
4652
4653 void InstructionCodeGeneratorX86_64::VisitRem(HRem* rem) {
4654 DataType::Type type = rem->GetResultType();
4655 switch (type) {
4656 case DataType::Type::kInt32:
4657 case DataType::Type::kInt64: {
4658 GenerateDivRemIntegral(rem);
4659 break;
4660 }
4661 case DataType::Type::kFloat32:
4662 case DataType::Type::kFloat64: {
4663 GenerateRemFP(rem);
4664 break;
4665 }
4666 default:
4667 LOG(FATAL) << "Unexpected rem type " << rem->GetResultType();
4668 }
4669 }
4670
4671 static void CreateMinMaxLocations(ArenaAllocator* allocator, HBinaryOperation* minmax) {
4672 LocationSummary* locations = new (allocator) LocationSummary(minmax);
4673 switch (minmax->GetResultType()) {
4674 case DataType::Type::kInt32:
4675 case DataType::Type::kInt64:
4676 locations->SetInAt(0, Location::RequiresRegister());
4677 locations->SetInAt(1, Location::RequiresRegister());
4678 locations->SetOut(Location::SameAsFirstInput());
4679 break;
4680 case DataType::Type::kFloat32:
4681 case DataType::Type::kFloat64:
4682 locations->SetInAt(0, Location::RequiresFpuRegister());
4683 locations->SetInAt(1, Location::RequiresFpuRegister());
4684 // The following is sub-optimal, but all we can do for now. It would be fine to also accept
4685 // the second input to be the output (we can simply swap inputs).
4686 locations->SetOut(Location::SameAsFirstInput());
4687 break;
4688 default:
4689 LOG(FATAL) << "Unexpected type for HMinMax " << minmax->GetResultType();
4690 }
4691 }
4692
4693 void InstructionCodeGeneratorX86_64::GenerateMinMaxInt(LocationSummary* locations,
4694 bool is_min,
4695 DataType::Type type) {
4696 Location op1_loc = locations->InAt(0);
4697 Location op2_loc = locations->InAt(1);
4698
4699 // Shortcut for same input locations.
4700 if (op1_loc.Equals(op2_loc)) {
4701 // Can return immediately, as op1_loc == out_loc.
4702 // Note: if we ever support separate registers, e.g., output into memory, we need to check for
4703 // a copy here.
4704 DCHECK(locations->Out().Equals(op1_loc));
4705 return;
4706 }
4707
4708 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4709 CpuRegister op2 = op2_loc.AsRegister<CpuRegister>();
4710
4711 // (out := op1)
4712 // out <=? op2
4713 // if out is min jmp done
4714 // out := op2
4715 // done:
4716
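// cmov keeps this branch-free: for min, out is overwritten with op2 only when out > op2
// (kGreater); for max, only when out < op2 (kLess). The last argument picks the 64-bit form.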
4717 if (type == DataType::Type::kInt64) {
4718 __ cmpq(out, op2);
4719 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ true);
4720 } else {
4721 DCHECK_EQ(type, DataType::Type::kInt32);
4722 __ cmpl(out, op2);
4723 __ cmov(is_min ? Condition::kGreater : Condition::kLess, out, op2, /*is64bit*/ false);
4724 }
4725 }
4726
4727 void InstructionCodeGeneratorX86_64::GenerateMinMaxFP(LocationSummary* locations,
4728 bool is_min,
4729 DataType::Type type) {
4730 Location op1_loc = locations->InAt(0);
4731 Location op2_loc = locations->InAt(1);
4732 Location out_loc = locations->Out();
4733 XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();
4734
4735 // Shortcut for same input locations.
4736 if (op1_loc.Equals(op2_loc)) {
4737 DCHECK(out_loc.Equals(op1_loc));
4738 return;
4739 }
4740
4741 // (out := op1)
4742 // out <=? op2
4743 // if Nan jmp Nan_label
4744 // if out is min jmp done
4745 // if op2 is min jmp op2_label
4746 // handle -0/+0
4747 // jmp done
4748 // Nan_label:
4749 // out := NaN
4750 // op2_label:
4751 // out := op2
4752 // done:
4753 //
4754 // This removes one jmp, but needs to copy one input (op1) to out.
4755 //
4756 // TODO: This is straight from Quick. Make NaN an out-of-line slowpath?
4757
4758 XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();
4759
4760 NearLabel nan, done, op2_label;
4761 if (type == DataType::Type::kFloat64) {
4762 __ ucomisd(out, op2);
4763 } else {
4764 DCHECK_EQ(type, DataType::Type::kFloat32);
4765 __ ucomiss(out, op2);
4766 }
4767
4768 __ j(Condition::kParityEven, &nan);
4769
4770 __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
4771 __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);
4772
4773 // Handle 0.0/-0.0.
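// At this point the operands compared equal, which for floats also covers +0.0 vs -0.0.
// OR-ing the bit patterns makes min return -0.0 whenever either operand is -0.0, and
// AND-ing them makes max return +0.0 whenever either operand is +0.0.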
4774 if (is_min) {
4775 if (type == DataType::Type::kFloat64) {
4776 __ orpd(out, op2);
4777 } else {
4778 __ orps(out, op2);
4779 }
4780 } else {
4781 if (type == DataType::Type::kFloat64) {
4782 __ andpd(out, op2);
4783 } else {
4784 __ andps(out, op2);
4785 }
4786 }
4787 __ jmp(&done);
4788
4789 // NaN handling.
4790 __ Bind(&nan);
4791 if (type == DataType::Type::kFloat64) {
4792 __ movsd(out, codegen_->LiteralInt64Address(INT64_C(0x7FF8000000000000)));
4793 } else {
4794 __ movss(out, codegen_->LiteralInt32Address(INT32_C(0x7FC00000)));
4795 }
4796 __ jmp(&done);
4797
4798 // out := op2;
4799 __ Bind(&op2_label);
4800 if (type == DataType::Type::kFloat64) {
4801 __ movsd(out, op2);
4802 } else {
4803 __ movss(out, op2);
4804 }
4805
4806 // Done.
4807 __ Bind(&done);
4808 }
4809
4810 void InstructionCodeGeneratorX86_64::GenerateMinMax(HBinaryOperation* minmax, bool is_min) {
4811 DataType::Type type = minmax->GetResultType();
4812 switch (type) {
4813 case DataType::Type::kInt32:
4814 case DataType::Type::kInt64:
4815 GenerateMinMaxInt(minmax->GetLocations(), is_min, type);
4816 break;
4817 case DataType::Type::kFloat32:
4818 case DataType::Type::kFloat64:
4819 GenerateMinMaxFP(minmax->GetLocations(), is_min, type);
4820 break;
4821 default:
4822 LOG(FATAL) << "Unexpected type for HMinMax " << type;
4823 }
4824 }
4825
4826 void LocationsBuilderX86_64::VisitMin(HMin* min) {
4827 CreateMinMaxLocations(GetGraph()->GetAllocator(), min);
4828 }
4829
4830 void InstructionCodeGeneratorX86_64::VisitMin(HMin* min) {
4831 GenerateMinMax(min, /*is_min*/ true);
4832 }
4833
4834 void LocationsBuilderX86_64::VisitMax(HMax* max) {
4835 CreateMinMaxLocations(GetGraph()->GetAllocator(), max);
4836 }
4837
4838 void InstructionCodeGeneratorX86_64::VisitMax(HMax* max) {
4839 GenerateMinMax(max, /*is_min*/ false);
4840 }
4841
4842 void LocationsBuilderX86_64::VisitAbs(HAbs* abs) {
4843 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(abs);
4844 switch (abs->GetResultType()) {
4845 case DataType::Type::kInt32:
4846 case DataType::Type::kInt64:
4847 locations->SetInAt(0, Location::RequiresRegister());
4848 locations->SetOut(Location::SameAsFirstInput());
4849 locations->AddTemp(Location::RequiresRegister());
4850 break;
4851 case DataType::Type::kFloat32:
4852 case DataType::Type::kFloat64:
4853 locations->SetInAt(0, Location::RequiresFpuRegister());
4854 locations->SetOut(Location::SameAsFirstInput());
4855 locations->AddTemp(Location::RequiresFpuRegister());
4856 break;
4857 default:
4858 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4859 }
4860 }
4861
4862 void InstructionCodeGeneratorX86_64::VisitAbs(HAbs* abs) {
4863 LocationSummary* locations = abs->GetLocations();
4864 switch (abs->GetResultType()) {
4865 case DataType::Type::kInt32: {
4866 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4867 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
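// abs(x) is computed as (x + (x >> 31)) ^ (x >> 31): the mask is 0 for non-negative x and
// -1 for negative x, e.g. x = -5: (-5 + -1) ^ -1 == -6 ^ -1 == 5. Like Math.abs, this
// leaves Integer.MIN_VALUE unchanged.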
4868 // Create mask.
4869 __ movl(mask, out);
4870 __ sarl(mask, Immediate(31));
4871 // Add mask.
4872 __ addl(out, mask);
4873 __ xorl(out, mask);
4874 break;
4875 }
4876 case DataType::Type::kInt64: {
4877 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
4878 CpuRegister mask = locations->GetTemp(0).AsRegister<CpuRegister>();
4879 // Create mask.
4880 __ movq(mask, out);
4881 __ sarq(mask, Immediate(63));
4882 // Add mask.
4883 __ addq(out, mask);
4884 __ xorq(out, mask);
4885 break;
4886 }
4887 case DataType::Type::kFloat32: {
4888 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4889 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4890 __ movss(mask, codegen_->LiteralInt32Address(INT32_C(0x7FFFFFFF)));
4891 __ andps(out, mask);
4892 break;
4893 }
4894 case DataType::Type::kFloat64: {
4895 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
4896 XmmRegister mask = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
4897 __ movsd(mask, codegen_->LiteralInt64Address(INT64_C(0x7FFFFFFFFFFFFFFF)));
4898 __ andpd(out, mask);
4899 break;
4900 }
4901 default:
4902 LOG(FATAL) << "Unexpected type for HAbs " << abs->GetResultType();
4903 }
4904 }
4905
4906 void LocationsBuilderX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4907 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
4908 locations->SetInAt(0, Location::Any());
4909 }
4910
4911 void InstructionCodeGeneratorX86_64::VisitDivZeroCheck(HDivZeroCheck* instruction) {
4912 SlowPathCode* slow_path =
4913 new (codegen_->GetScopedAllocator()) DivZeroCheckSlowPathX86_64(instruction);
4914 codegen_->AddSlowPath(slow_path);
4915
4916 LocationSummary* locations = instruction->GetLocations();
4917 Location value = locations->InAt(0);
4918
4919 switch (instruction->GetType()) {
4920 case DataType::Type::kBool:
4921 case DataType::Type::kUint8:
4922 case DataType::Type::kInt8:
4923 case DataType::Type::kUint16:
4924 case DataType::Type::kInt16:
4925 case DataType::Type::kInt32: {
4926 if (value.IsRegister()) {
4927 __ testl(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4928 __ j(kEqual, slow_path->GetEntryLabel());
4929 } else if (value.IsStackSlot()) {
4930 __ cmpl(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4931 __ j(kEqual, slow_path->GetEntryLabel());
4932 } else {
4933 DCHECK(value.IsConstant()) << value;
4934 if (value.GetConstant()->AsIntConstant()->GetValue() == 0) {
4935 __ jmp(slow_path->GetEntryLabel());
4936 }
4937 }
4938 break;
4939 }
4940 case DataType::Type::kInt64: {
4941 if (value.IsRegister()) {
4942 __ testq(value.AsRegister<CpuRegister>(), value.AsRegister<CpuRegister>());
4943 __ j(kEqual, slow_path->GetEntryLabel());
4944 } else if (value.IsDoubleStackSlot()) {
4945 __ cmpq(Address(CpuRegister(RSP), value.GetStackIndex()), Immediate(0));
4946 __ j(kEqual, slow_path->GetEntryLabel());
4947 } else {
4948 DCHECK(value.IsConstant()) << value;
4949 if (value.GetConstant()->AsLongConstant()->GetValue() == 0) {
4950 __ jmp(slow_path->GetEntryLabel());
4951 }
4952 }
4953 break;
4954 }
4955 default:
4956 LOG(FATAL) << "Unexpected type for HDivZeroCheck " << instruction->GetType();
4957 }
4958 }
4959
4960 void LocationsBuilderX86_64::HandleShift(HBinaryOperation* op) {
4961 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4962
4963 LocationSummary* locations =
4964 new (GetGraph()->GetAllocator()) LocationSummary(op, LocationSummary::kNoCall);
4965
4966 switch (op->GetResultType()) {
4967 case DataType::Type::kInt32:
4968 case DataType::Type::kInt64: {
4969 locations->SetInAt(0, Location::RequiresRegister());
4970 // The shift count needs to be in CL.
4971 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, op->InputAt(1)));
4972 locations->SetOut(Location::SameAsFirstInput());
4973 break;
4974 }
4975 default:
4976 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
4977 }
4978 }
4979
4980 void InstructionCodeGeneratorX86_64::HandleShift(HBinaryOperation* op) {
4981 DCHECK(op->IsShl() || op->IsShr() || op->IsUShr());
4982
4983 LocationSummary* locations = op->GetLocations();
4984 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
4985 Location second = locations->InAt(1);
4986
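// Constant shift distances are masked to 5 bits (32-bit) or 6 bits (64-bit) below, matching
// both the Java shift semantics and what the hardware does for the variable-count forms in CL.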
4987 switch (op->GetResultType()) {
4988 case DataType::Type::kInt32: {
4989 if (second.IsRegister()) {
4990 CpuRegister second_reg = second.AsRegister<CpuRegister>();
4991 if (op->IsShl()) {
4992 __ shll(first_reg, second_reg);
4993 } else if (op->IsShr()) {
4994 __ sarl(first_reg, second_reg);
4995 } else {
4996 __ shrl(first_reg, second_reg);
4997 }
4998 } else {
4999 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5000 if (op->IsShl()) {
5001 __ shll(first_reg, imm);
5002 } else if (op->IsShr()) {
5003 __ sarl(first_reg, imm);
5004 } else {
5005 __ shrl(first_reg, imm);
5006 }
5007 }
5008 break;
5009 }
5010 case DataType::Type::kInt64: {
5011 if (second.IsRegister()) {
5012 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5013 if (op->IsShl()) {
5014 __ shlq(first_reg, second_reg);
5015 } else if (op->IsShr()) {
5016 __ sarq(first_reg, second_reg);
5017 } else {
5018 __ shrq(first_reg, second_reg);
5019 }
5020 } else {
5021 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5022 if (op->IsShl()) {
5023 __ shlq(first_reg, imm);
5024 } else if (op->IsShr()) {
5025 __ sarq(first_reg, imm);
5026 } else {
5027 __ shrq(first_reg, imm);
5028 }
5029 }
5030 break;
5031 }
5032 default:
5033 LOG(FATAL) << "Unexpected operation type " << op->GetResultType();
5034 UNREACHABLE();
5035 }
5036 }
5037
5038 void LocationsBuilderX86_64::HandleRotate(HBinaryOperation* rotate) {
5039 LocationSummary* locations =
5040 new (GetGraph()->GetAllocator()) LocationSummary(rotate, LocationSummary::kNoCall);
5041
5042 switch (rotate->GetResultType()) {
5043 case DataType::Type::kInt32:
5044 case DataType::Type::kInt64: {
5045 locations->SetInAt(0, Location::RequiresRegister());
5046 // The shift count needs to be in CL (unless it is a constant).
5047 locations->SetInAt(1, Location::ByteRegisterOrConstant(RCX, rotate->InputAt(1)));
5048 locations->SetOut(Location::SameAsFirstInput());
5049 break;
5050 }
5051 default:
5052 LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5053 UNREACHABLE();
5054 }
5055 }
5056
5057 void InstructionCodeGeneratorX86_64::HandleRotate(HBinaryOperation* rotate) {
5058 LocationSummary* locations = rotate->GetLocations();
5059 CpuRegister first_reg = locations->InAt(0).AsRegister<CpuRegister>();
5060 Location second = locations->InAt(1);
5061
5062 switch (rotate->GetResultType()) {
5063 case DataType::Type::kInt32:
5064 if (second.IsRegister()) {
5065 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5066 if (rotate->IsRor()) {
5067 __ rorl(first_reg, second_reg);
5068 } else {
5069 DCHECK(rotate->IsRol());
5070 __ roll(first_reg, second_reg);
5071 }
5072 } else {
5073 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxIntShiftDistance);
5074 if (rotate->IsRor()) {
5075 __ rorl(first_reg, imm);
5076 } else {
5077 DCHECK(rotate->IsRol());
5078 __ roll(first_reg, imm);
5079 }
5080 }
5081 break;
5082 case DataType::Type::kInt64:
5083 if (second.IsRegister()) {
5084 CpuRegister second_reg = second.AsRegister<CpuRegister>();
5085 if (rotate->IsRor()) {
5086 __ rorq(first_reg, second_reg);
5087 } else {
5088 DCHECK(rotate->IsRol());
5089 __ rolq(first_reg, second_reg);
5090 }
5091 } else {
5092 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue() & kMaxLongShiftDistance);
5093 if (rotate->IsRor()) {
5094 __ rorq(first_reg, imm);
5095 } else {
5096 DCHECK(rotate->IsRol());
5097 __ rolq(first_reg, imm);
5098 }
5099 }
5100 break;
5101 default:
5102 LOG(FATAL) << "Unexpected operation type " << rotate->GetResultType();
5103 UNREACHABLE();
5104 }
5105 }
5106
5107 void LocationsBuilderX86_64::VisitRol(HRol* rol) {
5108 HandleRotate(rol);
5109 }
5110
5111 void InstructionCodeGeneratorX86_64::VisitRol(HRol* rol) {
5112 HandleRotate(rol);
5113 }
5114
5115 void LocationsBuilderX86_64::VisitRor(HRor* ror) {
5116 HandleRotate(ror);
5117 }
5118
5119 void InstructionCodeGeneratorX86_64::VisitRor(HRor* ror) {
5120 HandleRotate(ror);
5121 }
5122
5123 void LocationsBuilderX86_64::VisitShl(HShl* shl) {
5124 HandleShift(shl);
5125 }
5126
5127 void InstructionCodeGeneratorX86_64::VisitShl(HShl* shl) {
5128 HandleShift(shl);
5129 }
5130
5131 void LocationsBuilderX86_64::VisitShr(HShr* shr) {
5132 HandleShift(shr);
5133 }
5134
5135 void InstructionCodeGeneratorX86_64::VisitShr(HShr* shr) {
5136 HandleShift(shr);
5137 }
5138
5139 void LocationsBuilderX86_64::VisitUShr(HUShr* ushr) {
5140 HandleShift(ushr);
5141 }
5142
5143 void InstructionCodeGeneratorX86_64::VisitUShr(HUShr* ushr) {
5144 HandleShift(ushr);
5145 }
5146
5147 void LocationsBuilderX86_64::VisitNewInstance(HNewInstance* instruction) {
5148 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5149 instruction, LocationSummary::kCallOnMainOnly);
5150 InvokeRuntimeCallingConvention calling_convention;
5151 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5152 locations->SetOut(Location::RegisterLocation(RAX));
5153 }
5154
5155 void InstructionCodeGeneratorX86_64::VisitNewInstance(HNewInstance* instruction) {
5156 codegen_->InvokeRuntime(instruction->GetEntrypoint(), instruction);
5157 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
5158 DCHECK(!codegen_->IsLeafMethod());
5159 }
5160
5161 void LocationsBuilderX86_64::VisitNewArray(HNewArray* instruction) {
5162 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5163 instruction, LocationSummary::kCallOnMainOnly);
5164 InvokeRuntimeCallingConvention calling_convention;
5165 locations->SetOut(Location::RegisterLocation(RAX));
5166 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
5167 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
5168 }
5169
5170 void InstructionCodeGeneratorX86_64::VisitNewArray(HNewArray* instruction) {
5171 // Note: if heap poisoning is enabled, the entry point takes care of poisoning the reference.
5172 QuickEntrypointEnum entrypoint = CodeGenerator::GetArrayAllocationEntrypoint(instruction);
5173 codegen_->InvokeRuntime(entrypoint, instruction);
5174 CheckEntrypointTypes<kQuickAllocArrayResolved, void*, mirror::Class*, int32_t>();
5175 DCHECK(!codegen_->IsLeafMethod());
5176 }
5177
5178 void LocationsBuilderX86_64::VisitParameterValue(HParameterValue* instruction) {
5179 LocationSummary* locations =
5180 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5181 Location location = parameter_visitor_.GetNextLocation(instruction->GetType());
5182 if (location.IsStackSlot()) {
5183 location = Location::StackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5184 } else if (location.IsDoubleStackSlot()) {
5185 location = Location::DoubleStackSlot(location.GetStackIndex() + codegen_->GetFrameSize());
5186 }
5187 locations->SetOut(location);
5188 }
5189
5190 void InstructionCodeGeneratorX86_64::VisitParameterValue(
5191 [[maybe_unused]] HParameterValue* instruction) {
5192 // Nothing to do, the parameter is already at its location.
5193 }
5194
5195 void LocationsBuilderX86_64::VisitCurrentMethod(HCurrentMethod* instruction) {
5196 LocationSummary* locations =
5197 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5198 locations->SetOut(Location::RegisterLocation(kMethodRegisterArgument));
5199 }
5200
5201 void InstructionCodeGeneratorX86_64::VisitCurrentMethod(
5202 [[maybe_unused]] HCurrentMethod* instruction) {
5203 // Nothing to do, the method is already at its location.
5204 }
5205
5206 void LocationsBuilderX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5207 LocationSummary* locations =
5208 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5209 locations->SetInAt(0, Location::RequiresRegister());
5210 locations->SetOut(Location::RequiresRegister());
5211 }
5212
5213 void InstructionCodeGeneratorX86_64::VisitClassTableGet(HClassTableGet* instruction) {
5214 LocationSummary* locations = instruction->GetLocations();
5215 if (instruction->GetTableKind() == HClassTableGet::TableKind::kVTable) {
5216 uint32_t method_offset = mirror::Class::EmbeddedVTableEntryOffset(
5217 instruction->GetIndex(), kX86_64PointerSize).SizeValue();
5218 __ movq(locations->Out().AsRegister<CpuRegister>(),
5219 Address(locations->InAt(0).AsRegister<CpuRegister>(), method_offset));
5220 } else {
5221 uint32_t method_offset = static_cast<uint32_t>(ImTable::OffsetOfElement(
5222 instruction->GetIndex(), kX86_64PointerSize));
5223 __ movq(locations->Out().AsRegister<CpuRegister>(),
5224 Address(locations->InAt(0).AsRegister<CpuRegister>(),
5225 mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
5226 __ movq(locations->Out().AsRegister<CpuRegister>(),
5227 Address(locations->Out().AsRegister<CpuRegister>(), method_offset));
5228 }
5229 }
5230
5231 void LocationsBuilderX86_64::VisitNot(HNot* not_) {
5232 LocationSummary* locations =
5233 new (GetGraph()->GetAllocator()) LocationSummary(not_, LocationSummary::kNoCall);
5234 locations->SetInAt(0, Location::RequiresRegister());
5235 locations->SetOut(Location::SameAsFirstInput());
5236 }
5237
5238 void InstructionCodeGeneratorX86_64::VisitNot(HNot* not_) {
5239 LocationSummary* locations = not_->GetLocations();
5240 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5241 locations->Out().AsRegister<CpuRegister>().AsRegister());
5242 Location out = locations->Out();
5243 switch (not_->GetResultType()) {
5244 case DataType::Type::kInt32:
5245 __ notl(out.AsRegister<CpuRegister>());
5246 break;
5247
5248 case DataType::Type::kInt64:
5249 __ notq(out.AsRegister<CpuRegister>());
5250 break;
5251
5252 default:
5253 LOG(FATAL) << "Unimplemented type for not operation " << not_->GetResultType();
5254 }
5255 }
5256
5257 void LocationsBuilderX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5258 LocationSummary* locations =
5259 new (GetGraph()->GetAllocator()) LocationSummary(bool_not, LocationSummary::kNoCall);
5260 locations->SetInAt(0, Location::RequiresRegister());
5261 locations->SetOut(Location::SameAsFirstInput());
5262 }
5263
5264 void InstructionCodeGeneratorX86_64::VisitBooleanNot(HBooleanNot* bool_not) {
5265 LocationSummary* locations = bool_not->GetLocations();
5266 DCHECK_EQ(locations->InAt(0).AsRegister<CpuRegister>().AsRegister(),
5267 locations->Out().AsRegister<CpuRegister>().AsRegister());
5268 Location out = locations->Out();
5269 __ xorl(out.AsRegister<CpuRegister>(), Immediate(1));
5270 }
5271
5272 void LocationsBuilderX86_64::VisitPhi(HPhi* instruction) {
5273 LocationSummary* locations =
5274 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5275 for (size_t i = 0, e = locations->GetInputCount(); i < e; ++i) {
5276 locations->SetInAt(i, Location::Any());
5277 }
5278 locations->SetOut(Location::Any());
5279 }
5280
5281 void InstructionCodeGeneratorX86_64::VisitPhi([[maybe_unused]] HPhi* instruction) {
5282 LOG(FATAL) << "Unimplemented";
5283 }
5284
5285 void CodeGeneratorX86_64::GenerateMemoryBarrier(MemBarrierKind kind) {
5286 /*
5287 * According to the JSR-133 Cookbook, for x86-64 only StoreLoad/AnyAny barriers need a memory fence.
5288 * All other barriers (LoadAny, AnyStore, StoreStore) are nops due to the x86-64 memory model.
5289 * For those cases, all we need to ensure is that there is a scheduling barrier in place.
5290 */
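// (Under x86-64's TSO model, ignoring non-temporal stores, the only reordering the hardware
// performs is letting a store pass a later load, which is why only the StoreLoad/AnyAny case
// needs an actual fence instruction.)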
5291 switch (kind) {
5292 case MemBarrierKind::kAnyAny: {
5293 MemoryFence();
5294 break;
5295 }
5296 case MemBarrierKind::kAnyStore:
5297 case MemBarrierKind::kLoadAny:
5298 case MemBarrierKind::kStoreStore: {
5299 // nop
5300 break;
5301 }
5302 case MemBarrierKind::kNTStoreStore:
5303 // Non-Temporal Store/Store needs an explicit fence.
5304 MemoryFence(/* non-temporal= */ true);
5305 break;
5306 }
5307 }
5308
5309 void LocationsBuilderX86_64::HandleFieldGet(HInstruction* instruction) {
5310 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5311
5312 bool object_field_get_with_read_barrier =
5313 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5314 LocationSummary* locations =
5315 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5316 object_field_get_with_read_barrier
5317 ? LocationSummary::kCallOnSlowPath
5318 : LocationSummary::kNoCall);
5319 if (object_field_get_with_read_barrier && kUseBakerReadBarrier) {
5320 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5321 }
5322 // receiver_input
5323 locations->SetInAt(0, Location::RequiresRegister());
5324 if (DataType::IsFloatingPointType(instruction->GetType())) {
5325 locations->SetOut(Location::RequiresFpuRegister());
5326 } else {
5327 // The output overlaps for an object field get when read barriers are
5328 // enabled: we do not want the move to overwrite the object's location, as
5329 // we need it to emit the read barrier. For predicated instructions we can
5330 // always overlap since the output is SameAsFirst and the default value.
5331 locations->SetOut(
5332 Location::RequiresRegister(),
5333 object_field_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5334 }
5335 }
5336
5337 void InstructionCodeGeneratorX86_64::HandleFieldGet(HInstruction* instruction,
5338 const FieldInfo& field_info) {
5339 DCHECK(instruction->IsInstanceFieldGet() || instruction->IsStaticFieldGet());
5340
5341 LocationSummary* locations = instruction->GetLocations();
5342 Location base_loc = locations->InAt(0);
5343 CpuRegister base = base_loc.AsRegister<CpuRegister>();
5344 Location out = locations->Out();
5345 bool is_volatile = field_info.IsVolatile();
5346 DCHECK_EQ(DataType::Size(field_info.GetFieldType()), DataType::Size(instruction->GetType()));
5347 DataType::Type load_type = instruction->GetType();
5348 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5349
5350 if (load_type == DataType::Type::kReference) {
5351 // /* HeapReference<Object> */ out = *(base + offset)
5352 if (codegen_->EmitBakerReadBarrier()) {
5353 // Note that a potential implicit null check is handled in this
5354 // CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier call.
5355 codegen_->GenerateFieldLoadWithBakerReadBarrier(
5356 instruction, out, base, offset, /* needs_null_check= */ true);
5357 if (is_volatile) {
5358 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5359 }
5360 } else {
5361 __ movl(out.AsRegister<CpuRegister>(), Address(base, offset));
5362 codegen_->MaybeRecordImplicitNullCheck(instruction);
5363 if (is_volatile) {
5364 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5365 }
5366 // If read barriers are enabled, emit read barriers other than
5367 // Baker's using a slow path (and also unpoison the loaded
5368 // reference, if heap poisoning is enabled).
5369 codegen_->MaybeGenerateReadBarrierSlow(instruction, out, out, base_loc, offset);
5370 }
5371 } else {
5372 codegen_->LoadFromMemoryNoReference(load_type, out, Address(base, offset));
5373 codegen_->MaybeRecordImplicitNullCheck(instruction);
5374 if (is_volatile) {
5375 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
5376 }
5377 }
5378 }
5379
5380 void LocationsBuilderX86_64::HandleFieldSet(HInstruction* instruction,
5381 const FieldInfo& field_info,
5382 WriteBarrierKind write_barrier_kind) {
5383 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5384
5385 LocationSummary* locations =
5386 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
5387 DataType::Type field_type = field_info.GetFieldType();
5388 bool is_volatile = field_info.IsVolatile();
5389 bool needs_write_barrier =
5390 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5391 bool check_gc_card =
5392 codegen_->ShouldCheckGCCard(field_type, instruction->InputAt(1), write_barrier_kind);
5393
5394 locations->SetInAt(0, Location::RequiresRegister());
5395 if (DataType::IsFloatingPointType(instruction->InputAt(1)->GetType())) {
5396 if (is_volatile) {
5397 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5398 locations->SetInAt(1, Location::FpuRegisterOrInt32Constant(instruction->InputAt(1)));
5399 } else {
5400 locations->SetInAt(1, Location::FpuRegisterOrConstant(instruction->InputAt(1)));
5401 }
5402 } else {
5403 if (is_volatile) {
5404 // In order to satisfy the semantics of volatile, this must be a single instruction store.
5405 locations->SetInAt(1, Location::RegisterOrInt32Constant(instruction->InputAt(1)));
5406 } else {
5407 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5408 }
5409 }
5410
5411 // TODO(solanes): We could reduce the temp usage but it requires some non-trivial refactoring of
5412 // InstructionCodeGeneratorX86_64::HandleFieldSet, GenerateVarHandleSet due to `extra_temp_index`.
5413 if (needs_write_barrier ||
5414 check_gc_card ||
5415 (kPoisonHeapReferences && field_type == DataType::Type::kReference)) {
5416 // Temporary registers for the write barrier / reference poisoning.
5417 locations->AddRegisterTemps(2);
5418 }
5419 }
5420
5421 void InstructionCodeGeneratorX86_64::Bswap(Location value,
5422 DataType::Type type,
5423 CpuRegister* temp) {
5424 switch (type) {
5425 case DataType::Type::kInt16:
5426 // This should sign-extend, even if reimplemented with an XCHG of 8-bit registers.
5427 __ bswapl(value.AsRegister<CpuRegister>());
5428 __ sarl(value.AsRegister<CpuRegister>(), Immediate(16));
5429 break;
5430 case DataType::Type::kUint16:
5431 // TODO: Can be done with an XCHG of 8-bit registers. This is straight from Quick.
5432 __ bswapl(value.AsRegister<CpuRegister>());
5433 __ shrl(value.AsRegister<CpuRegister>(), Immediate(16));
5434 break;
5435 case DataType::Type::kInt32:
5436 case DataType::Type::kUint32:
5437 __ bswapl(value.AsRegister<CpuRegister>());
5438 break;
5439 case DataType::Type::kInt64:
5440 case DataType::Type::kUint64:
5441 __ bswapq(value.AsRegister<CpuRegister>());
5442 break;
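// There is no byte-swap instruction for XMM registers, so floating-point values are moved
// to a general-purpose temporary, swapped there, and moved back.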
5443 case DataType::Type::kFloat32: {
5444 DCHECK_NE(temp, nullptr);
5445 __ movd(*temp, value.AsFpuRegister<XmmRegister>());
5446 __ bswapl(*temp);
5447 __ movd(value.AsFpuRegister<XmmRegister>(), *temp);
5448 break;
5449 }
5450 case DataType::Type::kFloat64: {
5451 DCHECK_NE(temp, nullptr);
5452 __ movq(*temp, value.AsFpuRegister<XmmRegister>());
5453 __ bswapq(*temp);
5454 __ movq(value.AsFpuRegister<XmmRegister>(), *temp);
5455 break;
5456 }
5457 default:
5458 LOG(FATAL) << "Unexpected type for reverse-bytes: " << type;
5459 UNREACHABLE();
5460 }
5461 }
5462
5463 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5464 uint32_t value_index,
5465 uint32_t extra_temp_index,
5466 DataType::Type field_type,
5467 Address field_addr,
5468 CpuRegister base,
5469 bool is_volatile,
5470 bool is_atomic,
5471 bool value_can_be_null,
5472 bool byte_swap,
5473 WriteBarrierKind write_barrier_kind) {
5474 LocationSummary* locations = instruction->GetLocations();
5475 Location value = locations->InAt(value_index);
5476
5477 if (is_volatile) {
5478 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5479 }
5480
5481 bool maybe_record_implicit_null_check_done = false;
5482
5483 if (value.IsConstant()) {
5484 switch (field_type) {
5485 case DataType::Type::kBool:
5486 case DataType::Type::kUint8:
5487 case DataType::Type::kInt8:
5488 __ movb(field_addr, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5489 break;
5490 case DataType::Type::kUint16:
5491 case DataType::Type::kInt16: {
5492 int16_t v = CodeGenerator::GetInt16ValueOf(value.GetConstant());
5493 if (byte_swap) {
5494 v = BSWAP(v);
5495 }
5496 __ movw(field_addr, Immediate(v));
5497 break;
5498 }
5499 case DataType::Type::kUint32:
5500 case DataType::Type::kInt32:
5501 case DataType::Type::kFloat32:
5502 case DataType::Type::kReference: {
5503 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
5504 if (byte_swap) {
5505 v = BSWAP(v);
5506 }
5507 DCHECK_IMPLIES(field_type == DataType::Type::kReference, v == 0);
5508 // Note: if heap poisoning is enabled, no need to poison
5509 // (negate) `v` if it is a reference, as it would be null.
5510 __ movl(field_addr, Immediate(v));
5511 break;
5512 }
5513 case DataType::Type::kUint64:
5514 case DataType::Type::kInt64:
5515 case DataType::Type::kFloat64: {
5516 int64_t v = CodeGenerator::GetInt64ValueOf(value.GetConstant());
5517 if (byte_swap) {
5518 v = BSWAP(v);
5519 }
5520 if (is_atomic) {
5521 // Move constant into a register, then atomically store the register to memory.
5522 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5523 __ movq(temp, Immediate(v));
5524 __ movq(field_addr, temp);
5525 } else {
5526 Address field_addr2 = Address::displace(field_addr, sizeof(int32_t));
5527 codegen_->MoveInt64ToAddress(field_addr, field_addr2, v, instruction);
5528 }
5529 maybe_record_implicit_null_check_done = true;
5530 break;
5531 }
5532 case DataType::Type::kVoid:
5533 LOG(FATAL) << "Unreachable type " << field_type;
5534 UNREACHABLE();
5535 }
5536 } else {
5537 if (byte_swap) {
5538 // Swap byte order in-place in the input register (we will restore it later).
5539 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5540 Bswap(value, field_type, &temp);
5541 }
5542
5543 switch (field_type) {
5544 case DataType::Type::kBool:
5545 case DataType::Type::kUint8:
5546 case DataType::Type::kInt8:
5547 __ movb(field_addr, value.AsRegister<CpuRegister>());
5548 break;
5549 case DataType::Type::kUint16:
5550 case DataType::Type::kInt16:
5551 __ movw(field_addr, value.AsRegister<CpuRegister>());
5552 break;
5553 case DataType::Type::kUint32:
5554 case DataType::Type::kInt32:
5555 case DataType::Type::kReference:
5556 if (kPoisonHeapReferences && field_type == DataType::Type::kReference) {
5557 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5558 __ movl(temp, value.AsRegister<CpuRegister>());
5559 __ PoisonHeapReference(temp);
5560 __ movl(field_addr, temp);
5561 } else {
5562 __ movl(field_addr, value.AsRegister<CpuRegister>());
5563 }
5564 break;
5565 case DataType::Type::kUint64:
5566 case DataType::Type::kInt64:
5567 __ movq(field_addr, value.AsRegister<CpuRegister>());
5568 break;
5569 case DataType::Type::kFloat32:
5570 __ movss(field_addr, value.AsFpuRegister<XmmRegister>());
5571 break;
5572 case DataType::Type::kFloat64:
5573 __ movsd(field_addr, value.AsFpuRegister<XmmRegister>());
5574 break;
5575 case DataType::Type::kVoid:
5576 LOG(FATAL) << "Unreachable type " << field_type;
5577 UNREACHABLE();
5578 }
5579
5580 if (byte_swap) {
5581 // Restore byte order.
5582 CpuRegister temp = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5583 Bswap(value, field_type, &temp);
5584 }
5585 }
5586
5587 if (!maybe_record_implicit_null_check_done) {
5588 codegen_->MaybeRecordImplicitNullCheck(instruction);
5589 }
5590
5591 bool needs_write_barrier =
5592 codegen_->StoreNeedsWriteBarrier(field_type, instruction->InputAt(1), write_barrier_kind);
5593 if (needs_write_barrier) {
5594 if (value.IsConstant()) {
5595 DCHECK(value.GetConstant()->IsNullConstant());
5596 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5597 DCHECK_NE(extra_temp_index, 0u);
5598 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5599 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5600 codegen_->MarkGCCard(temp, card, base);
5601 }
5602 } else {
5603 DCHECK_NE(extra_temp_index, 0u);
5604 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5605 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5606 codegen_->MaybeMarkGCCard(
5607 temp,
5608 card,
5609 base,
5610 value.AsRegister<CpuRegister>(),
5611 value_can_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn);
5612 }
5613 } else if (codegen_->ShouldCheckGCCard(
5614 field_type, instruction->InputAt(value_index), write_barrier_kind)) {
5615 DCHECK_NE(extra_temp_index, 0u);
5616 DCHECK(value.IsRegister());
5617 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5618 CpuRegister card = locations->GetTemp(extra_temp_index).AsRegister<CpuRegister>();
5619 codegen_->CheckGCCardIsValid(temp, card, base);
5620 }
5621
5622 if (is_volatile) {
5623 codegen_->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5624 }
5625 }
5626
5627 void InstructionCodeGeneratorX86_64::HandleFieldSet(HInstruction* instruction,
5628 const FieldInfo& field_info,
5629 bool value_can_be_null,
5630 WriteBarrierKind write_barrier_kind) {
5631 DCHECK(instruction->IsInstanceFieldSet() || instruction->IsStaticFieldSet());
5632
5633 LocationSummary* locations = instruction->GetLocations();
5634 CpuRegister base = locations->InAt(0).AsRegister<CpuRegister>();
5635 bool is_volatile = field_info.IsVolatile();
5636 DataType::Type field_type = field_info.GetFieldType();
5637 uint32_t offset = field_info.GetFieldOffset().Uint32Value();
5638
5639 HandleFieldSet(instruction,
5640 /*value_index=*/ 1,
5641 /*extra_temp_index=*/ 1,
5642 field_type,
5643 Address(base, offset),
5644 base,
5645 is_volatile,
5646 /*is_atomic=*/ false,
5647 value_can_be_null,
5648 /*byte_swap=*/ false,
5649 write_barrier_kind);
5650 }
5651
5652 void LocationsBuilderX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5653 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5654 }
5655
5656 void InstructionCodeGeneratorX86_64::VisitInstanceFieldSet(HInstanceFieldSet* instruction) {
5657 HandleFieldSet(instruction,
5658 instruction->GetFieldInfo(),
5659 instruction->GetValueCanBeNull(),
5660 instruction->GetWriteBarrierKind());
5661 }
5662
5663 void LocationsBuilderX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5664 HandleFieldGet(instruction);
5665 }
5666
5667 void InstructionCodeGeneratorX86_64::VisitInstanceFieldGet(HInstanceFieldGet* instruction) {
5668 HandleFieldGet(instruction, instruction->GetFieldInfo());
5669 }
5670
5671 void LocationsBuilderX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5672 HandleFieldGet(instruction);
5673 }
5674
5675 void InstructionCodeGeneratorX86_64::VisitStaticFieldGet(HStaticFieldGet* instruction) {
5676 HandleFieldGet(instruction, instruction->GetFieldInfo());
5677 }
5678
5679 void LocationsBuilderX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5680 HandleFieldSet(instruction, instruction->GetFieldInfo(), instruction->GetWriteBarrierKind());
5681 }
5682
5683 void InstructionCodeGeneratorX86_64::VisitStaticFieldSet(HStaticFieldSet* instruction) {
5684 HandleFieldSet(instruction,
5685 instruction->GetFieldInfo(),
5686 instruction->GetValueCanBeNull(),
5687 instruction->GetWriteBarrierKind());
5688 }
5689
5690 void LocationsBuilderX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
5691 codegen_->CreateStringBuilderAppendLocations(instruction, Location::RegisterLocation(RAX));
5692 }
5693
5694 void InstructionCodeGeneratorX86_64::VisitStringBuilderAppend(HStringBuilderAppend* instruction) {
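  // Materialize the append format in RDI, the first argument register of the native calling
  // convention, before invoking the runtime entrypoint.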
5695 __ movl(CpuRegister(RDI), Immediate(instruction->GetFormat()->GetValue()));
5696 codegen_->InvokeRuntime(kQuickStringBuilderAppend, instruction);
5697 }
5698
5699 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldGet(
5700 HUnresolvedInstanceFieldGet* instruction) {
5701 FieldAccessCallingConventionX86_64 calling_convention;
5702 codegen_->CreateUnresolvedFieldLocationSummary(
5703 instruction, instruction->GetFieldType(), calling_convention);
5704 }
5705
5706 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldGet(
5707 HUnresolvedInstanceFieldGet* instruction) {
5708 FieldAccessCallingConventionX86_64 calling_convention;
5709 codegen_->GenerateUnresolvedFieldAccess(instruction,
5710 instruction->GetFieldType(),
5711 instruction->GetFieldIndex(),
5712 calling_convention);
5713 }
5714
5715 void LocationsBuilderX86_64::VisitUnresolvedInstanceFieldSet(
5716 HUnresolvedInstanceFieldSet* instruction) {
5717 FieldAccessCallingConventionX86_64 calling_convention;
5718 codegen_->CreateUnresolvedFieldLocationSummary(
5719 instruction, instruction->GetFieldType(), calling_convention);
5720 }
5721
5722 void InstructionCodeGeneratorX86_64::VisitUnresolvedInstanceFieldSet(
5723 HUnresolvedInstanceFieldSet* instruction) {
5724 FieldAccessCallingConventionX86_64 calling_convention;
5725 codegen_->GenerateUnresolvedFieldAccess(instruction,
5726 instruction->GetFieldType(),
5727 instruction->GetFieldIndex(),
5728 calling_convention);
5729 }
5730
5731 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldGet(
5732 HUnresolvedStaticFieldGet* instruction) {
5733 FieldAccessCallingConventionX86_64 calling_convention;
5734 codegen_->CreateUnresolvedFieldLocationSummary(
5735 instruction, instruction->GetFieldType(), calling_convention);
5736 }
5737
5738 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldGet(
5739 HUnresolvedStaticFieldGet* instruction) {
5740 FieldAccessCallingConventionX86_64 calling_convention;
5741 codegen_->GenerateUnresolvedFieldAccess(instruction,
5742 instruction->GetFieldType(),
5743 instruction->GetFieldIndex(),
5744 calling_convention);
5745 }
5746
5747 void LocationsBuilderX86_64::VisitUnresolvedStaticFieldSet(
5748 HUnresolvedStaticFieldSet* instruction) {
5749 FieldAccessCallingConventionX86_64 calling_convention;
5750 codegen_->CreateUnresolvedFieldLocationSummary(
5751 instruction, instruction->GetFieldType(), calling_convention);
5752 }
5753
5754 void InstructionCodeGeneratorX86_64::VisitUnresolvedStaticFieldSet(
5755 HUnresolvedStaticFieldSet* instruction) {
5756 FieldAccessCallingConventionX86_64 calling_convention;
5757 codegen_->GenerateUnresolvedFieldAccess(instruction,
5758 instruction->GetFieldType(),
5759 instruction->GetFieldIndex(),
5760 calling_convention);
5761 }
5762
5763 void LocationsBuilderX86_64::VisitNullCheck(HNullCheck* instruction) {
5764 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction);
5765 Location loc = codegen_->GetCompilerOptions().GetImplicitNullChecks()
5766 ? Location::RequiresRegister()
5767 : Location::Any();
5768 locations->SetInAt(0, loc);
5769 }
5770
5771 void CodeGeneratorX86_64::GenerateImplicitNullCheck(HNullCheck* instruction) {
5772 if (CanMoveNullCheckToUser(instruction)) {
5773 return;
5774 }
5775 LocationSummary* locations = instruction->GetLocations();
5776 Location obj = locations->InAt(0);
5777
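  // The test only dereferences `obj` (offset 0) to provoke a fault when it is null; the fault
  // handler turns that access into a NullPointerException. RAX and the flags result are unused.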
5778 __ testl(CpuRegister(RAX), Address(obj.AsRegister<CpuRegister>(), 0));
5779 RecordPcInfo(instruction);
5780 }
5781
5782 void CodeGeneratorX86_64::GenerateExplicitNullCheck(HNullCheck* instruction) {
5783 SlowPathCode* slow_path = new (GetScopedAllocator()) NullCheckSlowPathX86_64(instruction);
5784 AddSlowPath(slow_path);
5785
5786 LocationSummary* locations = instruction->GetLocations();
5787 Location obj = locations->InAt(0);
5788
5789 if (obj.IsRegister()) {
5790 __ testl(obj.AsRegister<CpuRegister>(), obj.AsRegister<CpuRegister>());
5791 } else if (obj.IsStackSlot()) {
5792 __ cmpl(Address(CpuRegister(RSP), obj.GetStackIndex()), Immediate(0));
5793 } else {
5794 DCHECK(obj.IsConstant()) << obj;
5795 DCHECK(obj.GetConstant()->IsNullConstant());
5796 __ jmp(slow_path->GetEntryLabel());
5797 return;
5798 }
5799 __ j(kEqual, slow_path->GetEntryLabel());
5800 }
5801
5802 void InstructionCodeGeneratorX86_64::VisitNullCheck(HNullCheck* instruction) {
5803 codegen_->GenerateNullCheck(instruction);
5804 }
5805
5806 void LocationsBuilderX86_64::VisitArrayGet(HArrayGet* instruction) {
5807 bool object_array_get_with_read_barrier =
5808 (instruction->GetType() == DataType::Type::kReference) && codegen_->EmitReadBarrier();
5809 LocationSummary* locations =
5810 new (GetGraph()->GetAllocator()) LocationSummary(instruction,
5811 object_array_get_with_read_barrier
5812 ? LocationSummary::kCallOnSlowPath
5813 : LocationSummary::kNoCall);
5814 if (object_array_get_with_read_barrier && kUseBakerReadBarrier) {
5815 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
5816 }
5817 locations->SetInAt(0, Location::RequiresRegister());
5818 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5819 if (DataType::IsFloatingPointType(instruction->GetType())) {
5820 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
5821 } else {
5822 // The output overlaps for an object array get when read barriers
5823 // are enabled: we do not want the move to overwrite the array's
5824 // location, as we need it to emit the read barrier.
5825 locations->SetOut(
5826 Location::RequiresRegister(),
5827 object_array_get_with_read_barrier ? Location::kOutputOverlap : Location::kNoOutputOverlap);
5828 }
5829 }
5830
5831 void InstructionCodeGeneratorX86_64::VisitArrayGet(HArrayGet* instruction) {
5832 LocationSummary* locations = instruction->GetLocations();
5833 Location obj_loc = locations->InAt(0);
5834 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
5835 Location index = locations->InAt(1);
5836 Location out_loc = locations->Out();
5837 uint32_t data_offset = CodeGenerator::GetArrayDataOffset(instruction);
5838
5839 DataType::Type type = instruction->GetType();
5840 if (type == DataType::Type::kReference) {
5841 static_assert(
5842 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
5843 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
5844 // /* HeapReference<Object> */ out =
5845 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
5846 if (codegen_->EmitBakerReadBarrier()) {
5847 // Note that a potential implicit null check is handled in this
5848 // CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier call.
5849 codegen_->GenerateArrayLoadWithBakerReadBarrier(
5850 instruction, out_loc, obj, data_offset, index, /* needs_null_check= */ true);
5851 } else {
5852 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5853 __ movl(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset));
5854 codegen_->MaybeRecordImplicitNullCheck(instruction);
5855 // If read barriers are enabled, emit read barriers other than
5856 // Baker's using a slow path (and also unpoison the loaded
5857 // reference, if heap poisoning is enabled).
5858 if (index.IsConstant()) {
5859 uint32_t offset =
5860 (index.GetConstant()->AsIntConstant()->GetValue() << TIMES_4) + data_offset;
5861 codegen_->MaybeGenerateReadBarrierSlow(instruction, out_loc, out_loc, obj_loc, offset);
5862 } else {
5863 codegen_->MaybeGenerateReadBarrierSlow(
5864 instruction, out_loc, out_loc, obj_loc, data_offset, index);
5865 }
5866 }
5867 } else {
5868 if (type == DataType::Type::kUint16
5869 && mirror::kUseStringCompression
5870 && instruction->IsStringCharAt()) {
5871 // Branch to the compressed or uncompressed case depending on the string's compression flag.
5872 CpuRegister out = out_loc.AsRegister<CpuRegister>();
5873 uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
5874 NearLabel done, not_compressed;
5875 __ testb(Address(obj, count_offset), Immediate(1));
5876 codegen_->MaybeRecordImplicitNullCheck(instruction);
5877 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
5878 "Expecting 0=compressed, 1=uncompressed");
5879 __ j(kNotZero, &not_compressed);
5880 __ movzxb(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_1, data_offset));
5881 __ jmp(&done);
5882 __ Bind(&not_compressed);
5883 __ movzxw(out, CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_2, data_offset));
5884 __ Bind(&done);
5885 } else {
5886 ScaleFactor scale = CodeGenerator::ScaleFactorForType(type);
5887 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, scale, data_offset);
5888 codegen_->LoadFromMemoryNoReference(type, out_loc, src);
5889 }
5890 codegen_->MaybeRecordImplicitNullCheck(instruction);
5891 }
5892 }
5893
5894 void LocationsBuilderX86_64::VisitArraySet(HArraySet* instruction) {
5895 DataType::Type value_type = instruction->GetComponentType();
5896
5897 WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5898 bool needs_write_barrier =
5899 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5900 bool check_gc_card =
5901 codegen_->ShouldCheckGCCard(value_type, instruction->GetValue(), write_barrier_kind);
5902 bool needs_type_check = instruction->NeedsTypeCheck();
5903
5904 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
5905 instruction,
5906 needs_type_check ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall);
5907
5908 locations->SetInAt(0, Location::RequiresRegister());
5909 locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
5910 if (DataType::IsFloatingPointType(value_type)) {
5911 locations->SetInAt(2, Location::FpuRegisterOrConstant(instruction->InputAt(2)));
5912 } else {
5913 locations->SetInAt(2, Location::RegisterOrConstant(instruction->InputAt(2)));
5914 }
5915
5916 if (needs_write_barrier || check_gc_card) {
5917 // Used by reference poisoning, type checking, emitting write barrier, or checking write
5918 // barrier.
5919 locations->AddTemp(Location::RequiresRegister());
5920 // Only used when emitting a write barrier, or when checking for the card table.
5921 locations->AddTemp(Location::RequiresRegister());
5922 } else if ((kPoisonHeapReferences && value_type == DataType::Type::kReference) ||
5923 instruction->NeedsTypeCheck()) {
5924 // Used for poisoning or type checking.
5925 locations->AddTemp(Location::RequiresRegister());
5926 }
5927 }
5928
5929 void InstructionCodeGeneratorX86_64::VisitArraySet(HArraySet* instruction) {
5930 LocationSummary* locations = instruction->GetLocations();
5931 Location array_loc = locations->InAt(0);
5932 CpuRegister array = array_loc.AsRegister<CpuRegister>();
5933 Location index = locations->InAt(1);
5934 Location value = locations->InAt(2);
5935 DataType::Type value_type = instruction->GetComponentType();
5936 bool needs_type_check = instruction->NeedsTypeCheck();
5937 const WriteBarrierKind write_barrier_kind = instruction->GetWriteBarrierKind();
5938 bool needs_write_barrier =
5939 codegen_->StoreNeedsWriteBarrier(value_type, instruction->GetValue(), write_barrier_kind);
5940
5941 switch (value_type) {
5942 case DataType::Type::kBool:
5943 case DataType::Type::kUint8:
5944 case DataType::Type::kInt8: {
5945 uint32_t offset = mirror::Array::DataOffset(sizeof(uint8_t)).Uint32Value();
5946 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_1, offset);
5947 if (value.IsRegister()) {
5948 __ movb(address, value.AsRegister<CpuRegister>());
5949 } else {
5950 __ movb(address, Immediate(CodeGenerator::GetInt8ValueOf(value.GetConstant())));
5951 }
5952 codegen_->MaybeRecordImplicitNullCheck(instruction);
5953 break;
5954 }
5955
5956 case DataType::Type::kUint16:
5957 case DataType::Type::kInt16: {
5958 uint32_t offset = mirror::Array::DataOffset(sizeof(uint16_t)).Uint32Value();
5959 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_2, offset);
5960 if (value.IsRegister()) {
5961 __ movw(address, value.AsRegister<CpuRegister>());
5962 } else {
5963 DCHECK(value.IsConstant()) << value;
5964 __ movw(address, Immediate(CodeGenerator::GetInt16ValueOf(value.GetConstant())));
5965 }
5966 codegen_->MaybeRecordImplicitNullCheck(instruction);
5967 break;
5968 }
5969
5970 case DataType::Type::kReference: {
5971 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
5972 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
5973
5974 if (!value.IsRegister()) {
5975 // Just setting null.
5976 DCHECK(instruction->InputAt(2)->IsNullConstant());
5977 DCHECK(value.IsConstant()) << value;
5978 __ movl(address, Immediate(0));
5979 codegen_->MaybeRecordImplicitNullCheck(instruction);
5980 if (write_barrier_kind == WriteBarrierKind::kEmitBeingReliedOn) {
5981 // We need to set a write barrier here even though we are writing null, since this write
5982 // barrier is being relied on.
5983 DCHECK(needs_write_barrier);
5984 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
5985 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
5986 codegen_->MarkGCCard(temp, card, array);
5987 }
5988 DCHECK(!needs_type_check);
5989 break;
5990 }
5991
5992 CpuRegister register_value = value.AsRegister<CpuRegister>();
5993 const bool can_value_be_null = instruction->GetValueCanBeNull();
5994 // The WriteBarrierKind::kEmitNotBeingReliedOn case is able to skip the write barrier when its
5995 // value is null (without an extra CompareAndBranchIfZero since we already checked if the
5996 // value is null for the type check).
5997 const bool skip_marking_gc_card =
5998 can_value_be_null && write_barrier_kind == WriteBarrierKind::kEmitNotBeingReliedOn;
5999 NearLabel do_store;
6000 NearLabel skip_writing_card;
6001 if (can_value_be_null) {
6002 __ testl(register_value, register_value);
6003 if (skip_marking_gc_card) {
6004 __ j(kEqual, &skip_writing_card);
6005 } else {
6006 __ j(kEqual, &do_store);
6007 }
6008 }
6009
6010 SlowPathCode* slow_path = nullptr;
6011 if (needs_type_check) {
6012 slow_path = new (codegen_->GetScopedAllocator()) ArraySetSlowPathX86_64(instruction);
6013 codegen_->AddSlowPath(slow_path);
6014
6015 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
6016 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
6017 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
6018
6019 // Note that when Baker read barriers are enabled, the type
6020 // checks are performed without read barriers. This is fine,
6021 // even in the case where a class object is in the from-space
6022 // after the flip, as a comparison involving such a type would
6023 // not produce a false positive; it may of course produce a
6024 // false negative, in which case we would take the ArraySet
6025 // slow path.
6026
6027 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6028 // /* HeapReference<Class> */ temp = array->klass_
6029 __ movl(temp, Address(array, class_offset));
6030 codegen_->MaybeRecordImplicitNullCheck(instruction);
6031 __ MaybeUnpoisonHeapReference(temp);
6032
6033 // /* HeapReference<Class> */ temp = temp->component_type_
6034 __ movl(temp, Address(temp, component_offset));
6035 // If heap poisoning is enabled, no need to unpoison `temp`
6036 // nor the object reference in `register_value->klass`, as
6037 // we are comparing two poisoned references.
6038 __ cmpl(temp, Address(register_value, class_offset));
6039
6040 if (instruction->StaticTypeOfArrayIsObjectArray()) {
6041 NearLabel do_put;
6042 __ j(kEqual, &do_put);
6043 // If heap poisoning is enabled, the `temp` reference has
6044 // not been unpoisoned yet; unpoison it now.
6045 __ MaybeUnpoisonHeapReference(temp);
6046
6047 // If heap poisoning is enabled, no need to unpoison the
6048 // heap reference loaded below, as it is only used for a
6049 // comparison with null.
6050 __ cmpl(Address(temp, super_offset), Immediate(0));
6051 __ j(kNotEqual, slow_path->GetEntryLabel());
6052 __ Bind(&do_put);
6053 } else {
6054 __ j(kNotEqual, slow_path->GetEntryLabel());
6055 }
6056 }
6057
6058 if (can_value_be_null && !skip_marking_gc_card) {
6059 DCHECK(do_store.IsLinked());
6060 __ Bind(&do_store);
6061 }
6062
6063 if (needs_write_barrier) {
6064 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6065 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6066 codegen_->MarkGCCard(temp, card, array);
6067 } else if (codegen_->ShouldCheckGCCard(
6068 value_type, instruction->GetValue(), write_barrier_kind)) {
6069 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
6070 CpuRegister card = locations->GetTemp(1).AsRegister<CpuRegister>();
6071 codegen_->CheckGCCardIsValid(temp, card, array);
6072 }
6073
6074 if (skip_marking_gc_card) {
6075 // Note that we don't check that the GC card is valid as it can be correctly clean.
6076 DCHECK(skip_writing_card.IsLinked());
6077 __ Bind(&skip_writing_card);
6078 }
6079
6080 Location source = value;
6081 if (kPoisonHeapReferences) {
6082 Location temp_loc = locations->GetTemp(0);
6083 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
6084 __ movl(temp, register_value);
6085 __ PoisonHeapReference(temp);
6086 source = temp_loc;
6087 }
6088
6089 __ movl(address, source.AsRegister<CpuRegister>());
6090
6091 if (can_value_be_null || !needs_type_check) {
6092 codegen_->MaybeRecordImplicitNullCheck(instruction);
6093 }
6094
6095 if (slow_path != nullptr) {
6096 __ Bind(slow_path->GetExitLabel());
6097 }
6098
6099 break;
6100 }
6101
6102 case DataType::Type::kInt32: {
6103 uint32_t offset = mirror::Array::DataOffset(sizeof(int32_t)).Uint32Value();
6104 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6105 if (value.IsRegister()) {
6106 __ movl(address, value.AsRegister<CpuRegister>());
6107 } else {
6108 DCHECK(value.IsConstant()) << value;
6109 int32_t v = CodeGenerator::GetInt32ValueOf(value.GetConstant());
6110 __ movl(address, Immediate(v));
6111 }
6112 codegen_->MaybeRecordImplicitNullCheck(instruction);
6113 break;
6114 }
6115
6116 case DataType::Type::kInt64: {
6117 uint32_t offset = mirror::Array::DataOffset(sizeof(int64_t)).Uint32Value();
6118 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6119 if (value.IsRegister()) {
6120 __ movq(address, value.AsRegister<CpuRegister>());
6121 codegen_->MaybeRecordImplicitNullCheck(instruction);
6122 } else {
6123 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
6124 Address address_high =
6125 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6126 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6127 }
6128 break;
6129 }
6130
6131 case DataType::Type::kFloat32: {
6132 uint32_t offset = mirror::Array::DataOffset(sizeof(float)).Uint32Value();
6133 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_4, offset);
6134 if (value.IsFpuRegister()) {
6135 __ movss(address, value.AsFpuRegister<XmmRegister>());
6136 } else {
6137 DCHECK(value.IsConstant());
6138 int32_t v = bit_cast<int32_t, float>(value.GetConstant()->AsFloatConstant()->GetValue());
6139 __ movl(address, Immediate(v));
6140 }
6141 codegen_->MaybeRecordImplicitNullCheck(instruction);
6142 break;
6143 }
6144
6145 case DataType::Type::kFloat64: {
6146 uint32_t offset = mirror::Array::DataOffset(sizeof(double)).Uint32Value();
6147 Address address = CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset);
6148 if (value.IsFpuRegister()) {
6149 __ movsd(address, value.AsFpuRegister<XmmRegister>());
6150 codegen_->MaybeRecordImplicitNullCheck(instruction);
6151 } else {
6152 int64_t v = bit_cast<int64_t, double>(value.GetConstant()->AsDoubleConstant()->GetValue());
6153 Address address_high =
6154 CodeGeneratorX86_64::ArrayAddress(array, index, TIMES_8, offset + sizeof(int32_t));
6155 codegen_->MoveInt64ToAddress(address, address_high, v, instruction);
6156 }
6157 break;
6158 }
6159
6160 case DataType::Type::kUint32:
6161 case DataType::Type::kUint64:
6162 case DataType::Type::kVoid:
6163 LOG(FATAL) << "Unreachable type " << instruction->GetType();
6164 UNREACHABLE();
6165 }
6166 }
6167
6168 void LocationsBuilderX86_64::VisitArrayLength(HArrayLength* instruction) {
6169 LocationSummary* locations =
6170 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
6171 locations->SetInAt(0, Location::RequiresRegister());
6172 if (!instruction->IsEmittedAtUseSite()) {
6173 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
6174 }
6175 }
6176
6177 void InstructionCodeGeneratorX86_64::VisitArrayLength(HArrayLength* instruction) {
6178 if (instruction->IsEmittedAtUseSite()) {
6179 return;
6180 }
6181
6182 LocationSummary* locations = instruction->GetLocations();
6183 uint32_t offset = CodeGenerator::GetArrayLengthOffset(instruction);
6184 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
6185 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
6186 __ movl(out, Address(obj, offset));
6187 codegen_->MaybeRecordImplicitNullCheck(instruction);
6188 // Shift out the least significant bit, which holds the compression flag, in case the array is a String's char array.
6189 if (mirror::kUseStringCompression && instruction->IsStringLength()) {
6190 __ shrl(out, Immediate(1));
6191 }
6192 }
6193
6194 void LocationsBuilderX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6195 RegisterSet caller_saves = RegisterSet::Empty();
6196 InvokeRuntimeCallingConvention calling_convention;
6197 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
6198 caller_saves.Add(Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
6199 LocationSummary* locations = codegen_->CreateThrowingSlowPathLocations(instruction, caller_saves);
6200 locations->SetInAt(0, Location::RegisterOrConstant(instruction->InputAt(0)));
6201 HInstruction* length = instruction->InputAt(1);
6202 if (!length->IsEmittedAtUseSite()) {
6203 locations->SetInAt(1, Location::RegisterOrConstant(length));
6204 }
6205 }
6206
6207 void InstructionCodeGeneratorX86_64::VisitBoundsCheck(HBoundsCheck* instruction) {
6208 LocationSummary* locations = instruction->GetLocations();
6209 Location index_loc = locations->InAt(0);
6210 Location length_loc = locations->InAt(1);
6211 SlowPathCode* slow_path =
6212 new (codegen_->GetScopedAllocator()) BoundsCheckSlowPathX86_64(instruction);
6213
6214 if (length_loc.IsConstant()) {
6215 int32_t length = CodeGenerator::GetInt32ValueOf(length_loc.GetConstant());
6216 if (index_loc.IsConstant()) {
6217 // BCE will remove the bounds check if we are guaranteed to pass.
6218 int32_t index = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6219 if (index < 0 || index >= length) {
6220 codegen_->AddSlowPath(slow_path);
6221 __ jmp(slow_path->GetEntryLabel());
6222 } else {
6223 // Some optimization after BCE may have generated this, and we should not
6224 // generate a bounds check if it is a valid range.
6225 }
6226 return;
6227 }
6228
6229 // We have to reverse the jump condition because the length is the constant.
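      // The unsigned comparison below (kAboveEqual) also rejects negative indices, which wrap
      // around to large unsigned values.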
6230 CpuRegister index_reg = index_loc.AsRegister<CpuRegister>();
6231 __ cmpl(index_reg, Immediate(length));
6232 codegen_->AddSlowPath(slow_path);
6233 __ j(kAboveEqual, slow_path->GetEntryLabel());
6234 } else {
6235 HInstruction* array_length = instruction->InputAt(1);
6236 if (array_length->IsEmittedAtUseSite()) {
6237 // Address the length field in the array.
6238 DCHECK(array_length->IsArrayLength());
6239 uint32_t len_offset = CodeGenerator::GetArrayLengthOffset(array_length->AsArrayLength());
6240 Location array_loc = array_length->GetLocations()->InAt(0);
6241 Address array_len(array_loc.AsRegister<CpuRegister>(), len_offset);
6242 if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
6243 // TODO: if index_loc.IsConstant(), compare twice the index (to compensate for
6244 // the string compression flag) with the in-memory length and avoid the temporary.
6245 CpuRegister length_reg = CpuRegister(TMP);
6246 __ movl(length_reg, array_len);
6247 codegen_->MaybeRecordImplicitNullCheck(array_length);
6248 __ shrl(length_reg, Immediate(1));
6249 codegen_->GenerateIntCompare(length_reg, index_loc);
6250 } else {
6251 // Checking the bound for the general case:
6252 // an array of char, or a String's array when the compression feature is off.
6253 if (index_loc.IsConstant()) {
6254 int32_t value = CodeGenerator::GetInt32ValueOf(index_loc.GetConstant());
6255 __ cmpl(array_len, Immediate(value));
6256 } else {
6257 __ cmpl(array_len, index_loc.AsRegister<CpuRegister>());
6258 }
6259 codegen_->MaybeRecordImplicitNullCheck(array_length);
6260 }
6261 } else {
6262 codegen_->GenerateIntCompare(length_loc, index_loc);
6263 }
6264 codegen_->AddSlowPath(slow_path);
6265 __ j(kBelowEqual, slow_path->GetEntryLabel());
6266 }
6267 }
6268
6269 void CodeGeneratorX86_64::MaybeMarkGCCard(CpuRegister temp,
6270 CpuRegister card,
6271 CpuRegister object,
6272 CpuRegister value,
6273 bool emit_null_check) {
6274 NearLabel is_null;
6275 if (emit_null_check) {
6276 __ testl(value, value);
6277 __ j(kEqual, &is_null);
6278 }
6279 MarkGCCard(temp, card, object);
6280 if (emit_null_check) {
6281 __ Bind(&is_null);
6282 }
6283 }
6284
6285 void CodeGeneratorX86_64::MarkGCCard(CpuRegister temp, CpuRegister card, CpuRegister object) {
6286 // Load the address of the card table into `card`.
6287 __ gs()->movq(card,
6288 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6289 /* no_rip= */ true));
6290 // Calculate the offset (in the card table) of the card corresponding to `object`.
6291 __ movq(temp, object);
6292 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6293 // Write the `art::gc::accounting::CardTable::kCardDirty` value into the
6294 // `object`'s card.
6295 //
6296 // Register `card` contains the address of the card table. Note that the card
6297 // table's base is biased during its creation so that it always starts at an
6298 // address whose least-significant byte is equal to `kCardDirty` (see
6299 // art::gc::accounting::CardTable::Create). Therefore the MOVB instruction
6300 // below writes the `kCardDirty` (byte) value into the `object`'s card
6301 // (located at `card + object >> kCardShift`).
6302 //
6303 // This dual use of the value in register `card` (1. to calculate the location
6304 // of the card to mark; and 2. to load the `kCardDirty` value) saves a load
6305 // (no need to explicitly load `kCardDirty` as an immediate value).
6306 __ movb(Address(temp, card, TIMES_1, 0), card);
6307 }
6308
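// Verification used when the compiler relied on a prior card mark: trap (int3) if `object`'s card
// is still clean while the GC is concurrently marking (see the inline assert below).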
6309 void CodeGeneratorX86_64::CheckGCCardIsValid(CpuRegister temp,
6310 CpuRegister card,
6311 CpuRegister object) {
6312 NearLabel done;
6313 // Load the address of the card table into `card`.
6314 __ gs()->movq(card,
6315 Address::Absolute(Thread::CardTableOffset<kX86_64PointerSize>().Int32Value(),
6316 /* no_rip= */ true));
6317 // Calculate the offset (in the card table) of the card corresponding to `object`.
6318 __ movq(temp, object);
6319 __ shrq(temp, Immediate(gc::accounting::CardTable::kCardShift));
6320 // assert (!clean || !self->is_gc_marking)
6321 __ cmpb(Address(temp, card, TIMES_1, 0), Immediate(gc::accounting::CardTable::kCardClean));
6322 __ j(kNotEqual, &done);
6323 __ gs()->cmpl(
6324 Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
6325 Immediate(0));
6326 __ j(kEqual, &done);
6327 __ int3();
6328 __ Bind(&done);
6329 }
6330
6331 void LocationsBuilderX86_64::VisitParallelMove([[maybe_unused]] HParallelMove* instruction) {
6332 LOG(FATAL) << "Unimplemented";
6333 }
6334
6335 void InstructionCodeGeneratorX86_64::VisitParallelMove(HParallelMove* instruction) {
6336 if (instruction->GetNext()->IsSuspendCheck() &&
6337 instruction->GetBlock()->GetLoopInformation() != nullptr) {
6338 HSuspendCheck* suspend_check = instruction->GetNext()->AsSuspendCheck();
6339 // The back edge will generate the suspend check.
6340 codegen_->ClearSpillSlotsFromLoopPhisInStackMap(suspend_check, instruction);
6341 }
6342
6343 codegen_->GetMoveResolver()->EmitNativeCode(instruction);
6344 }
6345
6346 void LocationsBuilderX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6347 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
6348 instruction, LocationSummary::kCallOnSlowPath);
6349 // In suspend check slow path, usually there are no caller-save registers at all.
6350 // If SIMD instructions are present, however, we force spilling all live SIMD
6351 // registers in full width (since the runtime only saves/restores lower part).
6352 locations->SetCustomSlowPathCallerSaves(
6353 GetGraph()->HasSIMD() ? RegisterSet::AllFpu() : RegisterSet::Empty());
6354 }
6355
6356 void InstructionCodeGeneratorX86_64::VisitSuspendCheck(HSuspendCheck* instruction) {
6357 HBasicBlock* block = instruction->GetBlock();
6358 if (block->GetLoopInformation() != nullptr) {
6359 DCHECK(block->GetLoopInformation()->GetSuspendCheck() == instruction);
6360 // The back edge will generate the suspend check.
6361 return;
6362 }
6363 if (block->IsEntryBlock() && instruction->GetNext()->IsGoto()) {
6364 // The goto will generate the suspend check.
6365 return;
6366 }
6367 GenerateSuspendCheck(instruction, nullptr);
6368 }
6369
6370 void InstructionCodeGeneratorX86_64::GenerateSuspendCheck(HSuspendCheck* instruction,
6371 HBasicBlock* successor) {
6372 SuspendCheckSlowPathX86_64* slow_path =
6373 down_cast<SuspendCheckSlowPathX86_64*>(instruction->GetSlowPath());
6374 if (slow_path == nullptr) {
6375 slow_path =
6376 new (codegen_->GetScopedAllocator()) SuspendCheckSlowPathX86_64(instruction, successor);
6377 instruction->SetSlowPath(slow_path);
6378 codegen_->AddSlowPath(slow_path);
6379 if (successor != nullptr) {
6380 DCHECK(successor->IsLoopHeader());
6381 }
6382 } else {
6383 DCHECK_EQ(slow_path->GetSuccessor(), successor);
6384 }
6385
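  // The thread flags live at a fixed offset from the GS-based Thread pointer; any set suspend or
  // checkpoint request bit sends execution to the slow path.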
6386 __ gs()->testl(Address::Absolute(Thread::ThreadFlagsOffset<kX86_64PointerSize>().Int32Value(),
6387 /* no_rip= */ true),
6388 Immediate(Thread::SuspendOrCheckpointRequestFlags()));
6389 if (successor == nullptr) {
6390 __ j(kNotZero, slow_path->GetEntryLabel());
6391 __ Bind(slow_path->GetReturnLabel());
6392 } else {
6393 __ j(kZero, codegen_->GetLabelOf(successor));
6394 __ jmp(slow_path->GetEntryLabel());
6395 }
6396 }
6397
6398 X86_64Assembler* ParallelMoveResolverX86_64::GetAssembler() const {
6399 return codegen_->GetAssembler();
6400 }
6401
6402 void ParallelMoveResolverX86_64::EmitMove(size_t index) {
6403 MoveOperands* move = moves_[index];
6404 Location source = move->GetSource();
6405 Location destination = move->GetDestination();
6406
6407 if (source.IsRegister()) {
6408 if (destination.IsRegister()) {
6409 __ movq(destination.AsRegister<CpuRegister>(), source.AsRegister<CpuRegister>());
6410 } else if (destination.IsStackSlot()) {
6411 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()),
6412 source.AsRegister<CpuRegister>());
6413 } else {
6414 DCHECK(destination.IsDoubleStackSlot());
6415 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()),
6416 source.AsRegister<CpuRegister>());
6417 }
6418 } else if (source.IsStackSlot()) {
6419 if (destination.IsRegister()) {
6420 __ movl(destination.AsRegister<CpuRegister>(),
6421 Address(CpuRegister(RSP), source.GetStackIndex()));
6422 } else if (destination.IsFpuRegister()) {
6423 __ movss(destination.AsFpuRegister<XmmRegister>(),
6424 Address(CpuRegister(RSP), source.GetStackIndex()));
6425 } else {
6426 DCHECK(destination.IsStackSlot());
6427 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6428 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6429 }
6430 } else if (source.IsDoubleStackSlot()) {
6431 if (destination.IsRegister()) {
6432 __ movq(destination.AsRegister<CpuRegister>(),
6433 Address(CpuRegister(RSP), source.GetStackIndex()));
6434 } else if (destination.IsFpuRegister()) {
6435 __ movsd(destination.AsFpuRegister<XmmRegister>(),
6436 Address(CpuRegister(RSP), source.GetStackIndex()));
6437 } else {
6438 DCHECK(destination.IsDoubleStackSlot()) << destination;
6439 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6440 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6441 }
6442 } else if (source.IsSIMDStackSlot()) {
6443 if (destination.IsFpuRegister()) {
6444 __ movups(destination.AsFpuRegister<XmmRegister>(),
6445 Address(CpuRegister(RSP), source.GetStackIndex()));
6446 } else {
6447 DCHECK(destination.IsSIMDStackSlot());
6448 size_t high = kX86_64WordSize;
6449 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex()));
6450 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex()), CpuRegister(TMP));
6451 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), source.GetStackIndex() + high));
6452 __ movq(Address(CpuRegister(RSP), destination.GetStackIndex() + high), CpuRegister(TMP));
6453 }
6454 } else if (source.IsConstant()) {
6455 HConstant* constant = source.GetConstant();
6456 if (constant->IsIntConstant() || constant->IsNullConstant()) {
6457 int32_t value = CodeGenerator::GetInt32ValueOf(constant);
6458 if (destination.IsRegister()) {
6459 if (value == 0) {
6460 __ xorl(destination.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6461 } else {
6462 __ movl(destination.AsRegister<CpuRegister>(), Immediate(value));
6463 }
6464 } else {
6465 DCHECK(destination.IsStackSlot()) << destination;
6466 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), Immediate(value));
6467 }
6468 } else if (constant->IsLongConstant()) {
6469 int64_t value = constant->AsLongConstant()->GetValue();
6470 if (destination.IsRegister()) {
6471 codegen_->Load64BitValue(destination.AsRegister<CpuRegister>(), value);
6472 } else {
6473 DCHECK(destination.IsDoubleStackSlot()) << destination;
6474 codegen_->Store64BitValueToStack(destination, value);
6475 }
6476 } else if (constant->IsFloatConstant()) {
6477 float fp_value = constant->AsFloatConstant()->GetValue();
6478 if (destination.IsFpuRegister()) {
6479 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6480 codegen_->Load32BitValue(dest, fp_value);
6481 } else {
6482 DCHECK(destination.IsStackSlot()) << destination;
6483 Immediate imm(bit_cast<int32_t, float>(fp_value));
6484 __ movl(Address(CpuRegister(RSP), destination.GetStackIndex()), imm);
6485 }
6486 } else {
6487 DCHECK(constant->IsDoubleConstant()) << constant->DebugName();
6488 double fp_value = constant->AsDoubleConstant()->GetValue();
6489 int64_t value = bit_cast<int64_t, double>(fp_value);
6490 if (destination.IsFpuRegister()) {
6491 XmmRegister dest = destination.AsFpuRegister<XmmRegister>();
6492 codegen_->Load64BitValue(dest, fp_value);
6493 } else {
6494 DCHECK(destination.IsDoubleStackSlot()) << destination;
6495 codegen_->Store64BitValueToStack(destination, value);
6496 }
6497 }
6498 } else if (source.IsFpuRegister()) {
6499 if (destination.IsFpuRegister()) {
6500 __ movaps(destination.AsFpuRegister<XmmRegister>(), source.AsFpuRegister<XmmRegister>());
6501 } else if (destination.IsStackSlot()) {
6502 __ movss(Address(CpuRegister(RSP), destination.GetStackIndex()),
6503 source.AsFpuRegister<XmmRegister>());
6504 } else if (destination.IsDoubleStackSlot()) {
6505 __ movsd(Address(CpuRegister(RSP), destination.GetStackIndex()),
6506 source.AsFpuRegister<XmmRegister>());
6507 } else {
6508 DCHECK(destination.IsSIMDStackSlot());
6509 __ movups(Address(CpuRegister(RSP), destination.GetStackIndex()),
6510 source.AsFpuRegister<XmmRegister>());
6511 }
6512 }
6513 }
6514
6515 void ParallelMoveResolverX86_64::Exchange32(CpuRegister reg, int mem) {
6516 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6517 __ movl(Address(CpuRegister(RSP), mem), reg);
6518 __ movl(reg, CpuRegister(TMP));
6519 }
6520
6521 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg1, CpuRegister reg2) {
6522 __ movq(CpuRegister(TMP), reg1);
6523 __ movq(reg1, reg2);
6524 __ movq(reg2, CpuRegister(TMP));
6525 }
6526
6527 void ParallelMoveResolverX86_64::Exchange64(CpuRegister reg, int mem) {
6528 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6529 __ movq(Address(CpuRegister(RSP), mem), reg);
6530 __ movq(reg, CpuRegister(TMP));
6531 }
6532
6533 void ParallelMoveResolverX86_64::Exchange32(XmmRegister reg, int mem) {
6534 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6535 __ movss(Address(CpuRegister(RSP), mem), reg);
6536 __ movd(reg, CpuRegister(TMP));
6537 }
6538
6539 void ParallelMoveResolverX86_64::Exchange64(XmmRegister reg, int mem) {
6540 __ movq(CpuRegister(TMP), Address(CpuRegister(RSP), mem));
6541 __ movsd(Address(CpuRegister(RSP), mem), reg);
6542 __ movq(reg, CpuRegister(TMP));
6543 }
6544
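// Swap a 128-bit value between an XMM register and a stack slot: spill the register to a fresh
// 16-byte area below RSP, swap that area with the (now shifted) slot, then reload the register.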
6545 void ParallelMoveResolverX86_64::Exchange128(XmmRegister reg, int mem) {
6546 size_t extra_slot = 2 * kX86_64WordSize;
6547 __ subq(CpuRegister(RSP), Immediate(extra_slot));
6548 __ movups(Address(CpuRegister(RSP), 0), XmmRegister(reg));
6549 ExchangeMemory64(0, mem + extra_slot, 2);
6550 __ movups(XmmRegister(reg), Address(CpuRegister(RSP), 0));
6551 __ addq(CpuRegister(RSP), Immediate(extra_slot));
6552 }
6553
6554 void ParallelMoveResolverX86_64::ExchangeMemory32(int mem1, int mem2) {
6555 ScratchRegisterScope ensure_scratch(
6556 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6557
6558 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6559 __ movl(CpuRegister(TMP), Address(CpuRegister(RSP), mem1 + stack_offset));
6560 __ movl(CpuRegister(ensure_scratch.GetRegister()),
6561 Address(CpuRegister(RSP), mem2 + stack_offset));
6562 __ movl(Address(CpuRegister(RSP), mem2 + stack_offset), CpuRegister(TMP));
6563 __ movl(Address(CpuRegister(RSP), mem1 + stack_offset),
6564 CpuRegister(ensure_scratch.GetRegister()));
6565 }
6566
6567 void ParallelMoveResolverX86_64::ExchangeMemory64(int mem1, int mem2, int num_of_qwords) {
6568 ScratchRegisterScope ensure_scratch(
6569 this, TMP, RAX, codegen_->GetNumberOfCoreRegisters());
6570
6571 int stack_offset = ensure_scratch.IsSpilled() ? kX86_64WordSize : 0;
6572
6573 // Now that temp registers are available (possibly spilled), exchange blocks of memory.
6574 for (int i = 0; i < num_of_qwords; i++) {
6575 __ movq(CpuRegister(TMP),
6576 Address(CpuRegister(RSP), mem1 + stack_offset));
6577 __ movq(CpuRegister(ensure_scratch.GetRegister()),
6578 Address(CpuRegister(RSP), mem2 + stack_offset));
6579 __ movq(Address(CpuRegister(RSP), mem2 + stack_offset),
6580 CpuRegister(TMP));
6581 __ movq(Address(CpuRegister(RSP), mem1 + stack_offset),
6582 CpuRegister(ensure_scratch.GetRegister()));
6583 stack_offset += kX86_64WordSize;
6584 }
6585 }
6586
6587 void ParallelMoveResolverX86_64::EmitSwap(size_t index) {
6588 MoveOperands* move = moves_[index];
6589 Location source = move->GetSource();
6590 Location destination = move->GetDestination();
6591
6592 if (source.IsRegister() && destination.IsRegister()) {
6593 Exchange64(source.AsRegister<CpuRegister>(), destination.AsRegister<CpuRegister>());
6594 } else if (source.IsRegister() && destination.IsStackSlot()) {
6595 Exchange32(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6596 } else if (source.IsStackSlot() && destination.IsRegister()) {
6597 Exchange32(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6598 } else if (source.IsStackSlot() && destination.IsStackSlot()) {
6599 ExchangeMemory32(destination.GetStackIndex(), source.GetStackIndex());
6600 } else if (source.IsRegister() && destination.IsDoubleStackSlot()) {
6601 Exchange64(source.AsRegister<CpuRegister>(), destination.GetStackIndex());
6602 } else if (source.IsDoubleStackSlot() && destination.IsRegister()) {
6603 Exchange64(destination.AsRegister<CpuRegister>(), source.GetStackIndex());
6604 } else if (source.IsDoubleStackSlot() && destination.IsDoubleStackSlot()) {
6605 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 1);
6606 } else if (source.IsFpuRegister() && destination.IsFpuRegister()) {
6607 __ movq(CpuRegister(TMP), source.AsFpuRegister<XmmRegister>());
6608 __ movaps(source.AsFpuRegister<XmmRegister>(), destination.AsFpuRegister<XmmRegister>());
6609 __ movq(destination.AsFpuRegister<XmmRegister>(), CpuRegister(TMP));
6610 } else if (source.IsFpuRegister() && destination.IsStackSlot()) {
6611 Exchange32(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6612 } else if (source.IsStackSlot() && destination.IsFpuRegister()) {
6613 Exchange32(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6614 } else if (source.IsFpuRegister() && destination.IsDoubleStackSlot()) {
6615 Exchange64(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6616 } else if (source.IsDoubleStackSlot() && destination.IsFpuRegister()) {
6617 Exchange64(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6618 } else if (source.IsSIMDStackSlot() && destination.IsSIMDStackSlot()) {
6619 ExchangeMemory64(destination.GetStackIndex(), source.GetStackIndex(), 2);
6620 } else if (source.IsFpuRegister() && destination.IsSIMDStackSlot()) {
6621 Exchange128(source.AsFpuRegister<XmmRegister>(), destination.GetStackIndex());
6622 } else if (destination.IsFpuRegister() && source.IsSIMDStackSlot()) {
6623 Exchange128(destination.AsFpuRegister<XmmRegister>(), source.GetStackIndex());
6624 } else {
6625 LOG(FATAL) << "Unimplemented swap between " << source << " and " << destination;
6626 }
6627 }
6628
6629
6630 void ParallelMoveResolverX86_64::SpillScratch(int reg) {
6631 __ pushq(CpuRegister(reg));
6632 }
6633
6634
6635 void ParallelMoveResolverX86_64::RestoreScratch(int reg) {
6636 __ popq(CpuRegister(reg));
6637 }
6638
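// A class is safe to use without further checks only once it is visibly initialized; any status
// byte below kShiftedVisiblyInitializedValue branches to the initialization slow path.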
6639 void InstructionCodeGeneratorX86_64::GenerateClassInitializationCheck(
6640 SlowPathCode* slow_path, CpuRegister class_reg) {
6641 __ cmpb(Address(class_reg, kClassStatusByteOffset), Immediate(kShiftedVisiblyInitializedValue));
6642 __ j(kBelow, slow_path->GetEntryLabel());
6643 __ Bind(slow_path->GetExitLabel());
6644 }
6645
6646 void InstructionCodeGeneratorX86_64::GenerateBitstringTypeCheckCompare(HTypeCheckInstruction* check,
6647 CpuRegister temp) {
6648 uint32_t path_to_root = check->GetBitstringPathToRoot();
6649 uint32_t mask = check->GetBitstringMask();
6650 DCHECK(IsPowerOfTwo(mask + 1));
6651 size_t mask_bits = WhichPowerOf2(mask + 1);
6652
6653 if (mask_bits == 16u) {
6654 // Compare the bitstring in memory.
6655 __ cmpw(Address(temp, mirror::Class::StatusOffset()), Immediate(path_to_root));
6656 } else {
6657 // /* uint32_t */ temp = temp->status_
6658 __ movl(temp, Address(temp, mirror::Class::StatusOffset()));
6659 // Compare the bitstring bits using SUB.
6660 __ subl(temp, Immediate(path_to_root));
6661 // Shift out bits that do not contribute to the comparison.
6662 __ shll(temp, Immediate(32u - mask_bits));
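    // The caller branches on the zero flag: it is set exactly when the low `mask_bits` bits of
    // the status word equal `path_to_root`.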
6663 }
6664 }
6665
6666 HLoadClass::LoadKind CodeGeneratorX86_64::GetSupportedLoadClassKind(
6667 HLoadClass::LoadKind desired_class_load_kind) {
6668 switch (desired_class_load_kind) {
6669 case HLoadClass::LoadKind::kInvalid:
6670 LOG(FATAL) << "UNREACHABLE";
6671 UNREACHABLE();
6672 case HLoadClass::LoadKind::kReferrersClass:
6673 break;
6674 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6675 case HLoadClass::LoadKind::kBootImageRelRo:
6676 case HLoadClass::LoadKind::kAppImageRelRo:
6677 case HLoadClass::LoadKind::kBssEntry:
6678 case HLoadClass::LoadKind::kBssEntryPublic:
6679 case HLoadClass::LoadKind::kBssEntryPackage:
6680 DCHECK(!GetCompilerOptions().IsJitCompiler());
6681 break;
6682 case HLoadClass::LoadKind::kJitBootImageAddress:
6683 case HLoadClass::LoadKind::kJitTableAddress:
6684 DCHECK(GetCompilerOptions().IsJitCompiler());
6685 break;
6686 case HLoadClass::LoadKind::kRuntimeCall:
6687 break;
6688 }
6689 return desired_class_load_kind;
6690 }
6691
6692 void LocationsBuilderX86_64::VisitLoadClass(HLoadClass* cls) {
6693 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6694 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6695 // Custom calling convention: RAX serves as both input and output.
6696 CodeGenerator::CreateLoadClassRuntimeCallLocationSummary(
6697 cls,
6698 Location::RegisterLocation(RAX),
6699 Location::RegisterLocation(RAX));
6700 return;
6701 }
6702 DCHECK_EQ(cls->NeedsAccessCheck(),
6703 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6704 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6705
6706 const bool requires_read_barrier = !cls->IsInImage() && codegen_->EmitReadBarrier();
6707 LocationSummary::CallKind call_kind = (cls->NeedsEnvironment() || requires_read_barrier)
6708 ? LocationSummary::kCallOnSlowPath
6709 : LocationSummary::kNoCall;
6710 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(cls, call_kind);
6711 if (kUseBakerReadBarrier && requires_read_barrier && !cls->NeedsEnvironment()) {
6712 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
6713 }
6714
6715 if (load_kind == HLoadClass::LoadKind::kReferrersClass) {
6716 locations->SetInAt(0, Location::RequiresRegister());
6717 }
6718 locations->SetOut(Location::RequiresRegister());
6719 if (load_kind == HLoadClass::LoadKind::kBssEntry ||
6720 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6721 load_kind == HLoadClass::LoadKind::kBssEntryPackage) {
6722 if (codegen_->EmitNonBakerReadBarrier()) {
6723 // For non-Baker read barrier we have a temp-clobbering call.
6724 } else {
6725 // Rely on the type resolution and/or initialization to save everything.
6726 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6727 }
6728 }
6729 }
6730
6731 Label* CodeGeneratorX86_64::NewJitRootClassPatch(const DexFile& dex_file,
6732 dex::TypeIndex type_index,
6733 Handle<mirror::Class> handle) {
6734 ReserveJitClassRoot(TypeReference(&dex_file, type_index), handle);
6735 // Add a patch entry and return the label.
6736 jit_class_patches_.emplace_back(&dex_file, type_index.index_);
6737 PatchInfo<Label>* info = &jit_class_patches_.back();
6738 return &info->label;
6739 }
6740
6741 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6742 // move.
6743 void InstructionCodeGeneratorX86_64::VisitLoadClass(HLoadClass* cls) NO_THREAD_SAFETY_ANALYSIS {
6744 HLoadClass::LoadKind load_kind = cls->GetLoadKind();
6745 if (load_kind == HLoadClass::LoadKind::kRuntimeCall) {
6746 codegen_->GenerateLoadClassRuntimeCall(cls);
6747 return;
6748 }
6749 DCHECK_EQ(cls->NeedsAccessCheck(),
6750 load_kind == HLoadClass::LoadKind::kBssEntryPublic ||
6751 load_kind == HLoadClass::LoadKind::kBssEntryPackage);
6752
6753 LocationSummary* locations = cls->GetLocations();
6754 Location out_loc = locations->Out();
6755 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6756
6757 const ReadBarrierOption read_barrier_option =
6758 cls->IsInImage() ? kWithoutReadBarrier : codegen_->GetCompilerReadBarrierOption();
6759 bool generate_null_check = false;
6760 switch (load_kind) {
6761 case HLoadClass::LoadKind::kReferrersClass: {
6762 DCHECK(!cls->CanCallRuntime());
6763 DCHECK(!cls->MustGenerateClinitCheck());
6764 // /* GcRoot<mirror::Class> */ out = current_method->declaring_class_
6765 CpuRegister current_method = locations->InAt(0).AsRegister<CpuRegister>();
6766 GenerateGcRootFieldLoad(
6767 cls,
6768 out_loc,
6769 Address(current_method, ArtMethod::DeclaringClassOffset().Int32Value()),
6770 /* fixup_label= */ nullptr,
6771 read_barrier_option);
6772 break;
6773 }
6774 case HLoadClass::LoadKind::kBootImageLinkTimePcRelative:
6775 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
6776 codegen_->GetCompilerOptions().IsBootImageExtension());
6777 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6778 __ leal(out,
6779 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6780 codegen_->RecordBootImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6781 break;
6782 case HLoadClass::LoadKind::kBootImageRelRo: {
6783 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
6784 __ movl(out,
6785 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6786 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(cls));
6787 break;
6788 }
6789 case HLoadClass::LoadKind::kAppImageRelRo: {
6790 DCHECK(codegen_->GetCompilerOptions().IsAppImage());
6791 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6792 __ movl(out,
6793 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
6794 codegen_->RecordAppImageTypePatch(cls->GetDexFile(), cls->GetTypeIndex());
6795 break;
6796 }
6797 case HLoadClass::LoadKind::kBssEntry:
6798 case HLoadClass::LoadKind::kBssEntryPublic:
6799 case HLoadClass::LoadKind::kBssEntryPackage: {
6800 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6801 /* no_rip= */ false);
6802 Label* fixup_label = codegen_->NewTypeBssEntryPatch(cls);
6803 // /* GcRoot<mirror::Class> */ out = *address /* PC-relative */
6804 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6805 // No need for memory fence, thanks to the x86-64 memory model.
6806 generate_null_check = true;
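      // The .bss slot starts out null: a null load means the type has not been resolved yet,
      // so the null check emitted further down jumps to LoadClassSlowPathX86_64, and the
      // runtime writes the resolved class back into the slot so later executions take the
      // fast path.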
6807 break;
6808 }
6809 case HLoadClass::LoadKind::kJitBootImageAddress: {
6810 DCHECK_EQ(read_barrier_option, kWithoutReadBarrier);
6811 uint32_t address = reinterpret_cast32<uint32_t>(cls->GetClass().Get());
6812 DCHECK_NE(address, 0u);
6813 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
6814 break;
6815 }
6816 case HLoadClass::LoadKind::kJitTableAddress: {
6817 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6818 /* no_rip= */ true);
6819 Label* fixup_label =
6820 codegen_->NewJitRootClassPatch(cls->GetDexFile(), cls->GetTypeIndex(), cls->GetClass());
6821 // /* GcRoot<mirror::Class> */ out = *address
6822 GenerateGcRootFieldLoad(cls, out_loc, address, fixup_label, read_barrier_option);
6823 break;
6824 }
6825 default:
6826 LOG(FATAL) << "Unexpected load kind: " << cls->GetLoadKind();
6827 UNREACHABLE();
6828 }
6829
6830 if (generate_null_check || cls->MustGenerateClinitCheck()) {
6831 DCHECK(cls->CanCallRuntime());
6832 SlowPathCode* slow_path =
6833 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(cls, cls);
6834 codegen_->AddSlowPath(slow_path);
6835 if (generate_null_check) {
6836 __ testl(out, out);
6837 __ j(kEqual, slow_path->GetEntryLabel());
6838 }
6839 if (cls->MustGenerateClinitCheck()) {
6840 GenerateClassInitializationCheck(slow_path, out);
6841 } else {
6842 __ Bind(slow_path->GetExitLabel());
6843 }
6844 }
6845 }
6846
6847 void LocationsBuilderX86_64::VisitClinitCheck(HClinitCheck* check) {
6848 LocationSummary* locations =
6849 new (GetGraph()->GetAllocator()) LocationSummary(check, LocationSummary::kCallOnSlowPath);
6850 locations->SetInAt(0, Location::RequiresRegister());
6851 if (check->HasUses()) {
6852 locations->SetOut(Location::SameAsFirstInput());
6853 }
6854 // Rely on the type initialization to save everything we need.
6855 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6856 }
6857
6858 void LocationsBuilderX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6859 // Custom calling convention: RAX serves as both input and output.
6860 Location location = Location::RegisterLocation(RAX);
6861 CodeGenerator::CreateLoadMethodHandleRuntimeCallLocationSummary(load, location, location);
6862 }
6863
6864 void InstructionCodeGeneratorX86_64::VisitLoadMethodHandle(HLoadMethodHandle* load) {
6865 codegen_->GenerateLoadMethodHandleRuntimeCall(load);
6866 }
6867
6868 Label* CodeGeneratorX86_64::NewJitRootMethodTypePatch(const DexFile& dex_file,
6869 dex::ProtoIndex proto_index,
6870 Handle<mirror::MethodType> handle) {
6871 ReserveJitMethodTypeRoot(ProtoReference(&dex_file, proto_index), handle);
6872 // Add a patch entry and return the label.
6873 jit_method_type_patches_.emplace_back(&dex_file, proto_index.index_);
6874 PatchInfo<Label>* info = &jit_method_type_patches_.back();
6875 return &info->label;
6876 }
6877
6878 void LocationsBuilderX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6879 LocationSummary* locations =
6880 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kCallOnSlowPath);
6881 if (load->GetLoadKind() == HLoadMethodType::LoadKind::kRuntimeCall) {
6882 Location location = Location::RegisterLocation(RAX);
6883 CodeGenerator::CreateLoadMethodTypeRuntimeCallLocationSummary(load, location, location);
6884 } else {
6885 locations->SetOut(Location::RequiresRegister());
6886 if (load->GetLoadKind() == HLoadMethodType::LoadKind::kBssEntry) {
6887 if (codegen_->EmitNonBakerReadBarrier()) {
6888 // For non-Baker read barrier we have a temp-clobbering call.
6889 } else {
6890 // Rely on the pResolveMethodType to save everything.
6891 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6892 }
6893 }
6894 }
6895 }
6896
6897 void InstructionCodeGeneratorX86_64::VisitLoadMethodType(HLoadMethodType* load) {
6898 LocationSummary* locations = load->GetLocations();
6899 Location out_loc = locations->Out();
6900 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6901
6902 switch (load->GetLoadKind()) {
6903 case HLoadMethodType::LoadKind::kBssEntry: {
6904 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6905 /* no_rip= */ false);
6906 Label* fixup_label = codegen_->NewMethodTypeBssEntryPatch(load);
6907 // /* GcRoot<mirror::MethodType> */ out = *address /* PC-relative */
6908 GenerateGcRootFieldLoad(
6909 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6910 // No need for memory fence, thanks to the x86-64 memory model.
6911 SlowPathCode* slow_path =
6912 new (codegen_->GetScopedAllocator()) LoadMethodTypeSlowPathX86_64(load);
6913 codegen_->AddSlowPath(slow_path);
6914 __ testl(out, out);
6915 __ j(kEqual, slow_path->GetEntryLabel());
6916 __ Bind(slow_path->GetExitLabel());
6917 return;
6918 }
6919 case HLoadMethodType::LoadKind::kJitTableAddress: {
6920 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
6921 /* no_rip= */ true);
6922 Handle<mirror::MethodType> method_type = load->GetMethodType();
6923 DCHECK(method_type != nullptr);
6924 Label* fixup_label = codegen_->NewJitRootMethodTypePatch(
6925 load->GetDexFile(), load->GetProtoIndex(), method_type);
6926 GenerateGcRootFieldLoad(
6927 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
6928 return;
6929 }
6930 default:
6931 DCHECK_EQ(load->GetLoadKind(), HLoadMethodType::LoadKind::kRuntimeCall);
6932 codegen_->GenerateLoadMethodTypeRuntimeCall(load);
6933 break;
6934 }
6935 }
6936
6937 void InstructionCodeGeneratorX86_64::VisitClinitCheck(HClinitCheck* check) {
6938 // We assume the class is not null.
6939 SlowPathCode* slow_path =
6940 new (codegen_->GetScopedAllocator()) LoadClassSlowPathX86_64(check->GetLoadClass(), check);
6941 codegen_->AddSlowPath(slow_path);
6942 GenerateClassInitializationCheck(slow_path,
6943 check->GetLocations()->InAt(0).AsRegister<CpuRegister>());
6944 }
6945
6946 HLoadString::LoadKind CodeGeneratorX86_64::GetSupportedLoadStringKind(
6947 HLoadString::LoadKind desired_string_load_kind) {
6948 switch (desired_string_load_kind) {
6949 case HLoadString::LoadKind::kBootImageLinkTimePcRelative:
6950 case HLoadString::LoadKind::kBootImageRelRo:
6951 case HLoadString::LoadKind::kBssEntry:
6952 DCHECK(!GetCompilerOptions().IsJitCompiler());
6953 break;
6954 case HLoadString::LoadKind::kJitBootImageAddress:
6955 case HLoadString::LoadKind::kJitTableAddress:
6956 DCHECK(GetCompilerOptions().IsJitCompiler());
6957 break;
6958 case HLoadString::LoadKind::kRuntimeCall:
6959 break;
6960 }
6961 return desired_string_load_kind;
6962 }
6963
6964 void LocationsBuilderX86_64::VisitLoadString(HLoadString* load) {
6965 LocationSummary::CallKind call_kind = codegen_->GetLoadStringCallKind(load);
6966 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(load, call_kind);
6967 if (load->GetLoadKind() == HLoadString::LoadKind::kRuntimeCall) {
6968 locations->SetOut(Location::RegisterLocation(RAX));
6969 } else {
6970 locations->SetOut(Location::RequiresRegister());
6971 if (load->GetLoadKind() == HLoadString::LoadKind::kBssEntry) {
6972 if (codegen_->EmitNonBakerReadBarrier()) {
6973 // For non-Baker read barrier we have a temp-clobbering call.
6974 } else {
6975 // Rely on the pResolveString to save everything.
6976 locations->SetCustomSlowPathCallerSaves(OneRegInReferenceOutSaveEverythingCallerSaves());
6977 }
6978 }
6979 }
6980 }
6981
6982 Label* CodeGeneratorX86_64::NewJitRootStringPatch(const DexFile& dex_file,
6983 dex::StringIndex string_index,
6984 Handle<mirror::String> handle) {
6985 ReserveJitStringRoot(StringReference(&dex_file, string_index), handle);
6986 // Add a patch entry and return the label.
6987 jit_string_patches_.emplace_back(&dex_file, string_index.index_);
6988 PatchInfo<Label>* info = &jit_string_patches_.back();
6989 return &info->label;
6990 }
6991
6992 // NO_THREAD_SAFETY_ANALYSIS as we manipulate handles whose internal object we know does not
6993 // move.
6994 void InstructionCodeGeneratorX86_64::VisitLoadString(HLoadString* load) NO_THREAD_SAFETY_ANALYSIS {
6995 LocationSummary* locations = load->GetLocations();
6996 Location out_loc = locations->Out();
6997 CpuRegister out = out_loc.AsRegister<CpuRegister>();
6998
6999 switch (load->GetLoadKind()) {
7000 case HLoadString::LoadKind::kBootImageLinkTimePcRelative: {
7001 DCHECK(codegen_->GetCompilerOptions().IsBootImage() ||
7002 codegen_->GetCompilerOptions().IsBootImageExtension());
7003 __ leal(out,
7004 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
7005 codegen_->RecordBootImageStringPatch(load);
7006 return;
7007 }
7008 case HLoadString::LoadKind::kBootImageRelRo: {
7009 DCHECK(!codegen_->GetCompilerOptions().IsBootImage());
7010 __ movl(out,
7011 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /* no_rip= */ false));
7012 codegen_->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(load));
7013 return;
7014 }
7015 case HLoadString::LoadKind::kBssEntry: {
7016 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
7017 /* no_rip= */ false);
7018 Label* fixup_label = codegen_->NewStringBssEntryPatch(load);
7019 // /* GcRoot<mirror::String> */ out = *address /* PC-relative */
7020 GenerateGcRootFieldLoad(
7021 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7022 // No need for memory fence, thanks to the x86-64 memory model.
7023 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) LoadStringSlowPathX86_64(load);
7024 codegen_->AddSlowPath(slow_path);
7025 __ testl(out, out);
7026 __ j(kEqual, slow_path->GetEntryLabel());
7027 __ Bind(slow_path->GetExitLabel());
7028 return;
7029 }
7030 case HLoadString::LoadKind::kJitBootImageAddress: {
7031 uint32_t address = reinterpret_cast32<uint32_t>(load->GetString().Get());
7032 DCHECK_NE(address, 0u);
7033 __ movl(out, Immediate(static_cast<int32_t>(address))); // Zero-extended.
7034 return;
7035 }
7036 case HLoadString::LoadKind::kJitTableAddress: {
7037 Address address = Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset,
7038 /* no_rip= */ true);
7039 Label* fixup_label = codegen_->NewJitRootStringPatch(
7040 load->GetDexFile(), load->GetStringIndex(), load->GetString());
7041 // /* GcRoot<mirror::String> */ out = *address
7042 GenerateGcRootFieldLoad(
7043 load, out_loc, address, fixup_label, codegen_->GetCompilerReadBarrierOption());
7044 return;
7045 }
7046 default:
7047 break;
7048 }
7049
7050 // Custom calling convention: RAX serves as both input and output.
7051 __ movl(CpuRegister(RAX), Immediate(load->GetStringIndex().index_));
7052 codegen_->InvokeRuntime(kQuickResolveString, load);
7053 CheckEntrypointTypes<kQuickResolveString, void*, uint32_t>();
7054 }
7055
7056 static Address GetExceptionTlsAddress() {
7057 return Address::Absolute(Thread::ExceptionOffset<kX86_64PointerSize>().Int32Value(),
7058 /* no_rip= */ true);
7059 }
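// ART keeps the pending exception in a field of the current Thread; on x86-64 the Thread is
// reached through the GS segment register, hence the gs()-prefixed accesses below. Roughly:
//
//   gs: movl out, [ExceptionOffset]    // VisitLoadException: read the pending exception
//   gs: movl [ExceptionOffset], 0      // VisitClearException: clear it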
7060
7061 void LocationsBuilderX86_64::VisitLoadException(HLoadException* load) {
7062 LocationSummary* locations =
7063 new (GetGraph()->GetAllocator()) LocationSummary(load, LocationSummary::kNoCall);
7064 locations->SetOut(Location::RequiresRegister());
7065 }
7066
7067 void InstructionCodeGeneratorX86_64::VisitLoadException(HLoadException* load) {
7068 __ gs()->movl(load->GetLocations()->Out().AsRegister<CpuRegister>(), GetExceptionTlsAddress());
7069 }
7070
7071 void LocationsBuilderX86_64::VisitClearException(HClearException* clear) {
7072 new (GetGraph()->GetAllocator()) LocationSummary(clear, LocationSummary::kNoCall);
7073 }
7074
7075 void InstructionCodeGeneratorX86_64::VisitClearException([[maybe_unused]] HClearException* clear) {
7076 __ gs()->movl(GetExceptionTlsAddress(), Immediate(0));
7077 }
7078
7079 void LocationsBuilderX86_64::VisitThrow(HThrow* instruction) {
7080 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7081 instruction, LocationSummary::kCallOnMainOnly);
7082 InvokeRuntimeCallingConvention calling_convention;
7083 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7084 }
7085
7086 void InstructionCodeGeneratorX86_64::VisitThrow(HThrow* instruction) {
7087 codegen_->InvokeRuntime(kQuickDeliverException, instruction);
7088 CheckEntrypointTypes<kQuickDeliverException, void, mirror::Object*>();
7089 }
7090
7091 // Temp is used for read barrier.
7092 static size_t NumberOfInstanceOfTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7093 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7094 return 1;
7095 }
7096 if (emit_read_barrier &&
7097 !kUseBakerReadBarrier &&
7098 (type_check_kind == TypeCheckKind::kAbstractClassCheck ||
7099 type_check_kind == TypeCheckKind::kClassHierarchyCheck ||
7100 type_check_kind == TypeCheckKind::kArrayObjectCheck)) {
7101 return 1;
7102 }
7103 return 0;
7104 }
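// Put differently: the interface check always needs one temp to walk the IfTable, while the
// hierarchy-walking checks need one only for non-Baker read barriers, where the temp keeps
// the reference that is about to be overwritten alive for the read barrier slow path.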
7105
7106 // The interface check case has two temps: one holds the number of interfaces and one holds
7107 // the current interface pointer; the current interface is compared directly in memory.
7108 // The other checks have one temp for loading the object's class.
7109 static size_t NumberOfCheckCastTemps(bool emit_read_barrier, TypeCheckKind type_check_kind) {
7110 return 1 + NumberOfInstanceOfTemps(emit_read_barrier, type_check_kind);
7111 }
7112
7113 void LocationsBuilderX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7114 LocationSummary::CallKind call_kind = LocationSummary::kNoCall;
7115 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7116 bool baker_read_barrier_slow_path = false;
7117 switch (type_check_kind) {
7118 case TypeCheckKind::kExactCheck:
7119 case TypeCheckKind::kAbstractClassCheck:
7120 case TypeCheckKind::kClassHierarchyCheck:
7121 case TypeCheckKind::kArrayObjectCheck:
7122 case TypeCheckKind::kInterfaceCheck: {
7123 bool needs_read_barrier = codegen_->InstanceOfNeedsReadBarrier(instruction);
7124 call_kind = needs_read_barrier ? LocationSummary::kCallOnSlowPath : LocationSummary::kNoCall;
7125 baker_read_barrier_slow_path = (kUseBakerReadBarrier && needs_read_barrier) &&
7126 (type_check_kind != TypeCheckKind::kInterfaceCheck);
7127 break;
7128 }
7129 case TypeCheckKind::kArrayCheck:
7130 case TypeCheckKind::kUnresolvedCheck:
7131 call_kind = LocationSummary::kCallOnSlowPath;
7132 break;
7133 case TypeCheckKind::kBitstringCheck:
7134 break;
7135 }
7136
7137 LocationSummary* locations =
7138 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7139 if (baker_read_barrier_slow_path) {
7140 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
7141 }
7142 locations->SetInAt(0, Location::RequiresRegister());
7143 if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7144 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7145 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7146 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7147 } else if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7148 locations->SetInAt(1, Location::RequiresRegister());
7149 } else {
7150 locations->SetInAt(1, Location::Any());
7151 }
7152 // Note that TypeCheckSlowPathX86_64 uses this "out" register too.
7153 locations->SetOut(Location::RequiresRegister());
7154 locations->AddRegisterTemps(
7155 NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind));
7156 }
7157
7158 void InstructionCodeGeneratorX86_64::VisitInstanceOf(HInstanceOf* instruction) {
7159 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7160 LocationSummary* locations = instruction->GetLocations();
7161 Location obj_loc = locations->InAt(0);
7162 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7163 Location cls = locations->InAt(1);
7164 Location out_loc = locations->Out();
7165 CpuRegister out = out_loc.AsRegister<CpuRegister>();
7166 const size_t num_temps = NumberOfInstanceOfTemps(codegen_->EmitReadBarrier(), type_check_kind);
7167 DCHECK_LE(num_temps, 1u);
7168 Location maybe_temp_loc = (num_temps >= 1u) ? locations->GetTemp(0) : Location::NoLocation();
7169 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7170 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7171 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7172 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7173 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7174 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7175 const uint32_t object_array_data_offset =
7176 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7177 SlowPathCode* slow_path = nullptr;
7178 NearLabel done, zero;
7179
7180 // Return 0 if `obj` is null.
7181 // Avoid null check if we know obj is not null.
7182 if (instruction->MustDoNullCheck()) {
7183 __ testl(obj, obj);
7184 __ j(kEqual, &zero);
7185 }
7186
7187 switch (type_check_kind) {
7188 case TypeCheckKind::kExactCheck: {
7189 ReadBarrierOption read_barrier_option =
7190 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7191 // /* HeapReference<Class> */ out = obj->klass_
7192 GenerateReferenceLoadTwoRegisters(instruction,
7193 out_loc,
7194 obj_loc,
7195 class_offset,
7196 read_barrier_option);
7197 if (cls.IsRegister()) {
7198 __ cmpl(out, cls.AsRegister<CpuRegister>());
7199 } else {
7200 DCHECK(cls.IsStackSlot()) << cls;
7201 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7202 }
7203 if (zero.IsLinked()) {
7204 // Classes must be equal for the instanceof to succeed.
7205 __ j(kNotEqual, &zero);
7206 __ movl(out, Immediate(1));
7207 __ jmp(&done);
7208 } else {
7209 __ setcc(kEqual, out);
7210 // setcc only sets the low byte.
7211 __ andl(out, Immediate(1));
7212 }
7213 break;
7214 }
7215
7216 case TypeCheckKind::kAbstractClassCheck: {
7217 ReadBarrierOption read_barrier_option =
7218 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7219 // /* HeapReference<Class> */ out = obj->klass_
7220 GenerateReferenceLoadTwoRegisters(instruction,
7221 out_loc,
7222 obj_loc,
7223 class_offset,
7224 read_barrier_option);
7225 // If the class is abstract, we eagerly fetch the super class of the
7226 // object to avoid doing a comparison we know will fail.
7227 NearLabel loop, success;
7228 __ Bind(&loop);
7229 // /* HeapReference<Class> */ out = out->super_class_
7230 GenerateReferenceLoadOneRegister(instruction,
7231 out_loc,
7232 super_offset,
7233 maybe_temp_loc,
7234 read_barrier_option);
7235 __ testl(out, out);
7236 // If `out` is null, we use it for the result, and jump to `done`.
7237 __ j(kEqual, &done);
7238 if (cls.IsRegister()) {
7239 __ cmpl(out, cls.AsRegister<CpuRegister>());
7240 } else {
7241 DCHECK(cls.IsStackSlot()) << cls;
7242 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7243 }
7244 __ j(kNotEqual, &loop);
7245 __ movl(out, Immediate(1));
7246 if (zero.IsLinked()) {
7247 __ jmp(&done);
7248 }
7249 break;
7250 }
7251
7252 case TypeCheckKind::kClassHierarchyCheck: {
7253 ReadBarrierOption read_barrier_option =
7254 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7255 // /* HeapReference<Class> */ out = obj->klass_
7256 GenerateReferenceLoadTwoRegisters(instruction,
7257 out_loc,
7258 obj_loc,
7259 class_offset,
7260 read_barrier_option);
7261 // Walk over the class hierarchy to find a match.
7262 NearLabel loop, success;
7263 __ Bind(&loop);
7264 if (cls.IsRegister()) {
7265 __ cmpl(out, cls.AsRegister<CpuRegister>());
7266 } else {
7267 DCHECK(cls.IsStackSlot()) << cls;
7268 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7269 }
7270 __ j(kEqual, &success);
7271 // /* HeapReference<Class> */ out = out->super_class_
7272 GenerateReferenceLoadOneRegister(instruction,
7273 out_loc,
7274 super_offset,
7275 maybe_temp_loc,
7276 read_barrier_option);
7277 __ testl(out, out);
7278 __ j(kNotEqual, &loop);
7279 // If `out` is null, we use it for the result, and jump to `done`.
7280 __ jmp(&done);
7281 __ Bind(&success);
7282 __ movl(out, Immediate(1));
7283 if (zero.IsLinked()) {
7284 __ jmp(&done);
7285 }
7286 break;
7287 }
7288
7289 case TypeCheckKind::kArrayObjectCheck: {
7290 ReadBarrierOption read_barrier_option =
7291 codegen_->ReadBarrierOptionForInstanceOf(instruction);
7292 // /* HeapReference<Class> */ out = obj->klass_
7293 GenerateReferenceLoadTwoRegisters(instruction,
7294 out_loc,
7295 obj_loc,
7296 class_offset,
7297 read_barrier_option);
7298 // Do an exact check.
7299 NearLabel exact_check;
7300 if (cls.IsRegister()) {
7301 __ cmpl(out, cls.AsRegister<CpuRegister>());
7302 } else {
7303 DCHECK(cls.IsStackSlot()) << cls;
7304 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7305 }
7306 __ j(kEqual, &exact_check);
7307 // Otherwise, we need to check that the object's class is a non-primitive array.
7308 // /* HeapReference<Class> */ out = out->component_type_
7309 GenerateReferenceLoadOneRegister(instruction,
7310 out_loc,
7311 component_offset,
7312 maybe_temp_loc,
7313 read_barrier_option);
7314 __ testl(out, out);
7315 // If `out` is null, we use it for the result, and jump to `done`.
7316 __ j(kEqual, &done);
7317 __ cmpw(Address(out, primitive_offset), Immediate(Primitive::kPrimNot));
7318 __ j(kNotEqual, &zero);
7319 __ Bind(&exact_check);
7320 __ movl(out, Immediate(1));
7321 __ jmp(&done);
7322 break;
7323 }
7324
7325 case TypeCheckKind::kArrayCheck: {
7326 // No read barrier since the slow path will retry upon failure.
7327 // /* HeapReference<Class> */ out = obj->klass_
7328 GenerateReferenceLoadTwoRegisters(instruction,
7329 out_loc,
7330 obj_loc,
7331 class_offset,
7332 kWithoutReadBarrier);
7333 if (cls.IsRegister()) {
7334 __ cmpl(out, cls.AsRegister<CpuRegister>());
7335 } else {
7336 DCHECK(cls.IsStackSlot()) << cls;
7337 __ cmpl(out, Address(CpuRegister(RSP), cls.GetStackIndex()));
7338 }
7339 DCHECK(locations->OnlyCallsOnSlowPath());
7340 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7341 instruction, /* is_fatal= */ false);
7342 codegen_->AddSlowPath(slow_path);
7343 __ j(kNotEqual, slow_path->GetEntryLabel());
7344 __ movl(out, Immediate(1));
7345 if (zero.IsLinked()) {
7346 __ jmp(&done);
7347 }
7348 break;
7349 }
7350
7351 case TypeCheckKind::kInterfaceCheck: {
7352 if (codegen_->InstanceOfNeedsReadBarrier(instruction)) {
7353 DCHECK(locations->OnlyCallsOnSlowPath());
7354 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7355 instruction, /* is_fatal= */ false);
7356 codegen_->AddSlowPath(slow_path);
7357 if (codegen_->EmitNonBakerReadBarrier()) {
7358 __ jmp(slow_path->GetEntryLabel());
7359 break;
7360 }
7361 // For Baker read barrier, take the slow path while marking.
7362 __ gs()->cmpl(
7363 Address::Absolute(Thread::IsGcMarkingOffset<kX86_64PointerSize>(), /* no_rip= */ true),
7364 Immediate(0));
7365 __ j(kNotEqual, slow_path->GetEntryLabel());
7366 }
7367
7368 // Fast-path without read barriers.
7369 CpuRegister temp = maybe_temp_loc.AsRegister<CpuRegister>();
7370 // /* HeapReference<Class> */ temp = obj->klass_
7371 __ movl(temp, Address(obj, class_offset));
7372 __ MaybeUnpoisonHeapReference(temp);
7373 // /* HeapReference<Class> */ temp = temp->iftable_
7374 __ movl(temp, Address(temp, iftable_offset));
7375 __ MaybeUnpoisonHeapReference(temp);
7376 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7377 __ movl(out, Address(temp, array_length_offset));
7378 // Maybe poison the `cls` for direct comparison with memory.
7379 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7380 // Loop through the iftable and check if any class matches.
7381 NearLabel loop, end;
7382 __ Bind(&loop);
7383 // Check if we still have an entry to compare.
7384 __ subl(out, Immediate(2));
7385 __ j(kNegative, (zero.IsLinked() && !kPoisonHeapReferences) ? &zero : &end);
7386 // Go to next interface if the classes do not match.
7387 __ cmpl(cls.AsRegister<CpuRegister>(),
7388 CodeGeneratorX86_64::ArrayAddress(temp, out_loc, TIMES_4, object_array_data_offset));
7389 __ j(kNotEqual, &loop);
7390 if (zero.IsLinked()) {
7391 __ movl(out, Immediate(1));
7392 // If `cls` was poisoned above, unpoison it.
7393 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7394 __ jmp(&done);
7395 if (kPoisonHeapReferences) {
7396 // The false case needs to unpoison the class before jumping to `zero`.
7397 __ Bind(&end);
7398 __ UnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7399 __ jmp(&zero);
7400 }
7401 } else {
7402 // To reduce branching, use the fact that the false case branches with a `-2` in `out`.
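        // On the match path we fall through here, load -1 and let the `addl` after `end`
        // turn it into 1. On the miss path the loop exits to `end` with `out` == -2 (the
        // table length is even, so the first negative value is exactly -2) and the same
        // `addl` turns it into 0, so no extra branch is needed to materialize the result.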
7403 __ movl(out, Immediate(-1));
7404 __ Bind(&end);
7405 __ addl(out, Immediate(2));
7406 // If `cls` was poisoned above, unpoison it.
7407 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7408 }
7409 break;
7410 }
7411
7412 case TypeCheckKind::kUnresolvedCheck: {
7413 // Note that we indeed only call on slow path, but we always go
7414 // into the slow path for the unresolved check case.
7415 //
7416 // We cannot directly call the InstanceofNonTrivial runtime
7417 // entry point without resorting to a type checking slow path
7418 // here (i.e. by calling InvokeRuntime directly), as it would
7419 // require assigning fixed registers for the inputs of this
7420 // HInstanceOf instruction (following the runtime calling
7421 // convention), which might be cluttered by the potential first
7422 // read barrier emission at the beginning of this method.
7423 //
7424 // TODO: Introduce a new runtime entry point taking the object
7425 // to test (instead of its class) as argument, and let it deal
7426 // with the read barrier issues. This will let us refactor this
7427 // case of the `switch` code as it was previously (with a direct
7428 // call to the runtime not using a type checking slow path).
7429 // This should also be beneficial for the other cases above.
7430 DCHECK(locations->OnlyCallsOnSlowPath());
7431 slow_path = new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7432 instruction, /* is_fatal= */ false);
7433 codegen_->AddSlowPath(slow_path);
7434 __ jmp(slow_path->GetEntryLabel());
7435 break;
7436 }
7437
7438 case TypeCheckKind::kBitstringCheck: {
7439 // /* HeapReference<Class> */ temp = obj->klass_
7440 GenerateReferenceLoadTwoRegisters(instruction,
7441 out_loc,
7442 obj_loc,
7443 class_offset,
7444 kWithoutReadBarrier);
7445
7446 GenerateBitstringTypeCheckCompare(instruction, out);
7447 if (zero.IsLinked()) {
7448 __ j(kNotEqual, &zero);
7449 __ movl(out, Immediate(1));
7450 __ jmp(&done);
7451 } else {
7452 __ setcc(kEqual, out);
7453 // setcc only sets the low byte.
7454 __ andl(out, Immediate(1));
7455 }
7456 break;
7457 }
7458 }
7459
7460 if (zero.IsLinked()) {
7461 __ Bind(&zero);
7462 __ xorl(out, out);
7463 }
7464
7465 if (done.IsLinked()) {
7466 __ Bind(&done);
7467 }
7468
7469 if (slow_path != nullptr) {
7470 __ Bind(slow_path->GetExitLabel());
7471 }
7472 }
7473
7474 void LocationsBuilderX86_64::VisitCheckCast(HCheckCast* instruction) {
7475 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7476 LocationSummary::CallKind call_kind = codegen_->GetCheckCastCallKind(instruction);
7477 LocationSummary* locations =
7478 new (GetGraph()->GetAllocator()) LocationSummary(instruction, call_kind);
7479 locations->SetInAt(0, Location::RequiresRegister());
7480 if (type_check_kind == TypeCheckKind::kInterfaceCheck) {
7481 // Require a register for the interface check since there is a loop that compares the class to
7482 // a memory address.
7483 locations->SetInAt(1, Location::RequiresRegister());
7484 } else if (type_check_kind == TypeCheckKind::kBitstringCheck) {
7485 locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
7486 locations->SetInAt(2, Location::ConstantLocation(instruction->InputAt(2)));
7487 locations->SetInAt(3, Location::ConstantLocation(instruction->InputAt(3)));
7488 } else {
7489 locations->SetInAt(1, Location::Any());
7490 }
7491 locations->AddRegisterTemps(NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind));
7492 }
7493
7494 void InstructionCodeGeneratorX86_64::VisitCheckCast(HCheckCast* instruction) {
7495 TypeCheckKind type_check_kind = instruction->GetTypeCheckKind();
7496 LocationSummary* locations = instruction->GetLocations();
7497 Location obj_loc = locations->InAt(0);
7498 CpuRegister obj = obj_loc.AsRegister<CpuRegister>();
7499 Location cls = locations->InAt(1);
7500 Location temp_loc = locations->GetTemp(0);
7501 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
7502 const size_t num_temps = NumberOfCheckCastTemps(codegen_->EmitReadBarrier(), type_check_kind);
7503 DCHECK_GE(num_temps, 1u);
7504 DCHECK_LE(num_temps, 2u);
7505 Location maybe_temp2_loc = (num_temps >= 2u) ? locations->GetTemp(1) : Location::NoLocation();
7506 const uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
7507 const uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
7508 const uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
7509 const uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
7510 const uint32_t iftable_offset = mirror::Class::IfTableOffset().Uint32Value();
7511 const uint32_t array_length_offset = mirror::Array::LengthOffset().Uint32Value();
7512 const uint32_t object_array_data_offset =
7513 mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
7514
7515 bool is_type_check_slow_path_fatal = codegen_->IsTypeCheckSlowPathFatal(instruction);
7516 SlowPathCode* type_check_slow_path =
7517 new (codegen_->GetScopedAllocator()) TypeCheckSlowPathX86_64(
7518 instruction, is_type_check_slow_path_fatal);
7519 codegen_->AddSlowPath(type_check_slow_path);
7520
7521
7522 NearLabel done;
7523 // Avoid null check if we know obj is not null.
7524 if (instruction->MustDoNullCheck()) {
7525 __ testl(obj, obj);
7526 __ j(kEqual, &done);
7527 }
7528
7529 switch (type_check_kind) {
7530 case TypeCheckKind::kExactCheck:
7531 case TypeCheckKind::kArrayCheck: {
7532 // /* HeapReference<Class> */ temp = obj->klass_
7533 GenerateReferenceLoadTwoRegisters(instruction,
7534 temp_loc,
7535 obj_loc,
7536 class_offset,
7537 kWithoutReadBarrier);
7538 if (cls.IsRegister()) {
7539 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7540 } else {
7541 DCHECK(cls.IsStackSlot()) << cls;
7542 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7543 }
7544 // Jump to slow path for throwing the exception or doing a
7545 // more involved array check.
7546 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7547 break;
7548 }
7549
7550 case TypeCheckKind::kAbstractClassCheck: {
7551 // /* HeapReference<Class> */ temp = obj->klass_
7552 GenerateReferenceLoadTwoRegisters(instruction,
7553 temp_loc,
7554 obj_loc,
7555 class_offset,
7556 kWithoutReadBarrier);
7557 // If the class is abstract, we eagerly fetch the super class of the
7558 // object to avoid doing a comparison we know will fail.
7559 NearLabel loop;
7560 __ Bind(&loop);
7561 // /* HeapReference<Class> */ temp = temp->super_class_
7562 GenerateReferenceLoadOneRegister(instruction,
7563 temp_loc,
7564 super_offset,
7565 maybe_temp2_loc,
7566 kWithoutReadBarrier);
7567
7568 // If the class reference currently in `temp` is null, jump to the slow path to throw the
7569 // exception.
7570 __ testl(temp, temp);
7571 // Otherwise, compare the classes.
7572 __ j(kZero, type_check_slow_path->GetEntryLabel());
7573 if (cls.IsRegister()) {
7574 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7575 } else {
7576 DCHECK(cls.IsStackSlot()) << cls;
7577 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7578 }
7579 __ j(kNotEqual, &loop);
7580 break;
7581 }
7582
7583 case TypeCheckKind::kClassHierarchyCheck: {
7584 // /* HeapReference<Class> */ temp = obj->klass_
7585 GenerateReferenceLoadTwoRegisters(instruction,
7586 temp_loc,
7587 obj_loc,
7588 class_offset,
7589 kWithoutReadBarrier);
7590 // Walk over the class hierarchy to find a match.
7591 NearLabel loop;
7592 __ Bind(&loop);
7593 if (cls.IsRegister()) {
7594 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7595 } else {
7596 DCHECK(cls.IsStackSlot()) << cls;
7597 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7598 }
7599 __ j(kEqual, &done);
7600
7601 // /* HeapReference<Class> */ temp = temp->super_class_
7602 GenerateReferenceLoadOneRegister(instruction,
7603 temp_loc,
7604 super_offset,
7605 maybe_temp2_loc,
7606 kWithoutReadBarrier);
7607
7608 // If the class reference currently in `temp` is not null, jump
7609 // back at the beginning of the loop.
7610 __ testl(temp, temp);
7611 __ j(kNotZero, &loop);
7612 // Otherwise, jump to the slow path to throw the exception.
7613 __ jmp(type_check_slow_path->GetEntryLabel());
7614 break;
7615 }
7616
7617 case TypeCheckKind::kArrayObjectCheck: {
7618 // /* HeapReference<Class> */ temp = obj->klass_
7619 GenerateReferenceLoadTwoRegisters(instruction,
7620 temp_loc,
7621 obj_loc,
7622 class_offset,
7623 kWithoutReadBarrier);
7624 // Do an exact check.
7625 NearLabel check_non_primitive_component_type;
7626 if (cls.IsRegister()) {
7627 __ cmpl(temp, cls.AsRegister<CpuRegister>());
7628 } else {
7629 DCHECK(cls.IsStackSlot()) << cls;
7630 __ cmpl(temp, Address(CpuRegister(RSP), cls.GetStackIndex()));
7631 }
7632 __ j(kEqual, &done);
7633
7634 // Otherwise, we need to check that the object's class is a non-primitive array.
7635 // /* HeapReference<Class> */ temp = temp->component_type_
7636 GenerateReferenceLoadOneRegister(instruction,
7637 temp_loc,
7638 component_offset,
7639 maybe_temp2_loc,
7640 kWithoutReadBarrier);
7641
7642 // If the component type is not null (i.e. the object is indeed
7643 // an array), jump to label `check_non_primitive_component_type`
7644 // to further check that this component type is not a primitive
7645 // type.
7646 __ testl(temp, temp);
7647 // Otherwise, jump to the slow path to throw the exception.
7648 __ j(kZero, type_check_slow_path->GetEntryLabel());
7649 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
7650 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7651 break;
7652 }
7653
7654 case TypeCheckKind::kUnresolvedCheck: {
7655 // We always go into the type check slow path for the unresolved case.
7656 //
7657 // We cannot directly call the CheckCast runtime entry point
7658 // without resorting to a type checking slow path here (i.e. by
7659 // calling InvokeRuntime directly), as it would require
7660 // assigning fixed registers for the inputs of this HCheckCast
7661 // instruction (following the runtime calling convention), which
7662 // might be cluttered by the potential first read barrier
7663 // emission at the beginning of this method.
7664 __ jmp(type_check_slow_path->GetEntryLabel());
7665 break;
7666 }
7667
7668 case TypeCheckKind::kInterfaceCheck: {
7669 // Fast path for the interface check. Try to avoid read barriers to improve the fast path.
7670 // We cannot get false positives by doing this.
7671 // /* HeapReference<Class> */ temp = obj->klass_
7672 GenerateReferenceLoadTwoRegisters(instruction,
7673 temp_loc,
7674 obj_loc,
7675 class_offset,
7676 kWithoutReadBarrier);
7677
7678 // /* HeapReference<Class> */ temp = temp->iftable_
7679 GenerateReferenceLoadOneRegister(instruction,
7680 temp_loc,
7681 iftable_offset,
7682 maybe_temp2_loc,
7683 kWithoutReadBarrier);
7684 // Load the size of the `IfTable`. The `Class::iftable_` is never null.
7685 __ movl(maybe_temp2_loc.AsRegister<CpuRegister>(), Address(temp, array_length_offset));
7686 // Maybe poison the `cls` for direct comparison with memory.
7687 __ MaybePoisonHeapReference(cls.AsRegister<CpuRegister>());
7688 // Loop through the iftable and check if any class matches.
7689 NearLabel start_loop;
7690 __ Bind(&start_loop);
7691 // Check if we still have an entry to compare.
7692 __ subl(maybe_temp2_loc.AsRegister<CpuRegister>(), Immediate(2));
7693 __ j(kNegative, type_check_slow_path->GetEntryLabel());
7694 // Go to next interface if the classes do not match.
7695 __ cmpl(cls.AsRegister<CpuRegister>(),
7696 CodeGeneratorX86_64::ArrayAddress(temp,
7697 maybe_temp2_loc,
7698 TIMES_4,
7699 object_array_data_offset));
7700 __ j(kNotEqual, &start_loop); // Fall through (match) if same class.
7701 // If `cls` was poisoned above, unpoison it.
7702 __ MaybeUnpoisonHeapReference(cls.AsRegister<CpuRegister>());
7703 break;
7704 }
7705
7706 case TypeCheckKind::kBitstringCheck: {
7707 // /* HeapReference<Class> */ temp = obj->klass_
7708 GenerateReferenceLoadTwoRegisters(instruction,
7709 temp_loc,
7710 obj_loc,
7711 class_offset,
7712 kWithoutReadBarrier);
7713
7714 GenerateBitstringTypeCheckCompare(instruction, temp);
7715 __ j(kNotEqual, type_check_slow_path->GetEntryLabel());
7716 break;
7717 }
7718 }
7719
7720 if (done.IsLinked()) {
7721 __ Bind(&done);
7722 }
7723
7724 __ Bind(type_check_slow_path->GetExitLabel());
7725 }
7726
7727 void LocationsBuilderX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7728 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(
7729 instruction, LocationSummary::kCallOnMainOnly);
7730 InvokeRuntimeCallingConvention calling_convention;
7731 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
7732 }
7733
7734 void InstructionCodeGeneratorX86_64::VisitMonitorOperation(HMonitorOperation* instruction) {
7735 codegen_->InvokeRuntime(instruction->IsEnter() ? kQuickLockObject : kQuickUnlockObject,
7736 instruction);
7737 if (instruction->IsEnter()) {
7738 CheckEntrypointTypes<kQuickLockObject, void, mirror::Object*>();
7739 } else {
7740 CheckEntrypointTypes<kQuickUnlockObject, void, mirror::Object*>();
7741 }
7742 }
7743
7744 void LocationsBuilderX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7745 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7746 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7747 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7748 locations->SetInAt(0, Location::RequiresRegister());
7749 // There is no immediate variant of negated bitwise and in X86.
7750 locations->SetInAt(1, Location::RequiresRegister());
7751 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7752 }
7753
7754 void LocationsBuilderX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7755 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
7756 DCHECK(DataType::IsIntOrLongType(instruction->GetType())) << instruction->GetType();
7757 LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
7758 locations->SetInAt(0, Location::RequiresRegister());
7759 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
7760 }
7761
7762 void InstructionCodeGeneratorX86_64::VisitX86AndNot(HX86AndNot* instruction) {
7763 LocationSummary* locations = instruction->GetLocations();
7764 Location first = locations->InAt(0);
7765 Location second = locations->InAt(1);
7766 Location dest = locations->Out();
7767 __ andn(dest.AsRegister<CpuRegister>(), first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7768 }
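// For reference, the `andn` emitted above computes dest = ~first & second (the ANDN
// semantics) in a single instruction; since there is no immediate form of this operation,
// both inputs were required to be in registers in the locations builder above.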
7769
7770 void InstructionCodeGeneratorX86_64::VisitX86MaskOrResetLeastSetBit(HX86MaskOrResetLeastSetBit* instruction) {
7771 LocationSummary* locations = instruction->GetLocations();
7772 Location src = locations->InAt(0);
7773 Location dest = locations->Out();
7774 switch (instruction->GetOpKind()) {
7775 case HInstruction::kAnd:
7776 __ blsr(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7777 break;
7778 case HInstruction::kXor:
7779 __ blsmsk(dest.AsRegister<CpuRegister>(), src.AsRegister<CpuRegister>());
7780 break;
7781 default:
7782 LOG(FATAL) << "Unreachable";
7783 }
7784 }
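// For reference: `blsr` computes dest = src & (src - 1), clearing the lowest set bit (the
// kAnd case), and `blsmsk` computes dest = src ^ (src - 1), a mask of all bits up to and
// including the lowest set bit (the kXor case). E.g. for src = 0b10100, blsr yields 0b10000
// and blsmsk yields 0b00111.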
7785
7786 void LocationsBuilderX86_64::VisitAnd(HAnd* instruction) { HandleBitwiseOperation(instruction); }
7787 void LocationsBuilderX86_64::VisitOr(HOr* instruction) { HandleBitwiseOperation(instruction); }
7788 void LocationsBuilderX86_64::VisitXor(HXor* instruction) { HandleBitwiseOperation(instruction); }
7789
7790 void LocationsBuilderX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7791 LocationSummary* locations =
7792 new (GetGraph()->GetAllocator()) LocationSummary(instruction, LocationSummary::kNoCall);
7793 DCHECK(instruction->GetResultType() == DataType::Type::kInt32
7794 || instruction->GetResultType() == DataType::Type::kInt64);
7795 locations->SetInAt(0, Location::RequiresRegister());
7796 locations->SetInAt(1, Location::Any());
7797 locations->SetOut(Location::SameAsFirstInput());
7798 }
7799
7800 void InstructionCodeGeneratorX86_64::VisitAnd(HAnd* instruction) {
7801 HandleBitwiseOperation(instruction);
7802 }
7803
7804 void InstructionCodeGeneratorX86_64::VisitOr(HOr* instruction) {
7805 HandleBitwiseOperation(instruction);
7806 }
7807
7808 void InstructionCodeGeneratorX86_64::VisitXor(HXor* instruction) {
7809 HandleBitwiseOperation(instruction);
7810 }
7811
7812 void InstructionCodeGeneratorX86_64::HandleBitwiseOperation(HBinaryOperation* instruction) {
7813 LocationSummary* locations = instruction->GetLocations();
7814 Location first = locations->InAt(0);
7815 Location second = locations->InAt(1);
7816 DCHECK(first.Equals(locations->Out()));
7817
7818 if (instruction->GetResultType() == DataType::Type::kInt32) {
7819 if (second.IsRegister()) {
7820 if (instruction->IsAnd()) {
7821 __ andl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7822 } else if (instruction->IsOr()) {
7823 __ orl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7824 } else {
7825 DCHECK(instruction->IsXor());
7826 __ xorl(first.AsRegister<CpuRegister>(), second.AsRegister<CpuRegister>());
7827 }
7828 } else if (second.IsConstant()) {
7829 Immediate imm(second.GetConstant()->AsIntConstant()->GetValue());
7830 if (instruction->IsAnd()) {
7831 __ andl(first.AsRegister<CpuRegister>(), imm);
7832 } else if (instruction->IsOr()) {
7833 __ orl(first.AsRegister<CpuRegister>(), imm);
7834 } else {
7835 DCHECK(instruction->IsXor());
7836 __ xorl(first.AsRegister<CpuRegister>(), imm);
7837 }
7838 } else {
7839 Address address(CpuRegister(RSP), second.GetStackIndex());
7840 if (instruction->IsAnd()) {
7841 __ andl(first.AsRegister<CpuRegister>(), address);
7842 } else if (instruction->IsOr()) {
7843 __ orl(first.AsRegister<CpuRegister>(), address);
7844 } else {
7845 DCHECK(instruction->IsXor());
7846 __ xorl(first.AsRegister<CpuRegister>(), address);
7847 }
7848 }
7849 } else {
7850 DCHECK_EQ(instruction->GetResultType(), DataType::Type::kInt64);
7851 CpuRegister first_reg = first.AsRegister<CpuRegister>();
7852 bool second_is_constant = false;
7853 int64_t value = 0;
7854 if (second.IsConstant()) {
7855 second_is_constant = true;
7856 value = second.GetConstant()->AsLongConstant()->GetValue();
7857 }
7858 bool is_int32_value = IsInt<32>(value);
7859
7860 if (instruction->IsAnd()) {
7861 if (second_is_constant) {
7862 if (is_int32_value) {
7863 __ andq(first_reg, Immediate(static_cast<int32_t>(value)));
7864 } else {
7865 __ andq(first_reg, codegen_->LiteralInt64Address(value));
7866 }
7867 } else if (second.IsDoubleStackSlot()) {
7868 __ andq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7869 } else {
7870 __ andq(first_reg, second.AsRegister<CpuRegister>());
7871 }
7872 } else if (instruction->IsOr()) {
7873 if (second_is_constant) {
7874 if (is_int32_value) {
7875 __ orq(first_reg, Immediate(static_cast<int32_t>(value)));
7876 } else {
7877 __ orq(first_reg, codegen_->LiteralInt64Address(value));
7878 }
7879 } else if (second.IsDoubleStackSlot()) {
7880 __ orq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7881 } else {
7882 __ orq(first_reg, second.AsRegister<CpuRegister>());
7883 }
7884 } else {
7885 DCHECK(instruction->IsXor());
7886 if (second_is_constant) {
7887 if (is_int32_value) {
7888 __ xorq(first_reg, Immediate(static_cast<int32_t>(value)));
7889 } else {
7890 __ xorq(first_reg, codegen_->LiteralInt64Address(value));
7891 }
7892 } else if (second.IsDoubleStackSlot()) {
7893 __ xorq(first_reg, Address(CpuRegister(RSP), second.GetStackIndex()));
7894 } else {
7895 __ xorq(first_reg, second.AsRegister<CpuRegister>());
7896 }
7897 }
7898 }
7899 }
7900
7901 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadOneRegister(
7902 HInstruction* instruction,
7903 Location out,
7904 uint32_t offset,
7905 Location maybe_temp,
7906 ReadBarrierOption read_barrier_option) {
7907 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7908 if (read_barrier_option == kWithReadBarrier) {
7909 DCHECK(codegen_->EmitReadBarrier());
7910 if (kUseBakerReadBarrier) {
7911 // Load with fast path based Baker's read barrier.
7912 // /* HeapReference<Object> */ out = *(out + offset)
7913 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7914 instruction, out, out_reg, offset, /* needs_null_check= */ false);
7915 } else {
7916 // Load with slow path based read barrier.
7917 // Save the value of `out` into `maybe_temp` before overwriting it
7918 // in the following move operation, as we will need it for the
7919 // read barrier below.
7920 DCHECK(maybe_temp.IsRegister()) << maybe_temp;
7921 __ movl(maybe_temp.AsRegister<CpuRegister>(), out_reg);
7922 // /* HeapReference<Object> */ out = *(out + offset)
7923 __ movl(out_reg, Address(out_reg, offset));
7924 codegen_->GenerateReadBarrierSlow(instruction, out, out, maybe_temp, offset);
7925 }
7926 } else {
7927 // Plain load with no read barrier.
7928 // /* HeapReference<Object> */ out = *(out + offset)
7929 __ movl(out_reg, Address(out_reg, offset));
7930 __ MaybeUnpoisonHeapReference(out_reg);
7931 }
7932 }
7933
7934 void InstructionCodeGeneratorX86_64::GenerateReferenceLoadTwoRegisters(
7935 HInstruction* instruction,
7936 Location out,
7937 Location obj,
7938 uint32_t offset,
7939 ReadBarrierOption read_barrier_option) {
7940 CpuRegister out_reg = out.AsRegister<CpuRegister>();
7941 CpuRegister obj_reg = obj.AsRegister<CpuRegister>();
7942 if (read_barrier_option == kWithReadBarrier) {
7943 DCHECK(codegen_->EmitReadBarrier());
7944 if (kUseBakerReadBarrier) {
7945 // Load with fast path based Baker's read barrier.
7946 // /* HeapReference<Object> */ out = *(obj + offset)
7947 codegen_->GenerateFieldLoadWithBakerReadBarrier(
7948 instruction, out, obj_reg, offset, /* needs_null_check= */ false);
7949 } else {
7950 // Load with slow path based read barrier.
7951 // /* HeapReference<Object> */ out = *(obj + offset)
7952 __ movl(out_reg, Address(obj_reg, offset));
7953 codegen_->GenerateReadBarrierSlow(instruction, out, out, obj, offset);
7954 }
7955 } else {
7956 // Plain load with no read barrier.
7957 // /* HeapReference<Object> */ out = *(obj + offset)
7958 __ movl(out_reg, Address(obj_reg, offset));
7959 __ MaybeUnpoisonHeapReference(out_reg);
7960 }
7961 }
7962
7963 void InstructionCodeGeneratorX86_64::GenerateGcRootFieldLoad(
7964 HInstruction* instruction,
7965 Location root,
7966 const Address& address,
7967 Label* fixup_label,
7968 ReadBarrierOption read_barrier_option) {
7969 CpuRegister root_reg = root.AsRegister<CpuRegister>();
7970 if (read_barrier_option == kWithReadBarrier) {
7971 DCHECK(codegen_->EmitReadBarrier());
7972 if (kUseBakerReadBarrier) {
7973 // Fast path implementation of art::ReadBarrier::BarrierForRoot when
7974 // Baker's read barriers are used:
7975 //
7976 // root = obj.field;
7977 // temp = Thread::Current()->pReadBarrierMarkReg ## root.reg()
7978 // if (temp != null) {
7979 // root = temp(root)
7980 // }
7981
7982 // /* GcRoot<mirror::Object> */ root = *address
7983 __ movl(root_reg, address);
7984 if (fixup_label != nullptr) {
7985 __ Bind(fixup_label);
7986 }
7987 static_assert(
7988 sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(GcRoot<mirror::Object>),
7989 "art::mirror::CompressedReference<mirror::Object> and art::GcRoot<mirror::Object> "
7990 "have different sizes.");
7991 static_assert(sizeof(mirror::CompressedReference<mirror::Object>) == sizeof(int32_t),
7992 "art::mirror::CompressedReference<mirror::Object> and int32_t "
7993 "have different sizes.");
7994
7995 // Slow path marking the GC root `root`.
7996 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
7997 instruction, root, /* unpoison_ref_before_marking= */ false);
7998 codegen_->AddSlowPath(slow_path);
7999
8000 // Test the `Thread::Current()->pReadBarrierMarkReg ## root.reg()` entrypoint.
8001 const int32_t entry_point_offset =
8002 Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(root.reg());
8003 __ gs()->cmpl(Address::Absolute(entry_point_offset, /* no_rip= */ true), Immediate(0));
8004 // The entrypoint is null when the GC is not marking.
8005 __ j(kNotEqual, slow_path->GetEntryLabel());
8006 __ Bind(slow_path->GetExitLabel());
8007 } else {
8008 // GC root loaded through a slow path for read barriers other
8009 // than Baker's.
8010 // /* GcRoot<mirror::Object>* */ root = address
8011 __ leaq(root_reg, address);
8012 if (fixup_label != nullptr) {
8013 __ Bind(fixup_label);
8014 }
8015 // /* mirror::Object* */ root = root->Read()
8016 codegen_->GenerateReadBarrierForRootSlow(instruction, root, root);
8017 }
8018 } else {
8019 // Plain GC root load with no read barrier.
8020 // /* GcRoot<mirror::Object> */ root = *address
8021 __ movl(root_reg, address);
8022 if (fixup_label != nullptr) {
8023 __ Bind(fixup_label);
8024 }
8025 // Note that GC roots are not affected by heap poisoning, thus we
8026 // do not have to unpoison `root_reg` here.
8027 }
8028 }
8029
8030 void CodeGeneratorX86_64::GenerateFieldLoadWithBakerReadBarrier(HInstruction* instruction,
8031 Location ref,
8032 CpuRegister obj,
8033 uint32_t offset,
8034 bool needs_null_check) {
8035 DCHECK(EmitBakerReadBarrier());
8036
8037 // /* HeapReference<Object> */ ref = *(obj + offset)
8038 Address src(obj, offset);
8039 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8040 }
8041
8042 void CodeGeneratorX86_64::GenerateArrayLoadWithBakerReadBarrier(HInstruction* instruction,
8043 Location ref,
8044 CpuRegister obj,
8045 uint32_t data_offset,
8046 Location index,
8047 bool needs_null_check) {
8048 DCHECK(EmitBakerReadBarrier());
8049
8050 static_assert(
8051 sizeof(mirror::HeapReference<mirror::Object>) == sizeof(int32_t),
8052 "art::mirror::HeapReference<art::mirror::Object> and int32_t have different sizes.");
8053 // /* HeapReference<Object> */ ref =
8054 // *(obj + data_offset + index * sizeof(HeapReference<Object>))
8055 Address src = CodeGeneratorX86_64::ArrayAddress(obj, index, TIMES_4, data_offset);
8056 GenerateReferenceLoadWithBakerReadBarrier(instruction, ref, obj, src, needs_null_check);
8057 }
8058
8059 void CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier(HInstruction* instruction,
8060 Location ref,
8061 CpuRegister obj,
8062 const Address& src,
8063 bool needs_null_check,
8064 bool always_update_field,
8065 CpuRegister* temp1,
8066 CpuRegister* temp2) {
8067 DCHECK(EmitBakerReadBarrier());
8068
8069 // In slow path based read barriers, the read barrier call is
8070 // inserted after the original load. However, in fast path based
8071 // Baker's read barriers, we need to perform the load of
8072 // mirror::Object::monitor_ *before* the original reference load.
8073 // This load-load ordering is required by the read barrier.
8074 // The fast path/slow path (for Baker's algorithm) should look like:
8075 //
8076 // uint32_t rb_state = Lockword(obj->monitor_).ReadBarrierState();
8077 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
8078 // HeapReference<Object> ref = *src; // Original reference load.
8079 // bool is_gray = (rb_state == ReadBarrier::GrayState());
8080 // if (is_gray) {
8081 // ref = ReadBarrier::Mark(ref); // Performed by runtime entrypoint slow path.
8082 // }
8083 //
8084 // Note: the original implementation in ReadBarrier::Barrier is
8085 // slightly more complex as:
8086 // - it implements the load-load fence using a data dependency on
8087 // the high-bits of rb_state, which are expected to be all zeroes
8088 // (we use CodeGeneratorX86_64::GenerateMemoryBarrier instead
8089 // here, which is a no-op thanks to the x86-64 memory model);
8090 // - it performs additional checks that we do not do here for
8091 // performance reasons.
8092
8093 CpuRegister ref_reg = ref.AsRegister<CpuRegister>();
8094 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
8095
8096 // Given the numeric representation, it's enough to check the low bit of the rb_state.
8097 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
8098 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
8099 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
8100 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
8101 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
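// Illustration (hedged; the actual shift comes from LockWord's layout and may change): with
// kReadBarrierStateShift == 28, gray_byte_position == 3, gray_bit_position == 4 and
// test_value == 0x10, so the testb below probes a single bit in the top byte of the lock word.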
8102
8103 // if (rb_state == ReadBarrier::GrayState())
8104 // ref = ReadBarrier::Mark(ref);
8105 // At this point, just do the "if" and make sure that flags are preserved until the branch.
8106 __ testb(Address(obj, monitor_offset + gray_byte_position), Immediate(test_value));
8107 if (needs_null_check) {
8108 MaybeRecordImplicitNullCheck(instruction);
8109 }
8110
8111 // Load fence to prevent load-load reordering.
8112 // Note that this is a no-op, thanks to the x86-64 memory model.
8113 GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
8114
8115 // The actual reference load.
8116 // /* HeapReference<Object> */ ref = *src
8117 __ movl(ref_reg, src); // Flags are unaffected.
8118
8119 // Note: Reference unpoisoning modifies the flags, so we need to delay it until after the branch.
8120 // Slow path marking the object `ref` when it is gray.
8121 SlowPathCode* slow_path;
8122 if (always_update_field) {
8123 DCHECK(temp1 != nullptr);
8124 DCHECK(temp2 != nullptr);
8125 slow_path = new (GetScopedAllocator()) ReadBarrierMarkAndUpdateFieldSlowPathX86_64(
8126 instruction, ref, obj, src, /* unpoison_ref_before_marking= */ true, *temp1, *temp2);
8127 } else {
8128 slow_path = new (GetScopedAllocator()) ReadBarrierMarkSlowPathX86_64(
8129 instruction, ref, /* unpoison_ref_before_marking= */ true);
8130 }
8131 AddSlowPath(slow_path);
8132
8133 // We have done the "if" of the gray bit check above; now branch based on the flags.
8134 __ j(kNotZero, slow_path->GetEntryLabel());
8135
8136 // Object* ref = ref_addr->AsMirrorPtr()
8137 __ MaybeUnpoisonHeapReference(ref_reg);
8138
8139 __ Bind(slow_path->GetExitLabel());
8140 }
8141
8142 void CodeGeneratorX86_64::GenerateReadBarrierSlow(HInstruction* instruction,
8143 Location out,
8144 Location ref,
8145 Location obj,
8146 uint32_t offset,
8147 Location index) {
8148 DCHECK(EmitReadBarrier());
8149
8150 // Insert a slow path based read barrier *after* the reference load.
8151 //
8152 // If heap poisoning is enabled, the unpoisoning of the loaded
8153 // reference will be carried out by the runtime within the slow
8154 // path.
8155 //
8156 // Note that `ref` currently does not get unpoisoned (when heap
8157 // poisoning is enabled), which is alright as the `ref` argument is
8158 // not used by the artReadBarrierSlow entry point.
8159 //
8160 // TODO: Unpoison `ref` when it is used by artReadBarrierSlow.
8161 SlowPathCode* slow_path = new (GetScopedAllocator())
8162 ReadBarrierForHeapReferenceSlowPathX86_64(instruction, out, ref, obj, offset, index);
8163 AddSlowPath(slow_path);
8164
8165 __ jmp(slow_path->GetEntryLabel());
8166 __ Bind(slow_path->GetExitLabel());
8167 }
8168
8169 void CodeGeneratorX86_64::MaybeGenerateReadBarrierSlow(HInstruction* instruction,
8170 Location out,
8171 Location ref,
8172 Location obj,
8173 uint32_t offset,
8174 Location index) {
8175 if (EmitReadBarrier()) {
8176 // Baker's read barriers shall be handled by the fast path
8177 // (CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier).
8178 DCHECK(!kUseBakerReadBarrier);
8179 // If heap poisoning is enabled, unpoisoning will be taken care of
8180 // by the runtime within the slow path.
8181 GenerateReadBarrierSlow(instruction, out, ref, obj, offset, index);
8182 } else if (kPoisonHeapReferences) {
8183 __ UnpoisonHeapReference(out.AsRegister<CpuRegister>());
8184 }
8185 }
8186
8187 void CodeGeneratorX86_64::GenerateReadBarrierForRootSlow(HInstruction* instruction,
8188 Location out,
8189 Location root) {
8190 DCHECK(EmitReadBarrier());
8191
8192 // Insert a slow path based read barrier *after* the GC root load.
8193 //
8194 // Note that GC roots are not affected by heap poisoning, so we do
8195 // not need to do anything special for this here.
8196 SlowPathCode* slow_path =
8197 new (GetScopedAllocator()) ReadBarrierForRootSlowPathX86_64(instruction, out, root);
8198 AddSlowPath(slow_path);
8199
8200 __ jmp(slow_path->GetEntryLabel());
8201 __ Bind(slow_path->GetExitLabel());
8202 }
8203
8204 void LocationsBuilderX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8205 // Nothing to do, this should be removed during prepare for register allocator.
8206 LOG(FATAL) << "Unreachable";
8207 }
8208
8209 void InstructionCodeGeneratorX86_64::VisitBoundType([[maybe_unused]] HBoundType* instruction) {
8210 // Nothing to do, this should be removed during prepare for register allocator.
8211 LOG(FATAL) << "Unreachable";
8212 }
8213
8214 // Simple implementation of packed switch - generate cascaded compare/jumps.
8215 void LocationsBuilderX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8216 LocationSummary* locations =
8217 new (GetGraph()->GetAllocator()) LocationSummary(switch_instr, LocationSummary::kNoCall);
8218 locations->SetInAt(0, Location::RequiresRegister());
8219 locations->AddRegisterTemps(2);
8220 }
8221
8222 void InstructionCodeGeneratorX86_64::VisitPackedSwitch(HPackedSwitch* switch_instr) {
8223 int32_t lower_bound = switch_instr->GetStartValue();
8224 uint32_t num_entries = switch_instr->GetNumEntries();
8225 LocationSummary* locations = switch_instr->GetLocations();
8226 CpuRegister value_reg_in = locations->InAt(0).AsRegister<CpuRegister>();
8227 CpuRegister temp_reg = locations->GetTemp(0).AsRegister<CpuRegister>();
8228 CpuRegister base_reg = locations->GetTemp(1).AsRegister<CpuRegister>();
8229 HBasicBlock* default_block = switch_instr->GetDefaultBlock();
8230
8231 // Should we generate smaller inline compare/jumps?
8232 if (num_entries <= kPackedSwitchJumpTableThreshold) {
8233 // Figure out the correct compare values and jump conditions.
8234 // Handle the first compare/branch as a special case because it might
8235 // jump to the default case.
8236 DCHECK_GT(num_entries, 2u);
8237 Condition first_condition;
8238 uint32_t index;
8239 const ArenaVector<HBasicBlock*>& successors = switch_instr->GetBlock()->GetSuccessors();
8240 if (lower_bound != 0) {
8241 first_condition = kLess;
8242 __ cmpl(value_reg_in, Immediate(lower_bound));
8243 __ j(first_condition, codegen_->GetLabelOf(default_block));
8244 __ j(kEqual, codegen_->GetLabelOf(successors[0]));
8245
8246 index = 1;
8247 } else {
8248 // Handle all the compare/jumps below.
8249 first_condition = kBelow;
8250 index = 0;
8251 }
8252
8253 // Handle the rest of the compare/jumps.
8254 for (; index + 1 < num_entries; index += 2) {
8255 int32_t compare_to_value = lower_bound + index + 1;
8256 __ cmpl(value_reg_in, Immediate(compare_to_value));
8257 // Jump to successors[index] if value < case_value[index].
8258 __ j(first_condition, codegen_->GetLabelOf(successors[index]));
8259 // Jump to successors[index + 1] if value == case_value[index + 1].
8260 __ j(kEqual, codegen_->GetLabelOf(successors[index + 1]));
8261 }
8262
8263 if (index != num_entries) {
8264 // There are an odd number of entries. Handle the last one.
8265 DCHECK_EQ(index + 1, num_entries);
8266 __ cmpl(value_reg_in, Immediate(static_cast<int32_t>(lower_bound + index)));
8267 __ j(kEqual, codegen_->GetLabelOf(successors[index]));
8268 }
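// Rough illustration only (not verbatim emitted code): with lower_bound == 1 and
// num_entries == 3, the cascade above amounts to:
//   cmp value, 1; jl default; je successors[0];
//   cmp value, 3; jl successors[1]; je successors[2];
// followed by the jump to the default block below (unless it is the fall-through successor).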
8269
8270 // And the default for any other value.
8271 if (!codegen_->GoesToNextBlock(switch_instr->GetBlock(), default_block)) {
8272 __ jmp(codegen_->GetLabelOf(default_block));
8273 }
8274 return;
8275 }
8276
8277 // Remove the bias, if needed.
8278 Register value_reg_out = value_reg_in.AsRegister();
8279 if (lower_bound != 0) {
8280 __ leal(temp_reg, Address(value_reg_in, -lower_bound));
8281 value_reg_out = temp_reg.AsRegister();
8282 }
8283 CpuRegister value_reg(value_reg_out);
8284
8285 // Is the value in range?
8286 __ cmpl(value_reg, Immediate(num_entries - 1));
8287 __ j(kAbove, codegen_->GetLabelOf(default_block));
8288
8289 // We are in the range of the table.
8290 // Load the address of the jump table in the constant area.
8291 __ leaq(base_reg, codegen_->LiteralCaseTable(switch_instr));
8292
8293 // Load the (signed) offset from the jump table.
8294 __ movsxd(temp_reg, Address(base_reg, value_reg, TIMES_4, 0));
8295
8296 // Add the offset to the address of the table base.
8297 __ addq(temp_reg, base_reg);
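// For example, if the loaded entry were -0x40, temp_reg would now point 0x40 bytes before the
// table base, i.e. at the start of the selected successor block; the entries are written by
// JumpTableRIPFixup::CreateJumpTable below as block position minus table position.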
8298
8299 // And jump.
8300 __ jmp(temp_reg);
8301 }
8302
8303 void LocationsBuilderX86_64::VisitIntermediateAddress(
8304 [[maybe_unused]] HIntermediateAddress* instruction) {
8305 LOG(FATAL) << "Unreachable";
8306 }
8307
8308 void InstructionCodeGeneratorX86_64::VisitIntermediateAddress(
8309 [[maybe_unused]] HIntermediateAddress* instruction) {
8310 LOG(FATAL) << "Unreachable";
8311 }
8312
8313 void CodeGeneratorX86_64::Load32BitValue(CpuRegister dest, int32_t value) {
8314 if (value == 0) {
8315 __ xorl(dest, dest);
8316 } else {
8317 __ movl(dest, Immediate(value));
8318 }
8319 }
8320
8321 void CodeGeneratorX86_64::Load64BitValue(CpuRegister dest, int64_t value) {
8322 if (value == 0) {
8323 // Clears upper bits too.
8324 __ xorl(dest, dest);
8325 } else if (IsUint<32>(value)) {
8326 // We can use a 32 bit move, as it will zero-extend and is shorter.
8327 __ movl(dest, Immediate(static_cast<int32_t>(value)));
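// For example, 0x00000000FFFFFFFF is loaded with a movl of 0xFFFFFFFF; the 32-bit write
// clears bits 63..32 on x86-64, avoiding the longer REX.W-prefixed movq encoding.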
8328 } else {
8329 __ movq(dest, Immediate(value));
8330 }
8331 }
8332
8333 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, int32_t value) {
8334 if (value == 0) {
8335 __ xorps(dest, dest);
8336 } else {
8337 __ movss(dest, LiteralInt32Address(value));
8338 }
8339 }
8340
8341 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, int64_t value) {
8342 if (value == 0) {
8343 __ xorpd(dest, dest);
8344 } else {
8345 __ movsd(dest, LiteralInt64Address(value));
8346 }
8347 }
8348
8349 void CodeGeneratorX86_64::Load32BitValue(XmmRegister dest, float value) {
8350 Load32BitValue(dest, bit_cast<int32_t, float>(value));
8351 }
8352
8353 void CodeGeneratorX86_64::Load64BitValue(XmmRegister dest, double value) {
8354 Load64BitValue(dest, bit_cast<int64_t, double>(value));
8355 }
8356
8357 void CodeGeneratorX86_64::Compare32BitValue(CpuRegister dest, int32_t value) {
8358 if (value == 0) {
8359 __ testl(dest, dest);
8360 } else {
8361 __ cmpl(dest, Immediate(value));
8362 }
8363 }
8364
8365 void CodeGeneratorX86_64::Compare64BitValue(CpuRegister dest, int64_t value) {
8366 if (IsInt<32>(value)) {
8367 if (value == 0) {
8368 __ testq(dest, dest);
8369 } else {
8370 __ cmpq(dest, Immediate(static_cast<int32_t>(value)));
8371 }
8372 } else {
8373 // Value won't fit in a 32-bit immediate.
8374 __ cmpq(dest, LiteralInt64Address(value));
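// For example, 0x100000000 cannot be encoded as a sign-extended 32-bit immediate, hence the
// RIP-relative constant area literal used above.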
8375 }
8376 }
8377
8378 void CodeGeneratorX86_64::GenerateIntCompare(Location lhs, Location rhs) {
8379 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8380 GenerateIntCompare(lhs_reg, rhs);
8381 }
8382
8383 void CodeGeneratorX86_64::GenerateIntCompare(CpuRegister lhs, Location rhs) {
8384 if (rhs.IsConstant()) {
8385 int32_t value = CodeGenerator::GetInt32ValueOf(rhs.GetConstant());
8386 Compare32BitValue(lhs, value);
8387 } else if (rhs.IsStackSlot()) {
8388 __ cmpl(lhs, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8389 } else {
8390 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
8391 }
8392 }
8393
8394 void CodeGeneratorX86_64::GenerateLongCompare(Location lhs, Location rhs) {
8395 CpuRegister lhs_reg = lhs.AsRegister<CpuRegister>();
8396 if (rhs.IsConstant()) {
8397 int64_t value = rhs.GetConstant()->AsLongConstant()->GetValue();
8398 Compare64BitValue(lhs_reg, value);
8399 } else if (rhs.IsDoubleStackSlot()) {
8400 __ cmpq(lhs_reg, Address(CpuRegister(RSP), rhs.GetStackIndex()));
8401 } else {
8402 __ cmpq(lhs_reg, rhs.AsRegister<CpuRegister>());
8403 }
8404 }
8405
8406 Address CodeGeneratorX86_64::ArrayAddress(CpuRegister obj,
8407 Location index,
8408 ScaleFactor scale,
8409 uint32_t data_offset) {
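// Illustration (values assumed): with the index in a register, scale TIMES_4 and
// data_offset == 12, this computes obj + index * 4 + 12; with a constant index of 5 it
// folds to the fixed address obj + 32.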
8410 return index.IsConstant()
8411 ? Address(obj, (index.GetConstant()->AsIntConstant()->GetValue() << scale) + data_offset)
8412 : Address(obj, index.AsRegister<CpuRegister>(), scale, data_offset);
8413 }
8414
8415 void CodeGeneratorX86_64::Store64BitValueToStack(Location dest, int64_t value) {
8416 DCHECK(dest.IsDoubleStackSlot());
8417 if (IsInt<32>(value)) {
8418 // Can move directly as an int32 constant.
8419 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()),
8420 Immediate(static_cast<int32_t>(value)));
8421 } else {
8422 Load64BitValue(CpuRegister(TMP), value);
8423 __ movq(Address(CpuRegister(RSP), dest.GetStackIndex()), CpuRegister(TMP));
8424 }
8425 }
8426
8427 /**
8428 * Class to handle late fixup of offsets into the constant area.
8429 */
8430 class RIPFixup : public AssemblerFixup, public ArenaObject<kArenaAllocCodeGenerator> {
8431 public:
8432 RIPFixup(CodeGeneratorX86_64& codegen, size_t offset)
8433 : codegen_(&codegen), offset_into_constant_area_(offset) {}
8434
8435 protected:
8436 void SetOffset(size_t offset) { offset_into_constant_area_ = offset; }
8437
8438 CodeGeneratorX86_64* codegen_;
8439
8440 private:
8441 void Process(const MemoryRegion& region, int pos) override {
8442 // Patch the correct offset for the instruction. We use the address of the
8443 // 'next' instruction, which is 'pos' (patch the 4 bytes before).
8444 int32_t constant_offset = codegen_->ConstantAreaStart() + offset_into_constant_area_;
8445 int32_t relative_position = constant_offset - pos;
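// For instance (offsets are illustrative): a constant placed at code offset 0x400 that is
// referenced by an instruction ending at pos == 0x120 yields relative_position == 0x2E0.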
8446
8447 // Patch in the right value.
8448 region.StoreUnaligned<int32_t>(pos - 4, relative_position);
8449 }
8450
8451 // Location in constant area that the fixup refers to.
8452 size_t offset_into_constant_area_;
8453 };
8454
8455 /**
8456 * Class to handle late fixup of offsets to a jump table that will be created in the
8457 * constant area.
8458 */
8459 class JumpTableRIPFixup : public RIPFixup {
8460 public:
8461 JumpTableRIPFixup(CodeGeneratorX86_64& codegen, HPackedSwitch* switch_instr)
8462 : RIPFixup(codegen, -1), switch_instr_(switch_instr) {}
8463
8464 void CreateJumpTable() {
8465 X86_64Assembler* assembler = codegen_->GetAssembler();
8466
8467 // Ensure that the reference to the jump table has the correct offset.
8468 const int32_t offset_in_constant_table = assembler->ConstantAreaSize();
8469 SetOffset(offset_in_constant_table);
8470
8471 // Compute the offset from the start of the function to this jump table.
8472 const int32_t current_table_offset = assembler->CodeSize() + offset_in_constant_table;
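// Rough illustration (numbers assumed): if 0x200 bytes of code precede the constant area and
// 0x10 bytes of literals were added before this table, the table lives 0x210 bytes after the
// method entry.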
8473
8474 // Populate the jump table with the correct target offsets.
8475 int32_t num_entries = switch_instr_->GetNumEntries();
8476 HBasicBlock* block = switch_instr_->GetBlock();
8477 const ArenaVector<HBasicBlock*>& successors = block->GetSuccessors();
8478 // The value that we want is the target offset - the position of the table.
8479 for (int32_t i = 0; i < num_entries; i++) {
8480 HBasicBlock* b = successors[i];
8481 Label* l = codegen_->GetLabelOf(b);
8482 DCHECK(l->IsBound());
8483 int32_t offset_to_block = l->Position() - current_table_offset;
8484 assembler->AppendInt32(offset_to_block);
8485 }
8486 }
8487
8488 private:
8489 const HPackedSwitch* switch_instr_;
8490 };
8491
8492 void CodeGeneratorX86_64::Finalize() {
8493 // Generate the constant area if needed.
8494 X86_64Assembler* assembler = GetAssembler();
8495 if (!assembler->IsConstantAreaEmpty() || !fixups_to_jump_tables_.empty()) {
8496 // Align to 4 byte boundary to reduce cache misses, as the data is 4 and 8 byte values.
8497 assembler->Align(4, 0);
8498 constant_area_start_ = assembler->CodeSize();
8499
8500 // Populate any jump tables.
8501 for (JumpTableRIPFixup* jump_table : fixups_to_jump_tables_) {
8502 jump_table->CreateJumpTable();
8503 }
8504
8505 // And now add the constant area to the generated code.
8506 assembler->AddConstantArea();
8507 }
8508
8509 // And finish up.
8510 CodeGenerator::Finalize();
8511 }
8512
8513 Address CodeGeneratorX86_64::LiteralDoubleAddress(double v) {
8514 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddDouble(v));
8515 return Address::RIP(fixup);
8516 }
8517
8518 Address CodeGeneratorX86_64::LiteralFloatAddress(float v) {
8519 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddFloat(v));
8520 return Address::RIP(fixup);
8521 }
8522
8523 Address CodeGeneratorX86_64::LiteralInt32Address(int32_t v) {
8524 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt32(v));
8525 return Address::RIP(fixup);
8526 }
8527
8528 Address CodeGeneratorX86_64::LiteralInt64Address(int64_t v) {
8529 AssemblerFixup* fixup = new (GetGraph()->GetAllocator()) RIPFixup(*this, __ AddInt64(v));
8530 return Address::RIP(fixup);
8531 }
8532
8533 // TODO: trg as memory.
8534 void CodeGeneratorX86_64::MoveFromReturnRegister(Location trg, DataType::Type type) {
8535 if (!trg.IsValid()) {
8536 DCHECK_EQ(type, DataType::Type::kVoid);
8537 return;
8538 }
8539
8540 DCHECK_NE(type, DataType::Type::kVoid);
8541
8542 Location return_loc = InvokeDexCallingConventionVisitorX86_64().GetReturnLocation(type);
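// Under the calling convention queried above (see GetReturnLocation), integer results come
// back in RAX and floating-point results in XMM0, so the move below is skipped whenever the
// target location already is that register.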
8543 if (trg.Equals(return_loc)) {
8544 return;
8545 }
8546
8547 // Let the parallel move resolver take care of all of this.
8548 HParallelMove parallel_move(GetGraph()->GetAllocator());
8549 parallel_move.AddMove(return_loc, trg, type, nullptr);
8550 GetMoveResolver()->EmitNativeCode(&parallel_move);
8551 }
8552
8553 Address CodeGeneratorX86_64::LiteralCaseTable(HPackedSwitch* switch_instr) {
8554 // Create a fixup to be used to create and address the jump table.
8555 JumpTableRIPFixup* table_fixup =
8556 new (GetGraph()->GetAllocator()) JumpTableRIPFixup(*this, switch_instr);
8557
8558 // We have to populate the jump tables.
8559 fixups_to_jump_tables_.push_back(table_fixup);
8560 return Address::RIP(table_fixup);
8561 }
8562
8563 void CodeGeneratorX86_64::MoveInt64ToAddress(const Address& addr_low,
8564 const Address& addr_high,
8565 int64_t v,
8566 HInstruction* instruction) {
8567 if (IsInt<32>(v)) {
8568 int32_t v_32 = v;
8569 __ movq(addr_low, Immediate(v_32));
8570 MaybeRecordImplicitNullCheck(instruction);
8571 } else {
8572 // Didn't fit in a register. Do it in pieces.
8573 int32_t low_v = Low32Bits(v);
8574 int32_t high_v = High32Bits(v);
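// For example, v == 0x123456789 does not fit in an int32_t, so low_v == 0x23456789 and
// high_v == 0x1 are written with two separate 32-bit stores.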
8575 __ movl(addr_low, Immediate(low_v));
8576 MaybeRecordImplicitNullCheck(instruction);
8577 __ movl(addr_high, Immediate(high_v));
8578 }
8579 }
8580
8581 void CodeGeneratorX86_64::PatchJitRootUse(uint8_t* code,
8582 const uint8_t* roots_data,
8583 const PatchInfo<Label>& info,
8584 uint64_t index_in_table) const {
8585 uint32_t code_offset = info.label.Position() - kLabelPositionToLiteralOffsetAdjustment;
8586 uintptr_t address =
8587 reinterpret_cast<uintptr_t>(roots_data) + index_in_table * sizeof(GcRoot<mirror::Object>);
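// For example, index_in_table == 2 points at the third root slot in roots_data; the
// dchecked_integral_cast below verifies that the absolute address fits in the 32-bit
// immediate being patched.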
8588 using unaligned_uint32_t __attribute__((__aligned__(1))) = uint32_t;
8589 reinterpret_cast<unaligned_uint32_t*>(code + code_offset)[0] =
8590 dchecked_integral_cast<uint32_t>(address);
8591 }
8592
8593 void CodeGeneratorX86_64::EmitJitRootPatches(uint8_t* code, const uint8_t* roots_data) {
8594 for (const PatchInfo<Label>& info : jit_string_patches_) {
8595 StringReference string_reference(info.target_dex_file, dex::StringIndex(info.offset_or_index));
8596 uint64_t index_in_table = GetJitStringRootIndex(string_reference);
8597 PatchJitRootUse(code, roots_data, info, index_in_table);
8598 }
8599
8600 for (const PatchInfo<Label>& info : jit_class_patches_) {
8601 TypeReference type_reference(info.target_dex_file, dex::TypeIndex(info.offset_or_index));
8602 uint64_t index_in_table = GetJitClassRootIndex(type_reference);
8603 PatchJitRootUse(code, roots_data, info, index_in_table);
8604 }
8605
8606 for (const PatchInfo<Label>& info : jit_method_type_patches_) {
8607 ProtoReference proto_reference(info.target_dex_file, dex::ProtoIndex(info.offset_or_index));
8608 uint64_t index_in_table = GetJitMethodTypeRootIndex(proto_reference);
8609 PatchJitRootUse(code, roots_data, info, index_in_table);
8610 }
8611 }
8612
8613 bool LocationsBuilderX86_64::CpuHasAvxFeatureFlag() {
8614 return codegen_->GetInstructionSetFeatures().HasAVX();
8615 }
8616
8617 bool LocationsBuilderX86_64::CpuHasAvx2FeatureFlag() {
8618 return codegen_->GetInstructionSetFeatures().HasAVX2();
8619 }
8620
8621 bool InstructionCodeGeneratorX86_64::CpuHasAvxFeatureFlag() {
8622 return codegen_->GetInstructionSetFeatures().HasAVX();
8623 }
8624
8625 bool InstructionCodeGeneratorX86_64::CpuHasAvx2FeatureFlag() {
8626 return codegen_->GetInstructionSetFeatures().HasAVX2();
8627 }
8628
8629 void LocationsBuilderX86_64::VisitBitwiseNegatedRight(
8630 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
8631 LOG(FATAL) << "Unimplemented";
8632 }
8633
8634 void InstructionCodeGeneratorX86_64::VisitBitwiseNegatedRight(
8635 [[maybe_unused]] HBitwiseNegatedRight* instruction) {
8636 LOG(FATAL) << "Unimplemented";
8637 }
8638
8639 #undef __
8640
8641 } // namespace x86_64
8642 } // namespace art
8643