1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_x86_64.h"
18
19 #include <limits>
20
21 #include "arch/x86_64/instruction_set_features_x86_64.h"
22 #include "arch/x86_64/registers_x86_64.h"
23 #include "art_method.h"
24 #include "base/bit_utils.h"
25 #include "code_generator_x86_64.h"
26 #include "dex/modifiers.h"
27 #include "entrypoints/quick/quick_entrypoints.h"
28 #include "entrypoints/quick/quick_entrypoints_enum.h"
29 #include "heap_poisoning.h"
30 #include "intrinsic_objects.h"
31 #include "intrinsics.h"
32 #include "intrinsics_utils.h"
33 #include "lock_word.h"
34 #include "mirror/array-inl.h"
35 #include "mirror/method_handle_impl.h"
36 #include "mirror/object_array-inl.h"
37 #include "mirror/reference.h"
38 #include "mirror/string.h"
39 #include "optimizing/code_generator.h"
40 #include "optimizing/data_type.h"
41 #include "optimizing/locations.h"
42 #include "scoped_thread_state_change-inl.h"
43 #include "thread-current-inl.h"
44 #include "utils/x86_64/assembler_x86_64.h"
45 #include "utils/x86_64/constants_x86_64.h"
46 #include "well_known_classes.h"
47
48 namespace art HIDDEN {
49
50 namespace x86_64 {
51
52 IntrinsicLocationsBuilderX86_64::IntrinsicLocationsBuilderX86_64(CodeGeneratorX86_64* codegen)
53 : allocator_(codegen->GetGraph()->GetAllocator()), codegen_(codegen) {
54 }
55
56 X86_64Assembler* IntrinsicCodeGeneratorX86_64::GetAssembler() {
57 return down_cast<X86_64Assembler*>(codegen_->GetAssembler());
58 }
59
60 ArenaAllocator* IntrinsicCodeGeneratorX86_64::GetAllocator() {
61 return codegen_->GetGraph()->GetAllocator();
62 }
63
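// Tries to recognize `invoke` as an intrinsic and build its locations via Dispatch().
// Returns true if an intrinsified LocationSummary was created, false if the invoke
// should fall back to the generic calling convention.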
64 bool IntrinsicLocationsBuilderX86_64::TryDispatch(HInvoke* invoke) {
65 Dispatch(invoke);
66 LocationSummary* res = invoke->GetLocations();
67 if (res == nullptr) {
68 return false;
69 }
70 return res->Intrinsified();
71 }
72
73 using IntrinsicSlowPathX86_64 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86_64>;
74
75 #define __ assembler->
76
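// Computes `dest = base + pos * sizeof(type) + data_offset`, i.e. the address of the
// element at index `pos` in an array of `type` whose object starts at `base`.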
77 static void GenArrayAddress(X86_64Assembler* assembler,
78 CpuRegister dest,
79 CpuRegister base,
80 Location pos,
81 DataType::Type type,
82 uint32_t data_offset) {
83 // Note: The heap is in low 4GiB, so we're using LEAL rather than LEAQ to save on code size.
84 if (pos.IsConstant()) {
85 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
86 __ leal(dest, Address(base, DataType::Size(type) * constant + data_offset));
87 } else {
88 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
89 __ leal(dest, Address(base, pos.AsRegister<CpuRegister>(), scale_factor, data_offset));
90 }
91 }
92
93 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
94 class ReadBarrierSystemArrayCopySlowPathX86_64 : public SlowPathCode {
95 public:
96 explicit ReadBarrierSystemArrayCopySlowPathX86_64(HInstruction* instruction)
97 : SlowPathCode(instruction) {
98 }
99
100 void EmitNativeCode(CodeGenerator* codegen) override {
101 DCHECK(codegen->EmitBakerReadBarrier());
102 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
103 X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
104 LocationSummary* locations = instruction_->GetLocations();
105 DCHECK(locations->CanCall());
106 DCHECK(instruction_->IsInvokeStaticOrDirect())
107 << "Unexpected instruction in read barrier arraycopy slow path: "
108 << instruction_->DebugName();
109 DCHECK(instruction_->GetLocations()->Intrinsified());
110 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
111 Location length = locations->InAt(4);
112
113 const DataType::Type type = DataType::Type::kReference;
114 const int32_t element_size = DataType::Size(type);
115
116 CpuRegister src_curr_addr = locations->GetTemp(0).AsRegister<CpuRegister>();
117 CpuRegister dst_curr_addr = locations->GetTemp(1).AsRegister<CpuRegister>();
118 CpuRegister src_stop_addr = locations->GetTemp(2).AsRegister<CpuRegister>();
119
120 __ Bind(GetEntryLabel());
121 // The `src_curr_addr` and `dst_curr_addr` were initialized before entering the slow-path.
122 GenArrayAddress(assembler, src_stop_addr, src_curr_addr, length, type, /*data_offset=*/ 0u);
123
124 NearLabel loop;
125 __ Bind(&loop);
126 __ movl(CpuRegister(TMP), Address(src_curr_addr, 0));
127 __ MaybeUnpoisonHeapReference(CpuRegister(TMP));
128 // TODO: Inline the mark bit check before calling the runtime?
129 // TMP = ReadBarrier::Mark(TMP);
130 // No need to save live registers; it's taken care of by the
131 // entrypoint. Also, there is no need to update the stack mask,
132 // as this runtime call will not trigger a garbage collection.
133 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86_64PointerSize>(TMP);
134 // This runtime call does not require a stack map.
135 x86_64_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
136 __ MaybePoisonHeapReference(CpuRegister(TMP));
137 __ movl(Address(dst_curr_addr, 0), CpuRegister(TMP));
138 __ addl(src_curr_addr, Immediate(element_size));
139 __ addl(dst_curr_addr, Immediate(element_size));
140 __ cmpl(src_curr_addr, src_stop_addr);
141 __ j(kNotEqual, &loop);
142 __ jmp(GetExitLabel());
143 }
144
145 const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86_64"; }
146
147 private:
148 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86_64);
149 };
150
151 // The MethodHandle.invokeExact intrinsic sets up arguments to match the target method call. If we
152 // need to go to the slow path, we call art_quick_invoke_polymorphic_with_hidden_receiver, which
153 // expects the MethodHandle object in RDI (in place of the actual ArtMethod).
154 class InvokePolymorphicSlowPathX86_64 : public SlowPathCode {
155 public:
156 InvokePolymorphicSlowPathX86_64(HInstruction* instruction, CpuRegister method_handle)
157 : SlowPathCode(instruction), method_handle_(method_handle) {
158 DCHECK(instruction->IsInvokePolymorphic());
159 }
160
161 void EmitNativeCode(CodeGenerator* codegen) override {
162 CodeGeneratorX86_64* x86_64_codegen = down_cast<CodeGeneratorX86_64*>(codegen);
163 X86_64Assembler* assembler = x86_64_codegen->GetAssembler();
164 __ Bind(GetEntryLabel());
165 SaveLiveRegisters(codegen, instruction_->GetLocations());
166
167 // Passing `MethodHandle` object as hidden argument.
168 __ movl(CpuRegister(RDI), method_handle_);
169 x86_64_codegen->InvokeRuntime(QuickEntrypointEnum::kQuickInvokePolymorphicWithHiddenReceiver,
170 instruction_);
171
172 RestoreLiveRegisters(codegen, instruction_->GetLocations());
173 __ jmp(GetExitLabel());
174 }
175
176 const char* GetDescription() const override { return "InvokePolymorphicSlowPathX86_64"; }
177
178 private:
179 const CpuRegister method_handle_;
180 DISALLOW_COPY_AND_ASSIGN(InvokePolymorphicSlowPathX86_64);
181 };
182
183 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
184 LocationSummary* locations =
185 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
186 locations->SetInAt(0, Location::RequiresFpuRegister());
187 locations->SetOut(Location::RequiresRegister());
188 }
189
190 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
191 LocationSummary* locations =
192 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
193 locations->SetInAt(0, Location::RequiresRegister());
194 locations->SetOut(Location::RequiresFpuRegister());
195 }
196
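// Bitwise moves between general-purpose and XMM registers (MOVD/MOVQ), used by the
// Float.floatToRawIntBits()/intBitsToFloat() and Double.doubleToRawLongBits()/
// longBitsToDouble() intrinsics below.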
197 static void MoveFPToInt(
198 CpuRegister dst, XmmRegister src, bool is64bit, X86_64Assembler* assembler) {
199 if (is64bit) {
200 __ movq(dst, src);
201 } else {
202 __ movd(dst, src);
203 }
204 }
205
206 static void MoveIntToFP(
207 XmmRegister dst, CpuRegister src, bool is64bit, X86_64Assembler* assembler) {
208 if (is64bit) {
209 __ movq(dst, src);
210 } else {
211 __ movd(dst, src);
212 }
213 }
214
215 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
216 XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
217 CpuRegister output = locations->Out().AsRegister<CpuRegister>();
218 MoveFPToInt(output, input, is64bit, assembler);
219 }
220
221 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86_64Assembler* assembler) {
222 CpuRegister input = locations->InAt(0).AsRegister<CpuRegister>();
223 XmmRegister output = locations->Out().AsFpuRegister<XmmRegister>();
224 MoveIntToFP(output, input, is64bit, assembler);
225 }
226
227 void IntrinsicLocationsBuilderX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
228 CreateFPToIntLocations(allocator_, invoke);
229 }
230 void IntrinsicLocationsBuilderX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
231 CreateIntToFPLocations(allocator_, invoke);
232 }
233
234 void IntrinsicCodeGeneratorX86_64::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
235 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
236 }
237 void IntrinsicCodeGeneratorX86_64::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
238 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
239 }
240
241 void IntrinsicLocationsBuilderX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
242 CreateFPToIntLocations(allocator_, invoke);
243 }
244 void IntrinsicLocationsBuilderX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
245 CreateIntToFPLocations(allocator_, invoke);
246 }
247
248 void IntrinsicCodeGeneratorX86_64::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
249 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
250 }
251 void IntrinsicCodeGeneratorX86_64::VisitFloatIntBitsToFloat(HInvoke* invoke) {
252 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
253 }
254
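// Integer/Long/Short.reverseBytes() reuse the first input register as the output and
// defer the actual byte swap to the code generator's shared Bswap() helper.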
255 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
256 LocationSummary* locations =
257 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
258 locations->SetInAt(0, Location::RequiresRegister());
259 locations->SetOut(Location::SameAsFirstInput());
260 }
261
262 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
263 CreateIntToIntLocations(allocator_, invoke);
264 }
265
266 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverseBytes(HInvoke* invoke) {
267 codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt32);
268 }
269
270 void IntrinsicLocationsBuilderX86_64::VisitLongReverseBytes(HInvoke* invoke) {
271 CreateIntToIntLocations(allocator_, invoke);
272 }
273
274 void IntrinsicCodeGeneratorX86_64::VisitLongReverseBytes(HInvoke* invoke) {
275 codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt64);
276 }
277
278 void IntrinsicLocationsBuilderX86_64::VisitShortReverseBytes(HInvoke* invoke) {
279 CreateIntToIntLocations(allocator_, invoke);
280 }
281
282 void IntrinsicCodeGeneratorX86_64::VisitShortReverseBytes(HInvoke* invoke) {
283 codegen_->GetInstructionCodegen()->Bswap(invoke->GetLocations()->Out(), DataType::Type::kInt16);
284 }
285
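// Float/Double.isInfinite(): compare the input against +infinity and -infinity with
// COMISS/COMISD. The parity-flag jumps skip the store of 1 on an unordered compare,
// so a NaN input yields false.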
286 static void GenIsInfinite(LocationSummary* locations,
287 bool is64bit,
288 CodeGeneratorX86_64* codegen) {
289 X86_64Assembler* assembler = codegen->GetAssembler();
290
291 XmmRegister input = locations->InAt(0).AsFpuRegister<XmmRegister>();
292 CpuRegister output = locations->Out().AsRegister<CpuRegister>();
293
294 NearLabel done1, done2;
295
296 if (is64bit) {
297 double kPositiveInfinity = std::numeric_limits<double>::infinity();
298 double kNegativeInfinity = -1 * kPositiveInfinity;
299
300 __ xorq(output, output);
301 __ comisd(input, codegen->LiteralDoubleAddress(kPositiveInfinity));
302 __ j(kNotEqual, &done1);
303 __ j(kParityEven, &done2);
304 __ movq(output, Immediate(1));
305 __ jmp(&done2);
306 __ Bind(&done1);
307 __ comisd(input, codegen->LiteralDoubleAddress(kNegativeInfinity));
308 __ j(kNotEqual, &done2);
309 __ j(kParityEven, &done2);
310 __ movq(output, Immediate(1));
311 __ Bind(&done2);
312 } else {
313 float kPositiveInfinity = std::numeric_limits<float>::infinity();
314 float kNegativeInfinity = -1 * kPositiveInfinity;
315
316 __ xorl(output, output);
317 __ comiss(input, codegen->LiteralFloatAddress(kPositiveInfinity));
318 __ j(kNotEqual, &done1);
319 __ j(kParityEven, &done2);
320 __ movl(output, Immediate(1));
321 __ jmp(&done2);
322 __ Bind(&done1);
323 __ comiss(input, codegen->LiteralFloatAddress(kNegativeInfinity));
324 __ j(kNotEqual, &done2);
325 __ j(kParityEven, &done2);
326 __ movl(output, Immediate(1));
327 __ Bind(&done2);
328 }
329 }
330
331 void IntrinsicLocationsBuilderX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
332 CreateFPToIntLocations(allocator_, invoke);
333 }
334
335 void IntrinsicCodeGeneratorX86_64::VisitFloatIsInfinite(HInvoke* invoke) {
336 GenIsInfinite(invoke->GetLocations(), /* is64bit=*/ false, codegen_);
337 }
338
339 void IntrinsicLocationsBuilderX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
340 CreateFPToIntLocations(allocator_, invoke);
341 }
342
343 void IntrinsicCodeGeneratorX86_64::VisitDoubleIsInfinite(HInvoke* invoke) {
344 GenIsInfinite(invoke->GetLocations(), /* is64bit=*/ true, codegen_);
345 }
346
347 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
348 LocationSummary* locations =
349 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
350 locations->SetInAt(0, Location::RequiresFpuRegister());
351 locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
352 }
353
354 void IntrinsicLocationsBuilderX86_64::VisitMathSqrt(HInvoke* invoke) {
355 CreateFPToFPLocations(allocator_, invoke);
356 }
357
358 void IntrinsicCodeGeneratorX86_64::VisitMathSqrt(HInvoke* invoke) {
359 LocationSummary* locations = invoke->GetLocations();
360 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
361 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
362
363 GetAssembler()->sqrtsd(out, in);
364 }
365
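// If SSE4.1 is not available, no locations are created and the invoke is left as a
// regular call instead of being intrinsified.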
366 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
367 HInvoke* invoke,
368 CodeGeneratorX86_64* codegen) {
369 // Do we have instruction support?
370 if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
371 return;
372 }
373
374 CreateFPToFPLocations(allocator, invoke);
375 }
376
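// Emits ROUNDSD with the given immediate rounding mode: 0 = round to nearest (even),
// 1 = round toward negative infinity (floor), 2 = round toward positive infinity (ceil),
// matching the Math.rint/floor/ceil callers below.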
377 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86_64Assembler* assembler, int round_mode) {
378 LocationSummary* locations = invoke->GetLocations();
379 DCHECK(!locations->WillCall());
380 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
381 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
382 __ roundsd(out, in, Immediate(round_mode));
383 }
384
385 void IntrinsicLocationsBuilderX86_64::VisitMathCeil(HInvoke* invoke) {
386 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
387 }
388
389 void IntrinsicCodeGeneratorX86_64::VisitMathCeil(HInvoke* invoke) {
390 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
391 }
392
393 void IntrinsicLocationsBuilderX86_64::VisitMathFloor(HInvoke* invoke) {
394 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
395 }
396
397 void IntrinsicCodeGeneratorX86_64::VisitMathFloor(HInvoke* invoke) {
398 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
399 }
400
401 void IntrinsicLocationsBuilderX86_64::VisitMathRint(HInvoke* invoke) {
402 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
403 }
404
405 void IntrinsicCodeGeneratorX86_64::VisitMathRint(HInvoke* invoke) {
406 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
407 }
408
409 static void CreateSSE41FPToIntLocations(ArenaAllocator* allocator,
410 HInvoke* invoke,
411 CodeGeneratorX86_64* codegen) {
412 // Do we have instruction support?
413 if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
414 return;
415 }
416
417 LocationSummary* locations =
418 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
419 locations->SetInAt(0, Location::RequiresFpuRegister());
420 locations->SetOut(Location::RequiresRegister());
421 locations->AddTemp(Location::RequiresFpuRegister());
422 locations->AddTemp(Location::RequiresFpuRegister());
423 }
424
425 void IntrinsicLocationsBuilderX86_64::VisitMathRoundFloat(HInvoke* invoke) {
426 CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
427 }
428
429 void IntrinsicCodeGeneratorX86_64::VisitMathRoundFloat(HInvoke* invoke) {
430 LocationSummary* locations = invoke->GetLocations();
431 DCHECK(!locations->WillCall());
432
433 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
434 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
435 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
436 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
437 NearLabel skip_incr, done;
438 X86_64Assembler* assembler = GetAssembler();
439
440 // Since no direct x86 rounding instruction matches the required semantics,
441 // this intrinsic is implemented as follows:
442 // result = floor(in);
443 // if (in - result >= 0.5f)
444 // result = result + 1.0f;
445 __ movss(t2, in);
446 __ roundss(t1, in, Immediate(1));
447 __ subss(t2, t1);
448 __ comiss(t2, codegen_->LiteralFloatAddress(0.5f));
449 __ j(kBelow, &skip_incr);
450 __ addss(t1, codegen_->LiteralFloatAddress(1.0f));
451 __ Bind(&skip_incr);
452
453 // Final conversion to an integer. Unfortunately this also does not have a
454 // direct x86 instruction, since NaN should map to 0 and large positive
455 // values need to be clipped to the extreme value.
456 codegen_->Load32BitValue(out, kPrimIntMax);
457 __ cvtsi2ss(t2, out);
458 __ comiss(t1, t2);
459 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
460 __ movl(out, Immediate(0)); // does not change flags
461 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
462 __ cvttss2si(out, t1);
463 __ Bind(&done);
464 }
465
466 void IntrinsicLocationsBuilderX86_64::VisitMathRoundDouble(HInvoke* invoke) {
467 CreateSSE41FPToIntLocations(allocator_, invoke, codegen_);
468 }
469
470 void IntrinsicCodeGeneratorX86_64::VisitMathRoundDouble(HInvoke* invoke) {
471 LocationSummary* locations = invoke->GetLocations();
472 DCHECK(!locations->WillCall());
473
474 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
475 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
476 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
477 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
478 NearLabel skip_incr, done;
479 X86_64Assembler* assembler = GetAssembler();
480
481 // Since no direct x86 rounding instruction matches the required semantics,
482 // this intrinsic is implemented as follows:
483 // result = floor(in);
484 // if (in - result >= 0.5)
485 // result = result + 1.0;
486 __ movsd(t2, in);
487 __ roundsd(t1, in, Immediate(1));
488 __ subsd(t2, t1);
489 __ comisd(t2, codegen_->LiteralDoubleAddress(0.5));
490 __ j(kBelow, &skip_incr);
491 __ addsd(t1, codegen_->LiteralDoubleAddress(1.0));
492 __ Bind(&skip_incr);
493
494 // Final conversion to an integer. Unfortunately this also does not have a
495 // direct x86 instruction, since NaN should map to 0 and large positive
496 // values need to be clipped to the extreme value.
497 codegen_->Load64BitValue(out, kPrimLongMax);
498 __ cvtsi2sd(t2, out, /* is64bit= */ true);
499 __ comisd(t1, t2);
500 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
501 __ movl(out, Immediate(0)); // does not change flags, implicit zero extension to 64-bit
502 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
503 __ cvttsd2si(out, t1, /* is64bit= */ true);
504 __ Bind(&done);
505 }
506
507 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
508 LocationSummary* locations =
509 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
510 InvokeRuntimeCallingConvention calling_convention;
511 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
512 locations->SetOut(Location::FpuRegisterLocation(XMM0));
513
514 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
515 }
516
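// The inputs were already placed in the runtime FP calling convention registers by the
// corresponding Create*CallLocations() helper, so this reduces to a single runtime call.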
517 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86_64* codegen,
518 QuickEntrypointEnum entry) {
519 LocationSummary* locations = invoke->GetLocations();
520 DCHECK(locations->WillCall());
521 DCHECK(invoke->IsInvokeStaticOrDirect());
522
523 codegen->InvokeRuntime(entry, invoke);
524 }
525
526 void IntrinsicLocationsBuilderX86_64::VisitMathCos(HInvoke* invoke) {
527 CreateFPToFPCallLocations(allocator_, invoke);
528 }
529
530 void IntrinsicCodeGeneratorX86_64::VisitMathCos(HInvoke* invoke) {
531 GenFPToFPCall(invoke, codegen_, kQuickCos);
532 }
533
534 void IntrinsicLocationsBuilderX86_64::VisitMathSin(HInvoke* invoke) {
535 CreateFPToFPCallLocations(allocator_, invoke);
536 }
537
538 void IntrinsicCodeGeneratorX86_64::VisitMathSin(HInvoke* invoke) {
539 GenFPToFPCall(invoke, codegen_, kQuickSin);
540 }
541
542 void IntrinsicLocationsBuilderX86_64::VisitMathAcos(HInvoke* invoke) {
543 CreateFPToFPCallLocations(allocator_, invoke);
544 }
545
546 void IntrinsicCodeGeneratorX86_64::VisitMathAcos(HInvoke* invoke) {
547 GenFPToFPCall(invoke, codegen_, kQuickAcos);
548 }
549
550 void IntrinsicLocationsBuilderX86_64::VisitMathAsin(HInvoke* invoke) {
551 CreateFPToFPCallLocations(allocator_, invoke);
552 }
553
554 void IntrinsicCodeGeneratorX86_64::VisitMathAsin(HInvoke* invoke) {
555 GenFPToFPCall(invoke, codegen_, kQuickAsin);
556 }
557
558 void IntrinsicLocationsBuilderX86_64::VisitMathAtan(HInvoke* invoke) {
559 CreateFPToFPCallLocations(allocator_, invoke);
560 }
561
562 void IntrinsicCodeGeneratorX86_64::VisitMathAtan(HInvoke* invoke) {
563 GenFPToFPCall(invoke, codegen_, kQuickAtan);
564 }
565
566 void IntrinsicLocationsBuilderX86_64::VisitMathCbrt(HInvoke* invoke) {
567 CreateFPToFPCallLocations(allocator_, invoke);
568 }
569
570 void IntrinsicCodeGeneratorX86_64::VisitMathCbrt(HInvoke* invoke) {
571 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
572 }
573
574 void IntrinsicLocationsBuilderX86_64::VisitMathCosh(HInvoke* invoke) {
575 CreateFPToFPCallLocations(allocator_, invoke);
576 }
577
578 void IntrinsicCodeGeneratorX86_64::VisitMathCosh(HInvoke* invoke) {
579 GenFPToFPCall(invoke, codegen_, kQuickCosh);
580 }
581
582 void IntrinsicLocationsBuilderX86_64::VisitMathExp(HInvoke* invoke) {
583 CreateFPToFPCallLocations(allocator_, invoke);
584 }
585
586 void IntrinsicCodeGeneratorX86_64::VisitMathExp(HInvoke* invoke) {
587 GenFPToFPCall(invoke, codegen_, kQuickExp);
588 }
589
590 void IntrinsicLocationsBuilderX86_64::VisitMathExpm1(HInvoke* invoke) {
591 CreateFPToFPCallLocations(allocator_, invoke);
592 }
593
594 void IntrinsicCodeGeneratorX86_64::VisitMathExpm1(HInvoke* invoke) {
595 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
596 }
597
598 void IntrinsicLocationsBuilderX86_64::VisitMathLog(HInvoke* invoke) {
599 CreateFPToFPCallLocations(allocator_, invoke);
600 }
601
602 void IntrinsicCodeGeneratorX86_64::VisitMathLog(HInvoke* invoke) {
603 GenFPToFPCall(invoke, codegen_, kQuickLog);
604 }
605
606 void IntrinsicLocationsBuilderX86_64::VisitMathLog10(HInvoke* invoke) {
607 CreateFPToFPCallLocations(allocator_, invoke);
608 }
609
610 void IntrinsicCodeGeneratorX86_64::VisitMathLog10(HInvoke* invoke) {
611 GenFPToFPCall(invoke, codegen_, kQuickLog10);
612 }
613
614 void IntrinsicLocationsBuilderX86_64::VisitMathSinh(HInvoke* invoke) {
615 CreateFPToFPCallLocations(allocator_, invoke);
616 }
617
618 void IntrinsicCodeGeneratorX86_64::VisitMathSinh(HInvoke* invoke) {
619 GenFPToFPCall(invoke, codegen_, kQuickSinh);
620 }
621
622 void IntrinsicLocationsBuilderX86_64::VisitMathTan(HInvoke* invoke) {
623 CreateFPToFPCallLocations(allocator_, invoke);
624 }
625
626 void IntrinsicCodeGeneratorX86_64::VisitMathTan(HInvoke* invoke) {
627 GenFPToFPCall(invoke, codegen_, kQuickTan);
628 }
629
630 void IntrinsicLocationsBuilderX86_64::VisitMathTanh(HInvoke* invoke) {
631 CreateFPToFPCallLocations(allocator_, invoke);
632 }
633
634 void IntrinsicCodeGeneratorX86_64::VisitMathTanh(HInvoke* invoke) {
635 GenFPToFPCall(invoke, codegen_, kQuickTanh);
636 }
637
638 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
639 LocationSummary* locations =
640 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
641 InvokeRuntimeCallingConvention calling_convention;
642 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
643 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
644 locations->SetOut(Location::FpuRegisterLocation(XMM0));
645
646 CodeGeneratorX86_64::BlockNonVolatileXmmRegisters(locations);
647 }
648
649 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
650 DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
651 LocationSummary* locations =
652 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
653 InvokeRuntimeCallingConvention calling_convention;
654 locations->SetInAt(0, Location::RequiresFpuRegister());
655 locations->SetInAt(1, Location::RequiresFpuRegister());
656 locations->SetInAt(2, Location::RequiresFpuRegister());
657 locations->SetOut(Location::SameAsFirstInput());
658 }
659
660 void IntrinsicLocationsBuilderX86_64::VisitMathAtan2(HInvoke* invoke) {
661 CreateFPFPToFPCallLocations(allocator_, invoke);
662 }
663
664 void IntrinsicCodeGeneratorX86_64::VisitMathAtan2(HInvoke* invoke) {
665 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
666 }
667
668 void IntrinsicLocationsBuilderX86_64::VisitMathPow(HInvoke* invoke) {
669 CreateFPFPToFPCallLocations(allocator_, invoke);
670 }
671
672 void IntrinsicCodeGeneratorX86_64::VisitMathPow(HInvoke* invoke) {
673 GenFPToFPCall(invoke, codegen_, kQuickPow);
674 }
675
676 void IntrinsicLocationsBuilderX86_64::VisitMathHypot(HInvoke* invoke) {
677 CreateFPFPToFPCallLocations(allocator_, invoke);
678 }
679
680 void IntrinsicCodeGeneratorX86_64::VisitMathHypot(HInvoke* invoke) {
681 GenFPToFPCall(invoke, codegen_, kQuickHypot);
682 }
683
684 void IntrinsicLocationsBuilderX86_64::VisitMathNextAfter(HInvoke* invoke) {
685 CreateFPFPToFPCallLocations(allocator_, invoke);
686 }
687
688 void IntrinsicCodeGeneratorX86_64::VisitMathNextAfter(HInvoke* invoke) {
689 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
690 }
691
692 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
693 // Check for known failure cases that would force us to bail out to the runtime;
694 // in those cases, do not intrinsify and just let the normal runtime call be generated.
695 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstantOrNull();
696 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstantOrNull();
697
698 // The positions must be non-negative.
699 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
700 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
701 // We will have to fail anyway.
702 return;
703 }
704
705 // The length must be >= 0.
706 HIntConstant* length = invoke->InputAt(4)->AsIntConstantOrNull();
707 if (length != nullptr) {
708 int32_t len = length->GetValue();
709 if (len < 0) {
710 // Just call as normal.
711 return;
712 }
713 }
714 LocationSummary* locations =
715 new (invoke->GetBlock()->GetGraph()->GetAllocator()) LocationSummary
716 (invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
717 // arraycopy(Object src, int src_pos, Object dest, int dest_pos, int length).
718 locations->SetInAt(0, Location::RequiresRegister());
719 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
720 locations->SetInAt(2, Location::RequiresRegister());
721 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
722 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
723
724 // And we need some temporaries. We will use REP MOVS{B,W,L}, so we need fixed registers.
725 locations->AddTemp(Location::RegisterLocation(RSI));
726 locations->AddTemp(Location::RegisterLocation(RDI));
727 locations->AddTemp(Location::RegisterLocation(RCX));
728 }
729
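// Emits `cmpl lhs, rhs` followed by a signed jump to `label` when `lhs < rhs`;
// `rhs` may be either a register or a constant location.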
730 template <typename LhsType>
731 static void EmitCmplJLess(X86_64Assembler* assembler,
732 LhsType lhs,
733 Location rhs,
734 Label* label) {
735 static_assert(std::is_same_v<LhsType, CpuRegister> || std::is_same_v<LhsType, Address>);
736 if (rhs.IsConstant()) {
737 int32_t rhs_constant = rhs.GetConstant()->AsIntConstant()->GetValue();
738 __ cmpl(lhs, Immediate(rhs_constant));
739 } else {
740 __ cmpl(lhs, rhs.AsRegister<CpuRegister>());
741 }
742 __ j(kLess, label);
743 }
744
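// Emits the range checks for one side of System.arraycopy(): unless already known to hold,
// verifies that `pos >= 0` and that `pos + length <= array->length`, jumping to `slow_path`
// otherwise.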
745 static void CheckSystemArrayCopyPosition(X86_64Assembler* assembler,
746 CpuRegister array,
747 Location pos,
748 Location length,
749 SlowPathCode* slow_path,
750 CpuRegister temp,
751 bool length_is_array_length,
752 bool position_sign_checked) {
753 // Where is the length in the Array?
754 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
755
756 if (pos.IsConstant()) {
757 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
758 if (pos_const == 0) {
759 if (!length_is_array_length) {
760 // Check that length(array) >= length.
761 EmitCmplJLess(assembler, Address(array, length_offset), length, slow_path->GetEntryLabel());
762 }
763 } else {
764 // Calculate length(array) - pos.
765 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
766 // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
767 __ movl(temp, Address(array, length_offset));
768 __ subl(temp, Immediate(pos_const));
769
770 // Check that (length(array) - pos) >= length.
771 EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
772 }
773 } else if (length_is_array_length) {
774 // The only way the copy can succeed is if pos is zero.
775 CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
776 __ testl(pos_reg, pos_reg);
777 __ j(kNotEqual, slow_path->GetEntryLabel());
778 } else {
779 // Check that pos >= 0.
780 CpuRegister pos_reg = pos.AsRegister<CpuRegister>();
781 if (!position_sign_checked) {
782 __ testl(pos_reg, pos_reg);
783 __ j(kLess, slow_path->GetEntryLabel());
784 }
785
786 // Calculate length(array) - pos.
787 // Both operands are known to be non-negative `int32_t`, so the difference cannot underflow
788 // as `int32_t`. If the result is negative, the JL below shall go to the slow path.
789 __ movl(temp, Address(array, length_offset));
790 __ subl(temp, pos_reg);
791
792 // Check that (length(array) - pos) >= length.
793 EmitCmplJLess(assembler, temp, length, slow_path->GetEntryLabel());
794 }
795 }
796
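// Common code for the primitive System.arraycopy() intrinsics (byte/char/int): after the
// null, aliasing and range checks, the copy itself is a single REP MOVSB/MOVSW/MOVSL with
// RSI, RDI and RCX fixed by the location summary.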
797 static void SystemArrayCopyPrimitive(HInvoke* invoke,
798 X86_64Assembler* assembler,
799 CodeGeneratorX86_64* codegen,
800 DataType::Type type) {
801 LocationSummary* locations = invoke->GetLocations();
802 CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
803 Location src_pos = locations->InAt(1);
804 CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
805 Location dest_pos = locations->InAt(3);
806 Location length = locations->InAt(4);
807
808 // Temporaries that we need for MOVSB/W/L.
809 CpuRegister src_base = locations->GetTemp(0).AsRegister<CpuRegister>();
810 DCHECK_EQ(src_base.AsRegister(), RSI);
811 CpuRegister dest_base = locations->GetTemp(1).AsRegister<CpuRegister>();
812 DCHECK_EQ(dest_base.AsRegister(), RDI);
813 CpuRegister count = locations->GetTemp(2).AsRegister<CpuRegister>();
814 DCHECK_EQ(count.AsRegister(), RCX);
815
816 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
817 codegen->AddSlowPath(slow_path);
818
819 // Bail out if the source and destination are the same.
820 __ cmpl(src, dest);
821 __ j(kEqual, slow_path->GetEntryLabel());
822
823 // Bail out if the source is null.
824 __ testl(src, src);
825 __ j(kEqual, slow_path->GetEntryLabel());
826
827 // Bail out if the destination is null.
828 __ testl(dest, dest);
829 __ j(kEqual, slow_path->GetEntryLabel());
830
831 // If the length is negative, bail out.
832 // We have already checked in the LocationsBuilder for the constant case.
833 if (!length.IsConstant()) {
834 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
835 __ j(kLess, slow_path->GetEntryLabel());
836 }
837
838 // Validity checks: source. Use src_base as a temporary register.
839 CheckSystemArrayCopyPosition(assembler,
840 src,
841 src_pos,
842 length,
843 slow_path,
844 src_base,
845 /*length_is_array_length=*/ false,
846 /*position_sign_checked=*/ false);
847
848 // Validity checks: dest. Use src_base as a temporary register.
849 CheckSystemArrayCopyPosition(assembler,
850 dest,
851 dest_pos,
852 length,
853 slow_path,
854 src_base,
855 /*length_is_array_length=*/ false,
856 /*position_sign_checked=*/ false);
857
858 // We need the count in RCX.
859 if (length.IsConstant()) {
860 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
861 } else {
862 __ movl(count, length.AsRegister<CpuRegister>());
863 }
864
865 // Okay, everything checks out. Finally time to do the copy.
866 // Compute the element size and data offset used to form the base addresses below.
867 const size_t data_size = DataType::Size(type);
868 const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
869
870 GenArrayAddress(assembler, src_base, src, src_pos, type, data_offset);
871 GenArrayAddress(assembler, dest_base, dest, dest_pos, type, data_offset);
872
873 // Do the move.
874 switch (type) {
875 case DataType::Type::kInt8:
876 __ rep_movsb();
877 break;
878 case DataType::Type::kUint16:
879 __ rep_movsw();
880 break;
881 case DataType::Type::kInt32:
882 __ rep_movsl();
883 break;
884 default:
885 LOG(FATAL) << "Unexpected data type for intrinsic";
886 }
887 __ Bind(slow_path->GetExitLabel());
888 }
889
890 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
891 CreateSystemArrayCopyLocations(invoke);
892 }
893 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyChar(HInvoke* invoke) {
894 X86_64Assembler* assembler = GetAssembler();
895 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
896 }
897
898 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
899 X86_64Assembler* assembler = GetAssembler();
900 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
901 }
902
903 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyByte(HInvoke* invoke) {
904 CreateSystemArrayCopyLocations(invoke);
905 }
906
907 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
908 X86_64Assembler* assembler = GetAssembler();
909 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
910 }
911
912 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopyInt(HInvoke* invoke) {
913 CreateSystemArrayCopyLocations(invoke);
914 }
915
916 void IntrinsicLocationsBuilderX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
917 // The only read barrier implementation supporting the
918 // SystemArrayCopy intrinsic is the Baker-style read barrier.
919 if (codegen_->EmitNonBakerReadBarrier()) {
920 return;
921 }
922
923 constexpr int32_t kLengthThreshold = -1; // No cut-off - handle large arrays in intrinsic code.
924 constexpr size_t kInitialNumTemps = 0u; // We shall allocate temps explicitly.
925 LocationSummary* locations = CodeGenerator::CreateSystemArrayCopyLocationSummary(
926 invoke, kLengthThreshold, kInitialNumTemps);
927 if (locations != nullptr) {
928 // Add temporaries. We will use REP MOVSL, so we need fixed registers.
929 DCHECK_EQ(locations->GetTempCount(), kInitialNumTemps);
930 locations->AddTemp(Location::RegisterLocation(RSI));
931 locations->AddTemp(Location::RegisterLocation(RDI));
932 locations->AddTemp(Location::RegisterLocation(RCX));
933 }
934 }
935
936 void IntrinsicCodeGeneratorX86_64::VisitSystemArrayCopy(HInvoke* invoke) {
937 // The only read barrier implementation supporting the
938 // SystemArrayCopy intrinsic is the Baker-style read barrier.
939 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
940
941 X86_64Assembler* assembler = GetAssembler();
942 LocationSummary* locations = invoke->GetLocations();
943
944 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
945 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
946 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
947 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
948 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
949
950 CpuRegister src = locations->InAt(0).AsRegister<CpuRegister>();
951 Location src_pos = locations->InAt(1);
952 CpuRegister dest = locations->InAt(2).AsRegister<CpuRegister>();
953 Location dest_pos = locations->InAt(3);
954 Location length = locations->InAt(4);
955 Location temp1_loc = locations->GetTemp(0);
956 CpuRegister temp1 = temp1_loc.AsRegister<CpuRegister>();
957 Location temp2_loc = locations->GetTemp(1);
958 CpuRegister temp2 = temp2_loc.AsRegister<CpuRegister>();
959 Location temp3_loc = locations->GetTemp(2);
960 CpuRegister temp3 = temp3_loc.AsRegister<CpuRegister>();
961
962 SlowPathCode* intrinsic_slow_path =
963 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
964 codegen_->AddSlowPath(intrinsic_slow_path);
965
966 NearLabel conditions_on_positions_validated;
967 SystemArrayCopyOptimizations optimizations(invoke);
968
969 // If the source and destination are the same array, go to the slow path when dest_pos > src_pos,
970 // since only a forward copy is emitted. The check is skipped when the positions are known equal.
971 if (!optimizations.GetSourcePositionIsDestinationPosition()) {
972 if (src_pos.IsConstant()) {
973 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
974 if (dest_pos.IsConstant()) {
975 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
976 if (optimizations.GetDestinationIsSource()) {
977 // Checked when building locations.
978 DCHECK_GE(src_pos_constant, dest_pos_constant);
979 } else if (src_pos_constant < dest_pos_constant) {
980 __ cmpl(src, dest);
981 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
982 }
983 } else {
984 if (!optimizations.GetDestinationIsSource()) {
985 __ cmpl(src, dest);
986 __ j(kNotEqual, &conditions_on_positions_validated);
987 }
988 __ cmpl(dest_pos.AsRegister<CpuRegister>(), Immediate(src_pos_constant));
989 __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
990 }
991 } else {
992 if (!optimizations.GetDestinationIsSource()) {
993 __ cmpl(src, dest);
994 __ j(kNotEqual, &conditions_on_positions_validated);
995 }
996 CpuRegister src_pos_reg = src_pos.AsRegister<CpuRegister>();
997 EmitCmplJLess(assembler, src_pos_reg, dest_pos, intrinsic_slow_path->GetEntryLabel());
998 }
999 }
1000
1001 __ Bind(&conditions_on_positions_validated);
1002
1003 if (!optimizations.GetSourceIsNotNull()) {
1004 // Bail out if the source is null.
1005 __ testl(src, src);
1006 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1007 }
1008
1009 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
1010 // Bail out if the destination is null.
1011 __ testl(dest, dest);
1012 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1013 }
1014
1015 // If the length is negative, bail out.
1016 // We have already checked in the LocationsBuilder for the constant case.
1017 if (!length.IsConstant() &&
1018 !optimizations.GetCountIsSourceLength() &&
1019 !optimizations.GetCountIsDestinationLength()) {
1020 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1021 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
1022 }
1023
1024 // Validity checks: source.
1025 CheckSystemArrayCopyPosition(assembler,
1026 src,
1027 src_pos,
1028 length,
1029 intrinsic_slow_path,
1030 temp1,
1031 optimizations.GetCountIsSourceLength(),
1032 /*position_sign_checked=*/ false);
1033
1034 // Validity checks: dest.
1035 bool dest_position_sign_checked = optimizations.GetSourcePositionIsDestinationPosition();
1036 CheckSystemArrayCopyPosition(assembler,
1037 dest,
1038 dest_pos,
1039 length,
1040 intrinsic_slow_path,
1041 temp1,
1042 optimizations.GetCountIsDestinationLength(),
1043 dest_position_sign_checked);
1044
1045 auto check_non_primitive_array_class = [&](CpuRegister klass, CpuRegister temp) {
1046 // No read barrier is needed for reading a chain of constant references for comparing
1047 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1048 // /* HeapReference<Class> */ temp = klass->component_type_
1049 __ movl(temp, Address(klass, component_offset));
1050 __ MaybeUnpoisonHeapReference(temp);
1051 // Check that the component type is not null.
1052 __ testl(temp, temp);
1053 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
1054 // Check that the component type is not a primitive.
1055 __ cmpw(Address(temp, primitive_offset), Immediate(Primitive::kPrimNot));
1056 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1057 };
1058
1059 if (!optimizations.GetDoesNotNeedTypeCheck()) {
1060 // Check whether all elements of the source array are assignable to the component
1061 // type of the destination array. We do two checks: the classes are the same,
1062 // or the destination is Object[]. If none of these checks succeed, we go to the
1063 // slow path.
1064
1065 if (codegen_->EmitBakerReadBarrier()) {
1066 // /* HeapReference<Class> */ temp1 = dest->klass_
1067 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1068 invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
1069 // Register `temp1` is not trashed by the read barrier emitted
1070 // by GenerateFieldLoadWithBakerReadBarrier below, as that
1071 // method produces a call to a ReadBarrierMarkRegX entry point,
1072 // which saves all potentially live registers, including
1073 // temporaries such as `temp1`.
1074 // /* HeapReference<Class> */ temp2 = src->klass_
1075 codegen_->GenerateFieldLoadWithBakerReadBarrier(
1076 invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
1077 // If heap poisoning is enabled, `temp1` and `temp2` have been unpoisoned
1078 // by the previous calls to GenerateFieldLoadWithBakerReadBarrier.
1079 } else {
1080 // /* HeapReference<Class> */ temp1 = dest->klass_
1081 __ movl(temp1, Address(dest, class_offset));
1082 __ MaybeUnpoisonHeapReference(temp1);
1083 // /* HeapReference<Class> */ temp2 = src->klass_
1084 __ movl(temp2, Address(src, class_offset));
1085 __ MaybeUnpoisonHeapReference(temp2);
1086 }
1087
1088 __ cmpl(temp1, temp2);
1089 if (optimizations.GetDestinationIsTypedObjectArray()) {
1090 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1091 NearLabel do_copy;
1092 // For class match, we can skip the source type check regardless of the optimization flag.
1093 __ j(kEqual, &do_copy);
1094 // No read barrier is needed for reading a chain of constant references
1095 // for comparing with null, see `ReadBarrierOption`.
1096 // /* HeapReference<Class> */ temp1 = temp1->component_type_
1097 __ movl(temp1, Address(temp1, component_offset));
1098 __ MaybeUnpoisonHeapReference(temp1);
1099 // No need to unpoison the following heap reference load, as
1100 // we're comparing against null.
1101 __ cmpl(Address(temp1, super_offset), Immediate(0));
1102 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1103 // Bail out if the source is not a non primitive array.
1104 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1105 check_non_primitive_array_class(temp2, CpuRegister(TMP));
1106 }
1107 __ Bind(&do_copy);
1108 } else {
1109 DCHECK(!optimizations.GetDestinationIsTypedObjectArray());
1110 // For class match, we can skip the array type check completely if at least one of source
1111 // and destination is known to be a non primitive array, otherwise one check is enough.
1112 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
1113 if (!optimizations.GetDestinationIsNonPrimitiveArray() &&
1114 !optimizations.GetSourceIsNonPrimitiveArray()) {
1115 check_non_primitive_array_class(temp2, CpuRegister(TMP));
1116 }
1117 }
1118 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
1119 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
1120 // Bail out if the source is not a non primitive array.
1121 // No read barrier is needed for reading a chain of constant references for comparing
1122 // with null, or for reading a constant primitive value, see `ReadBarrierOption`.
1123 // /* HeapReference<Class> */ temp1 = src->klass_
1124 __ movl(temp1, Address(src, class_offset));
1125 __ MaybeUnpoisonHeapReference(temp1);
1126 check_non_primitive_array_class(temp1, CpuRegister(TMP));
1127 }
1128
1129 if (length.IsConstant() && length.GetConstant()->AsIntConstant()->GetValue() == 0) {
1130 // Zero constant length: no need to emit the loop code at all.
1131 } else {
1132 const DataType::Type type = DataType::Type::kReference;
1133 const int32_t element_size = DataType::Size(type);
1134 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
1135
1136 // Don't enter copy loop if `length == 0`.
1137 NearLabel skip_copy_and_write_barrier;
1138 if (!length.IsConstant()) {
1139 __ testl(length.AsRegister<CpuRegister>(), length.AsRegister<CpuRegister>());
1140 __ j(kEqual, &skip_copy_and_write_barrier);
1141 }
1142
1143 // Compute base source address, base destination address, and end
1144 // source address in `temp1`, `temp2` and `temp3` respectively.
1145 GenArrayAddress(assembler, temp1, src, src_pos, type, data_offset);
1146 GenArrayAddress(assembler, temp2, dest, dest_pos, type, data_offset);
1147
1148 SlowPathCode* read_barrier_slow_path = nullptr;
1149 if (codegen_->EmitBakerReadBarrier()) {
1150 // SystemArrayCopy implementation for Baker read barriers (see
1151 // also CodeGeneratorX86_64::GenerateReferenceLoadWithBakerReadBarrier):
1152 //
1153 // if (src_ptr != end_ptr) {
1154 // uint32_t rb_state = Lockword(src->monitor_).ReadBarrierState();
1155 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
1156 // bool is_gray = (rb_state == ReadBarrier::GrayState());
1157 // if (is_gray) {
1158 // // Slow-path copy.
1159 // do {
1160 // *dest_ptr++ = MaybePoison(ReadBarrier::Mark(MaybeUnpoison(*src_ptr++)));
1161 // } while (src_ptr != end_ptr)
1162 // } else {
1163 // // Fast-path copy.
1164 // do {
1165 // *dest_ptr++ = *src_ptr++;
1166 // } while (src_ptr != end_ptr)
1167 // }
1168 // }
1169
1170 // Given the numeric representation, it's enough to check the low bit of the rb_state.
1171 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
1172 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
1173 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
1174 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
1175 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
1176
1177 // if (rb_state == ReadBarrier::GrayState())
1178 // goto slow_path;
1179 // At this point, just do the "if" and make sure that flags are preserved until the branch.
1180 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
1181
1182 // Load fence to prevent load-load reordering.
1183 // Note that this is a no-op, thanks to the x86-64 memory model.
1184 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
1185
1186 // Slow path used to copy array when `src` is gray.
1187 read_barrier_slow_path =
1188 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86_64(invoke);
1189 codegen_->AddSlowPath(read_barrier_slow_path);
1190
1191 // We have done the "if" of the gray bit check above, now branch based on the flags.
1192 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
1193 }
1194
1195 if (length.IsConstant()) {
1196 __ movl(temp3, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
1197 } else {
1198 __ movl(temp3, length.AsRegister<CpuRegister>());
1199 }
1200
1201 // Iterate over the arrays and do a raw copy of the objects. We don't need to poison/unpoison.
1202 DCHECK_EQ(temp1.AsRegister(), RSI);
1203 DCHECK_EQ(temp2.AsRegister(), RDI);
1204 DCHECK_EQ(temp3.AsRegister(), RCX);
1205 __ rep_movsl();
1206
1207 if (read_barrier_slow_path != nullptr) {
1208 DCHECK(codegen_->EmitBakerReadBarrier());
1209 __ Bind(read_barrier_slow_path->GetExitLabel());
1210 }
1211
1212 // We only need one card marking on the destination array.
1213 codegen_->MarkGCCard(temp1, temp2, dest);
1214
1215 __ Bind(&skip_copy_and_write_barrier);
1216 }
1217
1218 __ Bind(intrinsic_slow_path->GetExitLabel());
1219 }
1220
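// String.compareTo() is implemented as a runtime call. A null argument is routed to the
// generic intrinsic slow path (the original invoke), which handles the null case and ends
// in a NullPointerException.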
1221 void IntrinsicLocationsBuilderX86_64::VisitStringCompareTo(HInvoke* invoke) {
1222 LocationSummary* locations = new (allocator_) LocationSummary(
1223 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1224 InvokeRuntimeCallingConvention calling_convention;
1225 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1226 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1227 locations->SetOut(Location::RegisterLocation(RAX));
1228 }
1229
1230 void IntrinsicCodeGeneratorX86_64::VisitStringCompareTo(HInvoke* invoke) {
1231 X86_64Assembler* assembler = GetAssembler();
1232 LocationSummary* locations = invoke->GetLocations();
1233
1234 // Note that the null check must have been done earlier.
1235 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1236
1237 CpuRegister argument = locations->InAt(1).AsRegister<CpuRegister>();
1238 __ testl(argument, argument);
1239 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1240 codegen_->AddSlowPath(slow_path);
1241 __ j(kEqual, slow_path->GetEntryLabel());
1242
1243 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, slow_path);
1244 __ Bind(slow_path->GetExitLabel());
1245 }
1246
1247 void IntrinsicLocationsBuilderX86_64::VisitStringEquals(HInvoke* invoke) {
1248 LocationSummary* locations =
1249 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1250 locations->SetInAt(0, Location::RequiresRegister());
1251 locations->SetInAt(1, Location::RequiresRegister());
1252
1253 // Request temporary registers; RCX and RDI are needed for the repe_cmpsq instruction.
1254 locations->AddTemp(Location::RegisterLocation(RCX));
1255 locations->AddTemp(Location::RegisterLocation(RDI));
1256
1257 // Set the output; RSI is needed for the repe_cmpsq instruction anyway.
1258 locations->SetOut(Location::RegisterLocation(RSI), Location::kOutputOverlap);
1259 }
1260
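// Compares the two strings by checking the class, then the count field (length plus
// compression flag), and finally the character data in 8-byte chunks with REPE CMPSQ,
// using the RSI/RDI/RCX registers fixed in the location summary above.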
1261 void IntrinsicCodeGeneratorX86_64::VisitStringEquals(HInvoke* invoke) {
1262 X86_64Assembler* assembler = GetAssembler();
1263 LocationSummary* locations = invoke->GetLocations();
1264
1265 CpuRegister str = locations->InAt(0).AsRegister<CpuRegister>();
1266 CpuRegister arg = locations->InAt(1).AsRegister<CpuRegister>();
1267 CpuRegister rcx = locations->GetTemp(0).AsRegister<CpuRegister>();
1268 CpuRegister rdi = locations->GetTemp(1).AsRegister<CpuRegister>();
1269 CpuRegister rsi = locations->Out().AsRegister<CpuRegister>();
1270
1271 NearLabel end, return_true, return_false;
1272
1273 // Get offsets of count, value, and class fields within a string object.
1274 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1275 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1276 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1277
1278 // Note that the null check must have been done earlier.
1279 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1280
1281 StringEqualsOptimizations optimizations(invoke);
1282 if (!optimizations.GetArgumentNotNull()) {
1283 // Check if input is null, return false if it is.
1284 __ testl(arg, arg);
1285 __ j(kEqual, &return_false);
1286 }
1287
1288 if (!optimizations.GetArgumentIsString()) {
1289 // Instanceof check for the argument by comparing class fields.
1290 // All string objects must have the same type since String cannot be subclassed.
1291 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1292 // If the argument is a string object, its class field must be equal to receiver's class field.
1293 //
1294 // As the String class is expected to be non-movable, we can read the class
1295 // field from String.equals' arguments without read barriers.
1296 AssertNonMovableStringClass();
1297 // Also, because we use the loaded class references only to compare them, we
1298 // don't need to unpoison them.
1299 // /* HeapReference<Class> */ rcx = str->klass_
1300 __ movl(rcx, Address(str, class_offset));
1301 // if (rcx != /* HeapReference<Class> */ arg->klass_) return false
1302 __ cmpl(rcx, Address(arg, class_offset));
1303 __ j(kNotEqual, &return_false);
1304 }
1305
1306 // Reference equality check, return true if same reference.
1307 __ cmpl(str, arg);
1308 __ j(kEqual, &return_true);
1309
1310 // Load length and compression flag of receiver string.
1311 __ movl(rcx, Address(str, count_offset));
1312 // Check if the lengths and compression flags are equal; return false if they're not.
1313 // Two identical strings will always have the same compression style since the
1314 // compression style is decided at allocation time.
1315 __ cmpl(rcx, Address(arg, count_offset));
1316 __ j(kNotEqual, &return_false);
1317 // Return true if both strings are empty. Even with string compression `count == 0` means empty.
1318 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1319 "Expecting 0=compressed, 1=uncompressed");
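// For reference: with string compression enabled, the count field packs the character count
// and the compression flag as count = (length << 1) | flag, where flag 0 means compressed
// (8-bit chars) and 1 means uncompressed (16-bit chars). For example, an uncompressed "abc"
// stores count = (3 << 1) | 1 = 7 and a compressed "abc" stores count = 6; both are non-zero,
// so the `jrcxz` below only takes the branch for a genuinely empty string.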
1320 __ jrcxz(&return_true);
1321
1322 if (mirror::kUseStringCompression) {
1323 NearLabel string_uncompressed;
1324 // Extract the length; either both strings are compressed or both are uncompressed,
1325 // since strings with differing compression styles were already rejected above.
1326 __ shrl(rcx, Immediate(1));
1327 __ j(kCarrySet, &string_uncompressed);
1328 // Divide string length by 2, rounding up, and continue as if uncompressed.
1329 // Merge clearing the compression flag with +1 for rounding.
1330 __ addl(rcx, Immediate(1));
1331 __ shrl(rcx, Immediate(1));
1332 __ Bind(&string_uncompressed);
1333 }
1334 // Load starting addresses of string values into RSI/RDI as required for repe_cmpsq instruction.
1335 __ leal(rsi, Address(str, value_offset));
1336 __ leal(rdi, Address(arg, value_offset));
1337
1338 // Divide string length by 4 and adjust for lengths not divisible by 4.
1339 __ addl(rcx, Immediate(3));
1340 __ shrl(rcx, Immediate(2));
1341
1342 // Assertions that must hold in order to compare strings 4 characters (uncompressed)
1343 // or 8 characters (compressed) at a time.
1344 DCHECK_ALIGNED(value_offset, 8);
1345 static_assert(IsAligned<8>(kObjectAlignment), "String is not zero padded");
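// Worked example (illustrative): for two equal compressed 5-character strings, RCX starts at
// count = 10, becomes 5 after shifting out the flag, then ceil(5/2) = 3 char units, and
// finally (3 + 3) >> 2 = 1 quadword, so `repe cmpsq` compares 8 bytes: the 5 data bytes plus
// 3 bytes of zero padding guaranteed by the alignment assertion above.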
1346
1347 // Loop to compare strings four characters at a time starting at the beginning of the string.
1348 __ repe_cmpsq();
1349 // If strings are not equal, zero flag will be cleared.
1350 __ j(kNotEqual, &return_false);
1351
1352 // Return true and exit the function.
1353 // If loop does not result in returning false, we return true.
1354 __ Bind(&return_true);
1355 __ movl(rsi, Immediate(1));
1356 __ jmp(&end);
1357
1358 // Return false and exit the function.
1359 __ Bind(&return_false);
1360 __ xorl(rsi, rsi);
1361 __ Bind(&end);
1362 }
1363
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1364 static void CreateStringIndexOfLocations(HInvoke* invoke,
1365 ArenaAllocator* allocator,
1366 bool start_at_zero) {
1367 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1368 LocationSummary::kCallOnSlowPath,
1369 kIntrinsified);
1370 // The data needs to be in RDI for scasw. So request that the string is there, anyways.
1371 locations->SetInAt(0, Location::RegisterLocation(RDI));
1372 // If we look for a constant char, we'll still have to copy it into RAX. So just request the
1373 // allocator to do that, anyways. We can still do the constant check by checking the parameter
1374 // of the instruction explicitly.
1375 // Note: This works as we don't clobber RAX anywhere.
1376 locations->SetInAt(1, Location::RegisterLocation(RAX));
1377 if (!start_at_zero) {
1378 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1379 }
1380 // As we clobber RDI during execution anyways, also use it as the output.
1381 locations->SetOut(Location::SameAsFirstInput());
1382
1383 // repne scasw uses RCX as the counter.
1384 locations->AddTemp(Location::RegisterLocation(RCX));
1385 // Need another temporary to be able to compute the result.
1386 locations->AddTemp(Location::RequiresRegister());
1387 }
1388
GenerateStringIndexOf(HInvoke * invoke,X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,bool start_at_zero)1389 static void GenerateStringIndexOf(HInvoke* invoke,
1390 X86_64Assembler* assembler,
1391 CodeGeneratorX86_64* codegen,
1392 bool start_at_zero) {
1393 LocationSummary* locations = invoke->GetLocations();
1394
1395 // Note that the null check must have been done earlier.
1396 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1397
1398 CpuRegister string_obj = locations->InAt(0).AsRegister<CpuRegister>();
1399 CpuRegister search_value = locations->InAt(1).AsRegister<CpuRegister>();
1400 CpuRegister counter = locations->GetTemp(0).AsRegister<CpuRegister>();
1401 CpuRegister string_length = locations->GetTemp(1).AsRegister<CpuRegister>();
1402 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
1403
1404 // Check our assumptions for registers.
1405 DCHECK_EQ(string_obj.AsRegister(), RDI);
1406 DCHECK_EQ(search_value.AsRegister(), RAX);
1407 DCHECK_EQ(counter.AsRegister(), RCX);
1408 DCHECK_EQ(out.AsRegister(), RDI);
1409
1410 // Check for code points > 0xFFFF: use a slow-path check when the value isn't known statically,
1411 // jump straight to the slow path for a large constant, and omit the check for a small constant or a char.
1412 SlowPathCode* slow_path = nullptr;
1413 HInstruction* code_point = invoke->InputAt(1);
1414 if (code_point->IsIntConstant()) {
1415 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1416 std::numeric_limits<uint16_t>::max()) {
1417 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1418 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1419 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1420 codegen->AddSlowPath(slow_path);
1421 __ jmp(slow_path->GetEntryLabel());
1422 __ Bind(slow_path->GetExitLabel());
1423 return;
1424 }
1425 } else if (code_point->GetType() != DataType::Type::kUint16) {
1426 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1427 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1428 codegen->AddSlowPath(slow_path);
1429 __ j(kAbove, slow_path->GetEntryLabel());
1430 }
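// Rationale: a code point above 0xFFFF is stored as a surrogate pair in the string data,
// which the single-unit `repne scasw` scan below cannot match, so those searches are left
// to the runtime slow path.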
1431
1432 // From here down, we know that we are looking for a char that fits in
1433 // 16 bits (uncompressed) or 8 bits (compressed).
1434 // Location of reference to data array within the String object.
1435 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1436 // Location of count within the String object.
1437 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1438
1439 // Load the count field of the string containing the length and compression flag.
1440 __ movl(string_length, Address(string_obj, count_offset));
1441
1442 // Do a zero-length check. Even with string compression `count == 0` means empty.
1443 // TODO: Support jecxz.
1444 NearLabel not_found_label;
1445 __ testl(string_length, string_length);
1446 __ j(kEqual, &not_found_label);
1447
1448 if (mirror::kUseStringCompression) {
1449 // Use TMP to keep string_length_flagged.
1450 __ movl(CpuRegister(TMP), string_length);
1451 // Mask out first bit used as compression flag.
1452 __ shrl(string_length, Immediate(1));
1453 }
1454
1455 if (start_at_zero) {
1456 // Number of chars to scan is the same as the string length.
1457 __ movl(counter, string_length);
1458 // Move to the start of the string.
1459 __ addq(string_obj, Immediate(value_offset));
1460 } else {
1461 CpuRegister start_index = locations->InAt(2).AsRegister<CpuRegister>();
1462
1463 // Do a start_index check.
1464 __ cmpl(start_index, string_length);
1465 __ j(kGreaterEqual, &not_found_label);
1466
1467 // Ensure we have a start index >= 0.
1468 __ xorl(counter, counter);
1469 __ cmpl(start_index, Immediate(0));
1470 __ cmov(kGreater, counter, start_index, /* is64bit= */ false); // 32-bit copy is enough.
1471
1472 if (mirror::kUseStringCompression) {
1473 NearLabel modify_counter, offset_uncompressed_label;
1474 __ testl(CpuRegister(TMP), Immediate(1));
1475 __ j(kNotZero, &offset_uncompressed_label);
1476 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1477 __ jmp(&modify_counter);
1478 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1479 __ Bind(&offset_uncompressed_label);
1480 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1481 __ Bind(&modify_counter);
1482 } else {
1483 __ leaq(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1484 }
1485 // Now update RCX, the work counter: it will be string.length - start_index.
1486 __ negq(counter); // Needs to be 64-bit negation, as the address computation is 64-bit.
1487 __ leaq(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1488 }
1489
1490 if (mirror::kUseStringCompression) {
1491 NearLabel uncompressed_string_comparison;
1492 NearLabel comparison_done;
1493 __ testl(CpuRegister(TMP), Immediate(1));
1494 __ j(kNotZero, &uncompressed_string_comparison);
1495 // Check if RAX (search_value) is ASCII.
1496 __ cmpl(search_value, Immediate(127));
1497 __ j(kGreater, &not_found_label);
1498 // Comparing byte-per-byte.
1499 __ repne_scasb();
1500 __ jmp(&comparison_done);
1501 // Everything is set up for repne scasw:
1502 // * Comparison address in RDI.
1503 // * Counter in ECX.
1504 __ Bind(&uncompressed_string_comparison);
1505 __ repne_scasw();
1506 __ Bind(&comparison_done);
1507 } else {
1508 __ repne_scasw();
1509 }
1510 // Did we find a match?
1511 __ j(kNotEqual, &not_found_label);
1512
1513 // Yes, we matched. Compute the index of the result.
1514 __ subl(string_length, counter);
1515 __ leal(out, Address(string_length, -1));
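// Worked example (illustrative): `repne scasw` decrements RCX on every comparison, including
// the matching one. Searching for 'c' in "abc" with start_at_zero, RCX starts at 3 and is 0
// after the match at index 2, so the result is 3 - 0 - 1 = 2.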
1516
1517 NearLabel done;
1518 __ jmp(&done);
1519
1520 // Failed to match; return -1.
1521 __ Bind(&not_found_label);
1522 __ movl(out, Immediate(-1));
1523
1524 // And join up at the end.
1525 __ Bind(&done);
1526 if (slow_path != nullptr) {
1527 __ Bind(slow_path->GetExitLabel());
1528 }
1529 }
1530
VisitStringIndexOf(HInvoke * invoke)1531 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOf(HInvoke* invoke) {
1532 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1533 }
1534
VisitStringIndexOf(HInvoke * invoke)1535 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOf(HInvoke* invoke) {
1536 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1537 }
1538
VisitStringIndexOfAfter(HInvoke * invoke)1539 void IntrinsicLocationsBuilderX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1540 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1541 }
1542
VisitStringIndexOfAfter(HInvoke * invoke)1543 void IntrinsicCodeGeneratorX86_64::VisitStringIndexOfAfter(HInvoke* invoke) {
1544 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1545 }
1546
VisitStringNewStringFromBytes(HInvoke * invoke)1547 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1548 LocationSummary* locations = new (allocator_) LocationSummary(
1549 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1550 InvokeRuntimeCallingConvention calling_convention;
1551 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1552 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1553 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1554 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1555 locations->SetOut(Location::RegisterLocation(RAX));
1556 }
1557
VisitStringNewStringFromBytes(HInvoke * invoke)1558 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromBytes(HInvoke* invoke) {
1559 X86_64Assembler* assembler = GetAssembler();
1560 LocationSummary* locations = invoke->GetLocations();
1561
1562 CpuRegister byte_array = locations->InAt(0).AsRegister<CpuRegister>();
1563 __ testl(byte_array, byte_array);
1564 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1565 codegen_->AddSlowPath(slow_path);
1566 __ j(kEqual, slow_path->GetEntryLabel());
1567
1568 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke);
1569 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1570 __ Bind(slow_path->GetExitLabel());
1571 }
1572
VisitStringNewStringFromChars(HInvoke * invoke)1573 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1574 LocationSummary* locations =
1575 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1576 InvokeRuntimeCallingConvention calling_convention;
1577 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1578 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1579 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1580 locations->SetOut(Location::RegisterLocation(RAX));
1581 }
1582
VisitStringNewStringFromChars(HInvoke * invoke)1583 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromChars(HInvoke* invoke) {
1584 // No need to emit code checking whether `locations->InAt(2)` is a null
1585 // pointer, as callers of the native method
1586 //
1587 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1588 //
1589 // all include a null check on `data` before calling that method.
1590 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke);
1591 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1592 }
1593
VisitStringNewStringFromString(HInvoke * invoke)1594 void IntrinsicLocationsBuilderX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1595 LocationSummary* locations = new (allocator_) LocationSummary(
1596 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1597 InvokeRuntimeCallingConvention calling_convention;
1598 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1599 locations->SetOut(Location::RegisterLocation(RAX));
1600 }
1601
VisitStringNewStringFromString(HInvoke * invoke)1602 void IntrinsicCodeGeneratorX86_64::VisitStringNewStringFromString(HInvoke* invoke) {
1603 X86_64Assembler* assembler = GetAssembler();
1604 LocationSummary* locations = invoke->GetLocations();
1605
1606 CpuRegister string_to_copy = locations->InAt(0).AsRegister<CpuRegister>();
1607 __ testl(string_to_copy, string_to_copy);
1608 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
1609 codegen_->AddSlowPath(slow_path);
1610 __ j(kEqual, slow_path->GetEntryLabel());
1611
1612 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke);
1613 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1614 __ Bind(slow_path->GetExitLabel());
1615 }
1616
VisitStringGetCharsNoCheck(HInvoke * invoke)1617 void IntrinsicLocationsBuilderX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1618 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1619 LocationSummary* locations =
1620 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1621 locations->SetInAt(0, Location::RequiresRegister());
1622 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1623 locations->SetInAt(2, Location::RequiresRegister());
1624 locations->SetInAt(3, Location::RequiresRegister());
1625 locations->SetInAt(4, Location::RequiresRegister());
1626
1627 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1628 locations->AddTemp(Location::RegisterLocation(RSI));
1629 locations->AddTemp(Location::RegisterLocation(RDI));
1630 locations->AddTemp(Location::RegisterLocation(RCX));
1631 }
1632
VisitStringGetCharsNoCheck(HInvoke * invoke)1633 void IntrinsicCodeGeneratorX86_64::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1634 X86_64Assembler* assembler = GetAssembler();
1635 LocationSummary* locations = invoke->GetLocations();
1636
1637 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1638 // Location of data in char array buffer.
1639 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1640 // Location of char array data in string.
1641 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1642
1643 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1644 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
1645 Location srcBegin = locations->InAt(1);
1646 int srcBegin_value =
1647 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1648 CpuRegister srcEnd = locations->InAt(2).AsRegister<CpuRegister>();
1649 CpuRegister dst = locations->InAt(3).AsRegister<CpuRegister>();
1650 CpuRegister dstBegin = locations->InAt(4).AsRegister<CpuRegister>();
1651
1652 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1653 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1654 DCHECK_EQ(char_size, 2u);
1655
1656 NearLabel done;
1657 // Compute the number of chars (words) to move.
1658 __ movl(CpuRegister(RCX), srcEnd);
1659 if (srcBegin.IsConstant()) {
1660 __ subl(CpuRegister(RCX), Immediate(srcBegin_value));
1661 } else {
1662 DCHECK(srcBegin.IsRegister());
1663 __ subl(CpuRegister(RCX), srcBegin.AsRegister<CpuRegister>());
1664 }
1665 if (mirror::kUseStringCompression) {
1666 NearLabel copy_uncompressed, copy_loop;
1667 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1668 DCHECK_EQ(c_char_size, 1u);
1669 // Location of count in string.
1670 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1671
1672 __ testl(Address(obj, count_offset), Immediate(1));
1673 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1674 "Expecting 0=compressed, 1=uncompressed");
1675 __ j(kNotZero, &copy_uncompressed);
1676 // Compute the address of the source string by adding the number of chars from
1677 // the source beginning to the value offset of a string.
1678 __ leaq(CpuRegister(RSI),
1679 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1680 // Start the loop to copy String's value to Array of Char.
1681 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1682
1683 __ Bind(&copy_loop);
1684 __ jrcxz(&done);
1685 // Use TMP as temporary (convert byte from RSI to word).
1686 // TODO: Consider selecting RAX as the temporary and using LODSB/STOSW.
1687 __ movzxb(CpuRegister(TMP), Address(CpuRegister(RSI), 0));
1688 __ movw(Address(CpuRegister(RDI), 0), CpuRegister(TMP));
1689 __ leaq(CpuRegister(RDI), Address(CpuRegister(RDI), char_size));
1690 __ leaq(CpuRegister(RSI), Address(CpuRegister(RSI), c_char_size));
1691 // TODO: Add support for LOOP to X86_64Assembler.
1692 __ subl(CpuRegister(RCX), Immediate(1));
1693 __ jmp(&copy_loop);
1694
1695 __ Bind(&copy_uncompressed);
1696 }
1697
1698 __ leaq(CpuRegister(RSI),
1699 CodeGeneratorX86_64::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1700 // Compute the address of the destination buffer.
1701 __ leaq(CpuRegister(RDI), Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1702 // Do the move.
1703 __ rep_movsw();
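// For reference: `rep movsw` copies RCX 16-bit words from [RSI] to [RDI], i.e. one
// uncompressed char per iteration, advancing both pointers by 2 each time.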
1704
1705 __ Bind(&done);
1706 }
1707
GenPeek(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1708 static void GenPeek(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1709 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1710 CpuRegister out = locations->Out().AsRegister<CpuRegister>(); // == address, here for clarity.
1711 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1712 // to avoid a SIGBUS.
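// The sign-extending loads below (movsxb/movsxw) match the Java-level widening of the byte
// and short return values to int.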
1713 switch (size) {
1714 case DataType::Type::kInt8:
1715 __ movsxb(out, Address(address, 0));
1716 break;
1717 case DataType::Type::kInt16:
1718 __ movsxw(out, Address(address, 0));
1719 break;
1720 case DataType::Type::kInt32:
1721 __ movl(out, Address(address, 0));
1722 break;
1723 case DataType::Type::kInt64:
1724 __ movq(out, Address(address, 0));
1725 break;
1726 default:
1727 LOG(FATAL) << "Type not recognized for peek: " << size;
1728 UNREACHABLE();
1729 }
1730 }
1731
VisitMemoryPeekByte(HInvoke * invoke)1732 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1733 CreateIntToIntLocations(allocator_, invoke);
1734 }
1735
VisitMemoryPeekByte(HInvoke * invoke)1736 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekByte(HInvoke* invoke) {
1737 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1738 }
1739
VisitMemoryPeekIntNative(HInvoke * invoke)1740 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1741 CreateIntToIntLocations(allocator_, invoke);
1742 }
1743
VisitMemoryPeekIntNative(HInvoke * invoke)1744 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekIntNative(HInvoke* invoke) {
1745 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1746 }
1747
VisitMemoryPeekLongNative(HInvoke * invoke)1748 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1749 CreateIntToIntLocations(allocator_, invoke);
1750 }
1751
VisitMemoryPeekLongNative(HInvoke * invoke)1752 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekLongNative(HInvoke* invoke) {
1753 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1754 }
1755
VisitMemoryPeekShortNative(HInvoke * invoke)1756 void IntrinsicLocationsBuilderX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1757 CreateIntToIntLocations(allocator_, invoke);
1758 }
1759
VisitMemoryPeekShortNative(HInvoke * invoke)1760 void IntrinsicCodeGeneratorX86_64::VisitMemoryPeekShortNative(HInvoke* invoke) {
1761 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1762 }
1763
CreateIntIntToVoidLocations(ArenaAllocator * allocator,HInvoke * invoke)1764 static void CreateIntIntToVoidLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1765 LocationSummary* locations =
1766 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1767 locations->SetInAt(0, Location::RequiresRegister());
1768 locations->SetInAt(1, Location::RegisterOrInt32Constant(invoke->InputAt(1)));
1769 }
1770
GenPoke(LocationSummary * locations,DataType::Type size,X86_64Assembler * assembler)1771 static void GenPoke(LocationSummary* locations, DataType::Type size, X86_64Assembler* assembler) {
1772 CpuRegister address = locations->InAt(0).AsRegister<CpuRegister>();
1773 Location value = locations->InAt(1);
1774 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1775 // to avoid a SIGBUS.
1776 switch (size) {
1777 case DataType::Type::kInt8:
1778 if (value.IsConstant()) {
1779 __ movb(Address(address, 0),
1780 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1781 } else {
1782 __ movb(Address(address, 0), value.AsRegister<CpuRegister>());
1783 }
1784 break;
1785 case DataType::Type::kInt16:
1786 if (value.IsConstant()) {
1787 __ movw(Address(address, 0),
1788 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1789 } else {
1790 __ movw(Address(address, 0), value.AsRegister<CpuRegister>());
1791 }
1792 break;
1793 case DataType::Type::kInt32:
1794 if (value.IsConstant()) {
1795 __ movl(Address(address, 0),
1796 Immediate(CodeGenerator::GetInt32ValueOf(value.GetConstant())));
1797 } else {
1798 __ movl(Address(address, 0), value.AsRegister<CpuRegister>());
1799 }
1800 break;
1801 case DataType::Type::kInt64:
1802 if (value.IsConstant()) {
1803 int64_t v = value.GetConstant()->AsLongConstant()->GetValue();
1804 DCHECK(IsInt<32>(v));
1805 int32_t v_32 = v;
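        // Note: the `movq` below takes a 32-bit immediate that the CPU sign-extends to
        // 64 bits, which is why only values fitting in int32 are accepted above.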
1806 __ movq(Address(address, 0), Immediate(v_32));
1807 } else {
1808 __ movq(Address(address, 0), value.AsRegister<CpuRegister>());
1809 }
1810 break;
1811 default:
1812 LOG(FATAL) << "Type not recognized for poke: " << size;
1813 UNREACHABLE();
1814 }
1815 }
1816
VisitMemoryPokeByte(HInvoke * invoke)1817 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1818 CreateIntIntToVoidLocations(allocator_, invoke);
1819 }
1820
VisitMemoryPokeByte(HInvoke * invoke)1821 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeByte(HInvoke* invoke) {
1822 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1823 }
1824
VisitMemoryPokeIntNative(HInvoke * invoke)1825 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1826 CreateIntIntToVoidLocations(allocator_, invoke);
1827 }
1828
VisitMemoryPokeIntNative(HInvoke * invoke)1829 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeIntNative(HInvoke* invoke) {
1830 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1831 }
1832
VisitMemoryPokeLongNative(HInvoke * invoke)1833 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1834 CreateIntIntToVoidLocations(allocator_, invoke);
1835 }
1836
VisitMemoryPokeLongNative(HInvoke * invoke)1837 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeLongNative(HInvoke* invoke) {
1838 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1839 }
1840
VisitMemoryPokeShortNative(HInvoke * invoke)1841 void IntrinsicLocationsBuilderX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1842 CreateIntIntToVoidLocations(allocator_, invoke);
1843 }
1844
VisitMemoryPokeShortNative(HInvoke * invoke)1845 void IntrinsicCodeGeneratorX86_64::VisitMemoryPokeShortNative(HInvoke* invoke) {
1846 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1847 }
1848
VisitThreadCurrentThread(HInvoke * invoke)1849 void IntrinsicLocationsBuilderX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1850 LocationSummary* locations =
1851 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1852 locations->SetOut(Location::RequiresRegister());
1853 }
1854
VisitThreadCurrentThread(HInvoke * invoke)1855 void IntrinsicCodeGeneratorX86_64::VisitThreadCurrentThread(HInvoke* invoke) {
1856 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
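// On x86-64, ART reaches the current Thread through the GS segment base; PeerOffset is the
// offset of the java.lang.Thread peer object within it, and a 32-bit load suffices because
// heap references are 32 bits in this configuration.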
1857 GetAssembler()->gs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86_64PointerSize>(),
1858 /* no_rip= */ true));
1859 }
1860
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)1861 static void GenUnsafeGet(HInvoke* invoke,
1862 DataType::Type type,
1863 [[maybe_unused]] bool is_volatile,
1864 CodeGeneratorX86_64* codegen) {
1865 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1866 LocationSummary* locations = invoke->GetLocations();
1867 Location base_loc = locations->InAt(1);
1868 CpuRegister base = base_loc.AsRegister<CpuRegister>();
1869 Location offset_loc = locations->InAt(2);
1870 CpuRegister offset = offset_loc.AsRegister<CpuRegister>();
1871 Location output_loc = locations->Out();
1872 CpuRegister output = output_loc.AsRegister<CpuRegister>();
1873
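// Note: `is_volatile` is intentionally unused. Under the x86-64 memory model every load
// already has acquire semantics, so volatile/acquire gets need no extra fence here.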
1874 switch (type) {
1875 case DataType::Type::kInt8:
1876 __ movsxb(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1877 break;
1878
1879 case DataType::Type::kInt32:
1880 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1881 break;
1882
1883 case DataType::Type::kReference: {
1884 if (codegen->EmitReadBarrier()) {
1885 if (kUseBakerReadBarrier) {
1886 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1887 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1888 invoke, output_loc, base, src, /* needs_null_check= */ false);
1889 } else {
1890 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1891 codegen->GenerateReadBarrierSlow(
1892 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1893 }
1894 } else {
1895 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1896 __ MaybeUnpoisonHeapReference(output);
1897 }
1898 break;
1899 }
1900
1901 case DataType::Type::kInt64:
1902 __ movq(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1903 break;
1904
1905 default:
1906 LOG(FATAL) << "Unsupported op size " << type;
1907 UNREACHABLE();
1908 }
1909 }
1910
GenUnsafeGetAbsolute(HInvoke * invoke,DataType::Type type,CodeGeneratorX86_64 * codegen)1911 static void GenUnsafeGetAbsolute(HInvoke* invoke,
1912 DataType::Type type,
1913 CodeGeneratorX86_64* codegen) {
1914 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
1915 LocationSummary* locations = invoke->GetLocations();
1916 Location address_loc = locations->InAt(1);
1917 Address address = Address(address_loc.AsRegister<CpuRegister>(), 0);
1918 Location output_loc = locations->Out();
1919 CpuRegister output = output_loc.AsRegister<CpuRegister>();
1920
1921 switch (type) {
1922 case DataType::Type::kInt8:
1923 __ movsxb(output, address);
1924 break;
1925
1926 case DataType::Type::kInt32:
1927 __ movl(output, address);
1928 break;
1929
1930 case DataType::Type::kInt64:
1931 __ movq(output, address);
1932 break;
1933
1934 default:
1935 LOG(FATAL) << "Unsupported op size " << type;
1936 UNREACHABLE();
1937 }
1938 }
1939
CreateIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)1940 static void CreateIntIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
1941 LocationSummary* locations =
1942 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1943 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1944 locations->SetInAt(1, Location::RequiresRegister());
1945 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1946 }
1947
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)1948 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1949 HInvoke* invoke,
1950 CodeGeneratorX86_64* codegen) {
1951 bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetReference(invoke);
1952 LocationSummary* locations =
1953 new (allocator) LocationSummary(invoke,
1954 can_call
1955 ? LocationSummary::kCallOnSlowPath
1956 : LocationSummary::kNoCall,
1957 kIntrinsified);
1958 if (can_call && kUseBakerReadBarrier) {
1959 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1960 }
1961 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1962 locations->SetInAt(1, Location::RequiresRegister());
1963 locations->SetInAt(2, Location::RequiresRegister());
1964 locations->SetOut(Location::RequiresRegister(),
1965 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1966 }
1967
VisitUnsafeGet(HInvoke * invoke)1968 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGet(HInvoke* invoke) {
1969 VisitJdkUnsafeGet(invoke);
1970 }
VisitUnsafeGetAbsolute(HInvoke * invoke)1971 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
1972 VisitJdkUnsafeGetAbsolute(invoke);
1973 }
VisitUnsafeGetVolatile(HInvoke * invoke)1974 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
1975 VisitJdkUnsafeGetVolatile(invoke);
1976 }
VisitUnsafeGetLong(HInvoke * invoke)1977 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
1978 VisitJdkUnsafeGetLong(invoke);
1979 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1980 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1981 VisitJdkUnsafeGetLongVolatile(invoke);
1982 }
VisitUnsafeGetObject(HInvoke * invoke)1983 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
1984 VisitJdkUnsafeGetReference(invoke);
1985 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1986 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1987 VisitJdkUnsafeGetReferenceVolatile(invoke);
1988 }
VisitUnsafeGetByte(HInvoke * invoke)1989 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
1990 VisitJdkUnsafeGetByte(invoke);
1991 }
1992
VisitJdkUnsafeGet(HInvoke * invoke)1993 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
1994 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
1995 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)1996 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
1997 CreateIntIntToIntLocations(allocator_, invoke);
1998 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1999 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2000 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2001 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2002 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2003 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2004 }
VisitJdkUnsafeGetLong(HInvoke * invoke)2005 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2006 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2007 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2008 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2009 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2010 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2011 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2012 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2013 }
VisitJdkUnsafeGetReference(HInvoke * invoke)2014 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2015 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2016 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2017 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2018 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2019 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2020 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2021 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2022 }
VisitJdkUnsafeGetByte(HInvoke * invoke)2023 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2024 CreateIntIntIntToIntLocations(allocator_, invoke, codegen_);
2025 }
2026
VisitUnsafeGet(HInvoke * invoke)2027 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGet(HInvoke* invoke) {
2028 VisitJdkUnsafeGet(invoke);
2029 }
VisitUnsafeGetAbsolute(HInvoke * invoke)2030 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAbsolute(HInvoke* invoke) {
2031 VisitJdkUnsafeGetAbsolute(invoke);
2032 }
VisitUnsafeGetVolatile(HInvoke * invoke)2033 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetVolatile(HInvoke* invoke) {
2034 VisitJdkUnsafeGetVolatile(invoke);
2035 }
VisitUnsafeGetLong(HInvoke * invoke)2036 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLong(HInvoke* invoke) {
2037 VisitJdkUnsafeGetLong(invoke);
2038 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2039 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2040 VisitJdkUnsafeGetLongVolatile(invoke);
2041 }
VisitUnsafeGetObject(HInvoke * invoke)2042 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObject(HInvoke* invoke) {
2043 VisitJdkUnsafeGetReference(invoke);
2044 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2045 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2046 VisitJdkUnsafeGetReferenceVolatile(invoke);
2047 }
VisitUnsafeGetByte(HInvoke * invoke)2048 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetByte(HInvoke* invoke) {
2049 VisitJdkUnsafeGetByte(invoke);
2050 }
2051
VisitJdkUnsafeGet(HInvoke * invoke)2052 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGet(HInvoke* invoke) {
2053 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2054 }
VisitJdkUnsafeGetAbsolute(HInvoke * invoke)2055 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAbsolute(HInvoke* invoke) {
2056 GenUnsafeGetAbsolute(invoke, DataType::Type::kInt32, codegen_);
2057 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)2058 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
2059 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2060 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)2061 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
2062 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2063 }
VisitJdkUnsafeGetLong(HInvoke * invoke)2064 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLong(HInvoke* invoke) {
2065 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2066 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)2067 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
2068 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2069 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)2070 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
2071 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2072 }
VisitJdkUnsafeGetReference(HInvoke * invoke)2073 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReference(HInvoke* invoke) {
2074 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2075 }
VisitJdkUnsafeGetReferenceVolatile(HInvoke * invoke)2076 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceVolatile(HInvoke* invoke) {
2077 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2078 }
VisitJdkUnsafeGetReferenceAcquire(HInvoke * invoke)2079 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetReferenceAcquire(HInvoke* invoke) {
2080 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2081 }
VisitJdkUnsafeGetByte(HInvoke * invoke)2082 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetByte(HInvoke* invoke) {
2083 GenUnsafeGet(invoke, DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
2084 }
2085
CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2086 static void CreateIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2087 [[maybe_unused]] DataType::Type type,
2088 HInvoke* invoke) {
2089 LocationSummary* locations =
2090 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2091 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2092 locations->SetInAt(1, Location::RequiresRegister());
2093 locations->SetInAt(2, Location::RequiresRegister());
2094 }
2095
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2096 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
2097 DataType::Type type,
2098 HInvoke* invoke) {
2099 LocationSummary* locations =
2100 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2101 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2102 locations->SetInAt(1, Location::RequiresRegister());
2103 locations->SetInAt(2, Location::RequiresRegister());
2104 locations->SetInAt(3, Location::RequiresRegister());
2105 if (type == DataType::Type::kReference) {
2106 // Need temp registers for card-marking.
2107 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2108 locations->AddTemp(Location::RequiresRegister());
2109 }
2110 }
2111
VisitUnsafePut(HInvoke * invoke)2112 void IntrinsicLocationsBuilderX86_64::VisitUnsafePut(HInvoke* invoke) {
2113 VisitJdkUnsafePut(invoke);
2114 }
VisitUnsafePutAbsolute(HInvoke * invoke)2115 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2116 VisitJdkUnsafePutAbsolute(invoke);
2117 }
VisitUnsafePutOrderedInt(HInvoke * invoke)2118 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrderedInt(HInvoke* invoke) {
2119 VisitJdkUnsafePutOrderedInt(invoke);
2120 }
VisitUnsafePutVolatile(HInvoke * invoke)2121 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2122 VisitJdkUnsafePutVolatile(invoke);
2123 }
VisitUnsafePutObject(HInvoke * invoke)2124 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2125 VisitJdkUnsafePutReference(invoke);
2126 }
VisitUnsafePutOrderedObject(HInvoke * invoke)2127 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutOrderedObject(HInvoke* invoke) {
2128 VisitJdkUnsafePutOrderedObject(invoke);
2129 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2130 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2131 VisitJdkUnsafePutReferenceVolatile(invoke);
2132 }
VisitUnsafePutLong(HInvoke * invoke)2133 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2134 VisitJdkUnsafePutLong(invoke);
2135 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2136 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2137 VisitJdkUnsafePutLongOrdered(invoke);
2138 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2139 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2140 VisitJdkUnsafePutLongVolatile(invoke);
2141 }
VisitUnsafePutByte(HInvoke * invoke)2142 void IntrinsicLocationsBuilderX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2143 VisitJdkUnsafePut(invoke);
2144 }
2145
VisitJdkUnsafePut(HInvoke * invoke)2146 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2147 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2148 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2149 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2150 CreateIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2151 }
VisitJdkUnsafePutOrderedInt(HInvoke * invoke)2152 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrderedInt(HInvoke* invoke) {
2153 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2154 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2155 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2156 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2157 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2158 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2159 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt32, invoke);
2160 }
VisitJdkUnsafePutReference(HInvoke * invoke)2161 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2162 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2163 }
VisitJdkUnsafePutOrderedObject(HInvoke * invoke)2164 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutOrderedObject(HInvoke* invoke) {
2165 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2166 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2167 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2168 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2169 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2170 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2171 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kReference, invoke);
2172 }
VisitJdkUnsafePutLong(HInvoke * invoke)2173 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2174 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2175 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2176 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2177 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2178 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2179 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2180 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2181 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2182 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2183 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt64, invoke);
2184 }
VisitJdkUnsafePutByte(HInvoke * invoke)2185 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2186 CreateIntIntIntIntToVoidPlusTempsLocations(allocator_, DataType::Type::kInt8, invoke);
2187 }
2188
2189 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2190 // memory model.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2191 static void GenUnsafePut(LocationSummary* locations, DataType::Type type, bool is_volatile,
2192 CodeGeneratorX86_64* codegen) {
2193 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2194 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>();
2195 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>();
2196 CpuRegister value = locations->InAt(3).AsRegister<CpuRegister>();
2197
2198 if (type == DataType::Type::kInt64) {
2199 __ movq(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2200 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2201 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2202 __ movl(temp, value);
2203 __ PoisonHeapReference(temp);
2204 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2205 } else if (type == DataType::Type::kInt32 || type == DataType::Type::kReference) {
2206 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2207 } else {
2208 CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePut data type";
2209 __ movb(Address(base, offset, ScaleFactor::TIMES_1, 0), value);
2210 }
2211
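// A volatile store needs a StoreLoad barrier: x86-64 may reorder a store with a later load
// to a different address, so an explicit fence is emitted below in that case.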
2212 if (is_volatile) {
2213 codegen->MemoryFence();
2214 }
2215
2216 if (type == DataType::Type::kReference) {
2217 bool value_can_be_null = true; // TODO: Worth finding out this information?
2218 codegen->MaybeMarkGCCard(locations->GetTemp(0).AsRegister<CpuRegister>(),
2219 locations->GetTemp(1).AsRegister<CpuRegister>(),
2220 base,
2221 value,
2222 value_can_be_null);
2223 }
2224 }
2225
2226 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2227 // memory model.
GenUnsafePutAbsolute(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86_64 * codegen)2228 static void GenUnsafePutAbsolute(LocationSummary* locations,
2229 DataType::Type type,
2230 bool is_volatile,
2231 CodeGeneratorX86_64* codegen) {
2232 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2233 CpuRegister address_reg = locations->InAt(1).AsRegister<CpuRegister>();
2234 Address address = Address(address_reg, 0);
2235 CpuRegister value = locations->InAt(2).AsRegister<CpuRegister>();
2236
2237 if (type == DataType::Type::kInt64) {
2238 __ movq(address, value);
2239 } else if (type == DataType::Type::kInt32) {
2240 __ movl(address, value);
2241 } else {
2242 CHECK_EQ(type, DataType::Type::kInt8) << "Unimplemented GenUnsafePutAbsolute data type";
2243 __ movb(address, value);
2244 }
2245
2246 if (is_volatile) {
2247 codegen->MemoryFence();
2248 }
2249 }
2250
VisitUnsafePut(HInvoke * invoke)2251 void IntrinsicCodeGeneratorX86_64::VisitUnsafePut(HInvoke* invoke) {
2252 VisitJdkUnsafePut(invoke);
2253 }
VisitUnsafePutAbsolute(HInvoke * invoke)2254 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutAbsolute(HInvoke* invoke) {
2255 VisitJdkUnsafePutAbsolute(invoke);
2256 }
VisitUnsafePutOrderedInt(HInvoke * invoke)2257 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrderedInt(HInvoke* invoke) {
2258 VisitJdkUnsafePutOrderedInt(invoke);
2259 }
VisitUnsafePutVolatile(HInvoke * invoke)2260 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutVolatile(HInvoke* invoke) {
2261 VisitJdkUnsafePutVolatile(invoke);
2262 }
VisitUnsafePutObject(HInvoke * invoke)2263 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObject(HInvoke* invoke) {
2264 VisitJdkUnsafePutReference(invoke);
2265 }
VisitUnsafePutOrderedObject(HInvoke * invoke)2266 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutOrderedObject(HInvoke* invoke) {
2267 VisitJdkUnsafePutOrderedObject(invoke);
2268 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2269 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2270 VisitJdkUnsafePutReferenceVolatile(invoke);
2271 }
VisitUnsafePutLong(HInvoke * invoke)2272 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLong(HInvoke* invoke) {
2273 VisitJdkUnsafePutLong(invoke);
2274 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2275 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2276 VisitJdkUnsafePutLongOrdered(invoke);
2277 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2278 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2279 VisitJdkUnsafePutLongVolatile(invoke);
2280 }
VisitUnsafePutByte(HInvoke * invoke)2281 void IntrinsicCodeGeneratorX86_64::VisitUnsafePutByte(HInvoke* invoke) {
2282 VisitJdkUnsafePutByte(invoke);
2283 }
2284
VisitJdkUnsafePut(HInvoke * invoke)2285 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePut(HInvoke* invoke) {
2286 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2287 }
VisitJdkUnsafePutAbsolute(HInvoke * invoke)2288 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutAbsolute(HInvoke* invoke) {
2289 GenUnsafePutAbsolute(
2290 invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/false, codegen_);
2291 }
VisitJdkUnsafePutOrderedInt(HInvoke * invoke)2292 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrderedInt(HInvoke* invoke) {
2293 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2294 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2295 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2296 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2297 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2298 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2299 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /* is_volatile= */ true, codegen_);
2300 }
VisitJdkUnsafePutReference(HInvoke * invoke)2301 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReference(HInvoke* invoke) {
2302 GenUnsafePut(
2303 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2304 }
VisitJdkUnsafePutOrderedObject(HInvoke * invoke)2305 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutOrderedObject(HInvoke* invoke) {
2306 GenUnsafePut(
2307 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2308 }
VisitJdkUnsafePutReferenceVolatile(HInvoke * invoke)2309 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceVolatile(HInvoke* invoke) {
2310 GenUnsafePut(
2311 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2312 }
VisitJdkUnsafePutReferenceRelease(HInvoke * invoke)2313 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutReferenceRelease(HInvoke* invoke) {
2314 GenUnsafePut(
2315 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2316 }
VisitJdkUnsafePutLong(HInvoke * invoke)2317 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLong(HInvoke* invoke) {
2318 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2319 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2320 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2321 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2322 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2323 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2324 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2325 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2326 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2327 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2328 }
VisitJdkUnsafePutByte(HInvoke * invoke)2329 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafePutByte(HInvoke* invoke) {
2330 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt8, /*is_volatile=*/false, codegen_);
2331 }
2332
CreateUnsafeCASLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type type)2333 static void CreateUnsafeCASLocations(ArenaAllocator* allocator,
2334 HInvoke* invoke,
2335 CodeGeneratorX86_64* codegen,
2336 DataType::Type type) {
2337 const bool can_call = codegen->EmitBakerReadBarrier() && IsUnsafeCASReference(invoke);
2338 LocationSummary* locations =
2339 new (allocator) LocationSummary(invoke,
2340 can_call
2341 ? LocationSummary::kCallOnSlowPath
2342 : LocationSummary::kNoCall,
2343 kIntrinsified);
2344 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2345 locations->SetInAt(1, Location::RequiresRegister());
2346 locations->SetInAt(2, Location::RequiresRegister());
2347 // expected value must be in EAX/RAX.
2348 locations->SetInAt(3, Location::RegisterLocation(RAX));
2349 locations->SetInAt(4, Location::RequiresRegister());
2350
2351 // RAX is clobbered in CMPXCHG, but we set it as out so no need to add it as temporary.
2352 locations->SetOut(Location::RegisterLocation(RAX));
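// For reference: LOCK CMPXCHG compares RAX with the memory operand; on a match it stores the
// new value and sets ZF, otherwise it loads the current memory value into RAX and clears ZF.
// This is why the expected value and the output share RAX.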
2353
2354 if (type == DataType::Type::kReference) {
2355 // Need two temporaries for MarkGCCard.
2356 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2357 locations->AddTemp(Location::RequiresRegister());
2358 if (codegen->EmitReadBarrier()) {
2359 // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
2360 DCHECK(kUseBakerReadBarrier);
2361 locations->AddTemp(Location::RequiresRegister());
2362 }
2363 }
2364 }
2365
VisitUnsafeCASInt(HInvoke * invoke)2366 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2367 VisitJdkUnsafeCASInt(invoke);
2368 }
2369
VisitUnsafeCASLong(HInvoke * invoke)2370 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2371 VisitJdkUnsafeCASLong(invoke);
2372 }
2373
VisitUnsafeCASObject(HInvoke * invoke)2374 void IntrinsicLocationsBuilderX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2375 VisitJdkUnsafeCASObject(invoke);
2376 }
2377
VisitJdkUnsafeCASInt(HInvoke * invoke)2378 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2379 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2380 VisitJdkUnsafeCompareAndSetInt(invoke);
2381 }
2382
VisitJdkUnsafeCASLong(HInvoke * invoke)2383 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2384 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2385 VisitJdkUnsafeCompareAndSetLong(invoke);
2386 }
2387
VisitJdkUnsafeCASObject(HInvoke * invoke)2388 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2389 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2390 VisitJdkUnsafeCompareAndSetReference(invoke);
2391 }
2392
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2393 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2394 CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt32);
2395 }
2396
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2397 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2398 CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kInt64);
2399 }
2400
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2401 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2402 // The only supported read barrier implementation is the Baker-style read barrier.
2403 if (codegen_->EmitNonBakerReadBarrier()) {
2404 return;
2405 }
2406
2407 CreateUnsafeCASLocations(allocator_, invoke, codegen_, DataType::Type::kReference);
2408 }
2409
2410 // Convert ZF into the Boolean result.
GenZFlagToResult(X86_64Assembler * assembler,CpuRegister out)2411 static inline void GenZFlagToResult(X86_64Assembler* assembler, CpuRegister out) {
2412 __ setcc(kZero, out);
2413 __ movzxb(out, out);
2414 }
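// For reference, a minimal sketch of what the two instructions above compute (intended
// semantics only, not emitted code): SETZ writes 1 into the low byte of `out` if ZF is set
// (i.e. the preceding LOCK CMPXCHG succeeded) and 0 otherwise; MOVZXB then zero-extends
// that byte so the upper bits of `out` are cleared:
//   out = (ZF != 0) ? 1 : 0;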
2415
2416 // This function assumes that the expected value for CMPXCHG and the output are in RAX.
GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64 * codegen,DataType::Type type,Address field_addr,Location value,bool is_cmpxchg,bool byte_swap)2417 static void GenCompareAndSetOrExchangeInt(CodeGeneratorX86_64* codegen,
2418 DataType::Type type,
2419 Address field_addr,
2420 Location value,
2421 bool is_cmpxchg,
2422 bool byte_swap) {
2423 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2424 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2425
2426 if (byte_swap) {
2427 instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2428 instr_codegen->Bswap(value, type);
2429 }
2430
2431 switch (type) {
2432 case DataType::Type::kBool:
2433 case DataType::Type::kInt8:
2434 __ LockCmpxchgb(field_addr, value.AsRegister<CpuRegister>());
2435 break;
2436 case DataType::Type::kInt16:
2437 case DataType::Type::kUint16:
2438 __ LockCmpxchgw(field_addr, value.AsRegister<CpuRegister>());
2439 break;
2440 case DataType::Type::kInt32:
2441 case DataType::Type::kUint32:
2442 __ LockCmpxchgl(field_addr, value.AsRegister<CpuRegister>());
2443 break;
2444 case DataType::Type::kInt64:
2445 case DataType::Type::kUint64:
2446 __ LockCmpxchgq(field_addr, value.AsRegister<CpuRegister>());
2447 break;
2448 default:
2449 LOG(FATAL) << "Unexpected non-integral CAS type " << type;
2450 }
2451 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2452
2453 if (byte_swap) {
2454 // Restore byte order for value.
2455 instr_codegen->Bswap(value, type);
2456 }
2457
2458 CpuRegister rax(RAX);
2459 if (is_cmpxchg) {
2460 if (byte_swap) {
2461 instr_codegen->Bswap(Location::RegisterLocation(RAX), type);
2462 }
2463 // Sign-extend or zero-extend the result as necessary.
2464 switch (type) {
2465 case DataType::Type::kBool:
2466 __ movzxb(rax, rax);
2467 break;
2468 case DataType::Type::kInt8:
2469 __ movsxb(rax, rax);
2470 break;
2471 case DataType::Type::kInt16:
2472 __ movsxw(rax, rax);
2473 break;
2474 case DataType::Type::kUint16:
2475 __ movzxw(rax, rax);
2476 break;
2477 default:
2478 break; // No need to do anything.
2479 }
2480 } else {
2481 GenZFlagToResult(assembler, rax);
2482 }
2483 }
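// For reference, LOCK CMPXCHG on the field behaves like the following atomic pseudo-code
// (a sketch of the architectural semantics, not emitted code), with the expected value
// pre-loaded into RAX by the caller:
//   old = *field;
//   if (old == rax) { *field = value; ZF = 1; } else { rax = old; ZF = 0; }
// The success flag therefore comes from ZF (compare-and-set) and the witness value from
// RAX (compare-and-exchange), which is why this function requires both to be in RAX.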
2484
GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64 * codegen,Address field_addr,CpuRegister temp,Location value,Location expected,Location out,bool is64bit,bool is_cmpxchg,bool byte_swap)2485 static void GenCompareAndSetOrExchangeFP(CodeGeneratorX86_64* codegen,
2486 Address field_addr,
2487 CpuRegister temp,
2488 Location value,
2489 Location expected,
2490 Location out,
2491 bool is64bit,
2492 bool is_cmpxchg,
2493 bool byte_swap) {
2494 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2495 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
2496
2497 Location rax_loc = Location::RegisterLocation(RAX);
2498 Location temp_loc = Location::RegisterLocation(temp.AsRegister());
2499
2500 DataType::Type type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
2501
2502 // Copy `expected` to RAX (required by the CMPXCHG instruction).
2503 codegen->Move(rax_loc, expected);
2504
2505 // Copy value to some other register (ensure it's not RAX).
2506 DCHECK_NE(temp.AsRegister(), RAX);
2507 codegen->Move(temp_loc, value);
2508
2509 if (byte_swap) {
2510 instr_codegen->Bswap(rax_loc, type);
2511 instr_codegen->Bswap(temp_loc, type);
2512 }
2513
2514 if (is64bit) {
2515 __ LockCmpxchgq(field_addr, temp);
2516 } else {
2517 __ LockCmpxchgl(field_addr, temp);
2518 }
2519 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers here.
2520 // No need to restore byte order for temporary register.
2521
2522 if (is_cmpxchg) {
2523 if (byte_swap) {
2524 instr_codegen->Bswap(rax_loc, type);
2525 }
2526 MoveIntToFP(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit, assembler);
2527 } else {
2528 GenZFlagToResult(assembler, out.AsRegister<CpuRegister>());
2529 }
2530 }
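// Note (descriptive only): the floating-point variant above performs the comparison on the
// raw bit patterns, since the expected and new values are moved into general-purpose
// registers first. So e.g. +0.0 and -0.0 compare as different and a NaN only matches a NaN
// with the identical bit pattern, which is consistent with the bitwise comparison described
// for CAS access modes on float/double (cf. Float.floatToRawIntBits /
// Double.doubleToRawLongBits).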
2531
2532 // This function assumes that the expected value for CMPXCHG and the output are in RAX.
GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64 * codegen,HInvoke * invoke,CpuRegister base,CpuRegister offset,CpuRegister value,CpuRegister temp1,CpuRegister temp2,CpuRegister temp3,bool is_cmpxchg)2533 static void GenCompareAndSetOrExchangeRef(CodeGeneratorX86_64* codegen,
2534 HInvoke* invoke,
2535 CpuRegister base,
2536 CpuRegister offset,
2537 CpuRegister value,
2538 CpuRegister temp1,
2539 CpuRegister temp2,
2540 CpuRegister temp3,
2541 bool is_cmpxchg) {
2542 // The only supported read barrier implementation is the Baker-style read barrier.
2543 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
2544
2545 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2546
2547 // Mark card for object assuming new value is stored.
2548 bool value_can_be_null = true; // TODO: Worth finding out this information?
2549 codegen->MaybeMarkGCCard(temp1, temp2, base, value, value_can_be_null);
2550
2551 Address field_addr(base, offset, TIMES_1, 0);
2552 if (codegen->EmitBakerReadBarrier()) {
2553 // Need to make sure the reference stored in the field is a to-space
2554 // one before attempting the CAS or the CAS could fail incorrectly.
2555 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2556 invoke,
2557 Location::RegisterLocation(temp3.AsRegister()),
2558 base,
2559 field_addr,
2560 /* needs_null_check= */ false,
2561 /* always_update_field= */ true,
2562 &temp1,
2563 &temp2);
2564 } else {
2565 // Nothing to do, the value will be loaded into the out register by CMPXCHG.
2566 }
2567
2568 bool base_equals_value = (base.AsRegister() == value.AsRegister());
2569 Register value_reg = value.AsRegister();
2570 if (kPoisonHeapReferences) {
2571 if (base_equals_value) {
2572 // If `base` and `value` are the same register location, move `value_reg` to a temporary
2573 // register. This way, poisoning `value_reg` won't invalidate `base`.
2574 value_reg = temp1.AsRegister();
2575 __ movl(CpuRegister(value_reg), base);
2576 }
2577
2578 // Check that the register allocator did not assign the location of expected value (RAX) to
2579 // `value` nor to `base`, so that heap poisoning (when enabled) works as intended below.
2580 // - If `value` were equal to RAX, both references would be poisoned twice, meaning they would
2581 // not be poisoned at all, as heap poisoning uses address negation.
2582 // - If `base` were equal to RAX, poisoning RAX would invalidate `base`.
2583 DCHECK_NE(RAX, value_reg);
2584 DCHECK_NE(RAX, base.AsRegister());
2585
2586 __ PoisonHeapReference(CpuRegister(RAX));
2587 __ PoisonHeapReference(CpuRegister(value_reg));
2588 }
2589
2590 __ LockCmpxchgl(field_addr, CpuRegister(value_reg));
2591 // LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
2592
2593 if (is_cmpxchg) {
2594 // Output is in RAX, so we can rely on CMPXCHG and do nothing.
2595 __ MaybeUnpoisonHeapReference(CpuRegister(RAX));
2596 } else {
2597 GenZFlagToResult(assembler, CpuRegister(RAX));
2598 }
2599
2600 // If heap poisoning is enabled, we need to unpoison the values that were poisoned earlier.
2601 if (kPoisonHeapReferences) {
2602 if (base_equals_value) {
2603 // `value_reg` has been moved to a temporary register, no need to unpoison it.
2604 } else {
2605 // Ensure `value` is not RAX, so that unpoisoning the former does not invalidate the latter.
2606 DCHECK_NE(RAX, value_reg);
2607 __ UnpoisonHeapReference(CpuRegister(value_reg));
2608 }
2609 }
2610 }
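// A short summary of the reference CAS above (descriptive only): with heap poisoning
// enabled, references are stored in poisoned (negated) form, so both the expected value in
// RAX and the new value must be poisoned before LOCK CMPXCHG compares them against the
// poisoned field contents. With Baker read barriers, the field is first updated to hold a
// to-space reference so that a from-space/to-space mismatch cannot make the CAS fail
// spuriously.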
2611
2612 // In debug mode, return true if all registers are pairwise different. In release mode, do nothing
2613 // and always return true.
RegsAreAllDifferent(const std::vector<CpuRegister> & regs)2614 static bool RegsAreAllDifferent(const std::vector<CpuRegister>& regs) {
2615 if (kIsDebugBuild) {
2616 for (size_t i = 0; i < regs.size(); ++i) {
2617 for (size_t j = 0; j < i; ++j) {
2618 if (regs[i].AsRegister() == regs[j].AsRegister()) {
2619 return false;
2620 }
2621 }
2622 }
2623 }
2624 return true;
2625 }
2626
2627 // GenCompareAndSetOrExchange handles all value types and therefore accepts generic locations and
2628 // temporary indices that may not correspond to real registers for code paths that do not use them.
GenCompareAndSetOrExchange(CodeGeneratorX86_64 * codegen,HInvoke * invoke,DataType::Type type,CpuRegister base,CpuRegister offset,uint32_t temp1_index,uint32_t temp2_index,uint32_t temp3_index,Location new_value,Location expected,Location out,bool is_cmpxchg,bool byte_swap)2629 static void GenCompareAndSetOrExchange(CodeGeneratorX86_64* codegen,
2630 HInvoke* invoke,
2631 DataType::Type type,
2632 CpuRegister base,
2633 CpuRegister offset,
2634 uint32_t temp1_index,
2635 uint32_t temp2_index,
2636 uint32_t temp3_index,
2637 Location new_value,
2638 Location expected,
2639 Location out,
2640 bool is_cmpxchg,
2641 bool byte_swap) {
2642 LocationSummary* locations = invoke->GetLocations();
2643 Address field_address(base, offset, TIMES_1, 0);
2644
2645 if (DataType::IsFloatingPointType(type)) {
2646 bool is64bit = (type == DataType::Type::kFloat64);
2647 CpuRegister temp = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2648 DCHECK(RegsAreAllDifferent({base, offset, temp, CpuRegister(RAX)}));
2649
2650 GenCompareAndSetOrExchangeFP(
2651 codegen, field_address, temp, new_value, expected, out, is64bit, is_cmpxchg, byte_swap);
2652 } else {
2653 // Both the expected value for CMPXCHG and the output are in RAX.
2654 DCHECK_EQ(RAX, expected.AsRegister<Register>());
2655 DCHECK_EQ(RAX, out.AsRegister<Register>());
2656
2657 if (type == DataType::Type::kReference) {
2658 CpuRegister new_value_reg = new_value.AsRegister<CpuRegister>();
2659 CpuRegister temp1 = locations->GetTemp(temp1_index).AsRegister<CpuRegister>();
2660 CpuRegister temp2 = locations->GetTemp(temp2_index).AsRegister<CpuRegister>();
2661 CpuRegister temp3 = codegen->EmitReadBarrier()
2662 ? locations->GetTemp(temp3_index).AsRegister<CpuRegister>()
2663 : CpuRegister(kNoRegister);
2664 DCHECK(RegsAreAllDifferent({base, offset, temp1, temp2, temp3}));
2665
2666 DCHECK(!byte_swap);
2667 GenCompareAndSetOrExchangeRef(
2668 codegen, invoke, base, offset, new_value_reg, temp1, temp2, temp3, is_cmpxchg);
2669 } else {
2670 GenCompareAndSetOrExchangeInt(codegen, type, field_address, new_value, is_cmpxchg, byte_swap);
2671 }
2672 }
2673 }
2674
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86_64 * codegen)2675 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86_64* codegen) {
2676 LocationSummary* locations = invoke->GetLocations();
2677 GenCompareAndSetOrExchange(codegen,
2678 invoke,
2679 type,
2680 /*base=*/ locations->InAt(1).AsRegister<CpuRegister>(),
2681 /*offset=*/ locations->InAt(2).AsRegister<CpuRegister>(),
2682 /*temp1_index=*/ 0,
2683 /*temp2_index=*/ 1,
2684 /*temp3_index=*/ 2,
2685 /*new_value=*/ locations->InAt(4),
2686 /*expected=*/ locations->InAt(3),
2687 locations->Out(),
2688 /*is_cmpxchg=*/ false,
2689 /*byte_swap=*/ false);
2690 }
2691
VisitUnsafeCASInt(HInvoke * invoke)2692 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASInt(HInvoke* invoke) {
2693 VisitJdkUnsafeCASInt(invoke);
2694 }
2695
VisitUnsafeCASLong(HInvoke * invoke)2696 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASLong(HInvoke* invoke) {
2697 VisitJdkUnsafeCASLong(invoke);
2698 }
2699
VisitUnsafeCASObject(HInvoke * invoke)2700 void IntrinsicCodeGeneratorX86_64::VisitUnsafeCASObject(HInvoke* invoke) {
2701 VisitJdkUnsafeCASObject(invoke);
2702 }
2703
VisitJdkUnsafeCASInt(HInvoke * invoke)2704 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2705 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2706 VisitJdkUnsafeCompareAndSetInt(invoke);
2707 }
2708
VisitJdkUnsafeCASLong(HInvoke * invoke)2709 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2710 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2711 VisitJdkUnsafeCompareAndSetLong(invoke);
2712 }
2713
VisitJdkUnsafeCASObject(HInvoke * invoke)2714 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2715 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2716 VisitJdkUnsafeCompareAndSetReference(invoke);
2717 }
2718
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2719 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2720 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2721 }
2722
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2723 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2724 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2725 }
2726
VisitJdkUnsafeCompareAndSetReference(HInvoke * invoke)2727 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeCompareAndSetReference(HInvoke* invoke) {
2728 // The only supported read barrier implementation is the Baker-style read barrier.
2729 DCHECK_IMPLIES(codegen_->EmitReadBarrier(), kUseBakerReadBarrier);
2730
2731 GenCAS(DataType::Type::kReference, invoke, codegen_);
2732 }
2733
CreateUnsafeGetAndUpdateLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86_64 * codegen)2734 static void CreateUnsafeGetAndUpdateLocations(ArenaAllocator* allocator,
2735 HInvoke* invoke,
2736 CodeGeneratorX86_64* codegen) {
2737 const bool can_call = codegen->EmitReadBarrier() && IsUnsafeGetAndSetReference(invoke);
2738 LocationSummary* locations =
2739 new (allocator) LocationSummary(invoke,
2740 can_call
2741 ? LocationSummary::kCallOnSlowPath
2742 : LocationSummary::kNoCall,
2743 kIntrinsified);
2744 if (can_call && kUseBakerReadBarrier) {
2745 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2746 }
2747 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2748 locations->SetInAt(1, Location::RequiresRegister());
2749 locations->SetInAt(2, Location::RequiresRegister());
2750 // Use the same register for both the output and the new value or addend
2751 // to take advantage of XCHG or XADD. Arbitrarily pick RAX.
2752 locations->SetInAt(3, Location::RegisterLocation(RAX));
2753 // Only set the `out` register if it's needed. In the void case we can still use RAX in the
2754 // same manner, since it is marked as a temp register.
2755 if (invoke->GetType() == DataType::Type::kVoid) {
2756 locations->AddTemp(Location::RegisterLocation(RAX));
2757 } else {
2758 locations->SetOut(Location::RegisterLocation(RAX));
2759 }
2760 }
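// For reference, the reason input 3 and the output share RAX: both XCHG and LOCK XADD read
// their register operand and write the old field value back into that same register,
// roughly (a sketch of the architectural semantics, not emitted code):
//   XCHG reg, [field]      : tmp = [field]; [field] = reg;       reg = tmp;
//   LOCK XADD [field], reg : tmp = [field]; [field] = tmp + reg; reg = tmp;
// so keeping the new value/addend and the result in one register avoids extra moves.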
2761
VisitUnsafeGetAndAddInt(HInvoke * invoke)2762 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2763 VisitJdkUnsafeGetAndAddInt(invoke);
2764 }
2765
VisitUnsafeGetAndAddLong(HInvoke * invoke)2766 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2767 VisitJdkUnsafeGetAndAddLong(invoke);
2768 }
2769
VisitUnsafeGetAndSetInt(HInvoke * invoke)2770 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2771 VisitJdkUnsafeGetAndSetInt(invoke);
2772 }
2773
VisitUnsafeGetAndSetLong(HInvoke * invoke)2774 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2775 VisitJdkUnsafeGetAndSetLong(invoke);
2776 }
2777
VisitUnsafeGetAndSetObject(HInvoke * invoke)2778 void IntrinsicLocationsBuilderX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2779 VisitJdkUnsafeGetAndSetReference(invoke);
2780 }
2781
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2782 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2783 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2784 }
2785
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2786 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2787 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2788 }
2789
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2790 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2791 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2792 }
2793
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2794 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2795 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2796 }
2797
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2798 void IntrinsicLocationsBuilderX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2799 // The only supported read barrier implementation is the Baker-style read barrier.
2800 if (codegen_->EmitNonBakerReadBarrier()) {
2801 return;
2802 }
2803
2804 CreateUnsafeGetAndUpdateLocations(allocator_, invoke, codegen_);
2805 invoke->GetLocations()->AddRegisterTemps(3);
2806 }
2807
2808 enum class GetAndUpdateOp {
2809 kSet,
2810 kAdd,
2811 kBitwiseAnd,
2812 kBitwiseOr,
2813 kBitwiseXor
2814 };
2815
GenUnsafeGetAndUpdate(HInvoke * invoke,DataType::Type type,CodeGeneratorX86_64 * codegen,GetAndUpdateOp get_and_update_op)2816 static void GenUnsafeGetAndUpdate(HInvoke* invoke,
2817 DataType::Type type,
2818 CodeGeneratorX86_64* codegen,
2819 GetAndUpdateOp get_and_update_op) {
2820 X86_64Assembler* assembler = down_cast<X86_64Assembler*>(codegen->GetAssembler());
2821 LocationSummary* locations = invoke->GetLocations();
2822
2823 const bool is_void = invoke->GetType() == DataType::Type::kVoid;
2824 Location rax_loc = Location::RegisterLocation(RAX);
2825 // We requested RAX to be used as a temporary for void methods, as we don't return the value.
2826 DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc));
2827 CpuRegister out_or_temp = rax_loc.AsRegister<CpuRegister>(); // Result.
2828 CpuRegister base = locations->InAt(1).AsRegister<CpuRegister>(); // Object pointer.
2829 CpuRegister offset = locations->InAt(2).AsRegister<CpuRegister>(); // Long offset.
2830 DCHECK_EQ(out_or_temp, locations->InAt(3).AsRegister<CpuRegister>()); // New value or addend.
2831 Address field_address(base, offset, TIMES_1, 0);
2832
2833 if (type == DataType::Type::kInt32) {
2834 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2835 __ LockXaddl(field_address, out_or_temp);
2836 } else {
2837 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2838 __ xchgl(out_or_temp, field_address);
2839 }
2840 } else if (type == DataType::Type::kInt64) {
2841 if (get_and_update_op == GetAndUpdateOp::kAdd) {
2842 __ LockXaddq(field_address, out_or_temp);
2843 } else {
2844 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2845 __ xchgq(out_or_temp, field_address);
2846 }
2847 } else {
2848 DCHECK_EQ(type, DataType::Type::kReference);
2849 DCHECK(get_and_update_op == GetAndUpdateOp::kSet);
2850
2851 // In the void case, we have an extra temp register, which is used to signal the register
2852 // allocator that we are clobbering RAX.
2853 const uint32_t extra_temp = is_void ? 1u : 0u;
2854 DCHECK_EQ(locations->GetTempCount(), 3u + extra_temp);
2855 DCHECK_IMPLIES(is_void, locations->GetTemp(0u).Equals(Location::RegisterLocation(RAX)));
2856
2857 CpuRegister temp1 = locations->GetTemp(0u + extra_temp).AsRegister<CpuRegister>();
2858 CpuRegister temp2 = locations->GetTemp(1u + extra_temp).AsRegister<CpuRegister>();
2859 CpuRegister temp3 = locations->GetTemp(2u + extra_temp).AsRegister<CpuRegister>();
2860
2861 if (codegen->EmitReadBarrier()) {
2862 DCHECK(kUseBakerReadBarrier);
2863 // Ensure that the field contains a to-space reference.
2864 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2865 invoke,
2866 Location::RegisterLocation(temp3.AsRegister()),
2867 base,
2868 field_address,
2869 /*needs_null_check=*/ false,
2870 /*always_update_field=*/ true,
2871 &temp1,
2872 &temp2);
2873 }
2874
2875 // Mark card for object since a new value will be stored.
2876 bool new_value_can_be_null = true; // TODO: Worth finding out this information?
2877 codegen->MaybeMarkGCCard(temp1, temp2, base, /*value=*/out_or_temp, new_value_can_be_null);
2878
2879 if (kPoisonHeapReferences) {
2880 // Use a temp to avoid poisoning the base of the field address, which might happen if `out`
2881 // is the same as `base` (for code like `unsafe.getAndSet(obj, offset, obj)`).
2882 __ movl(temp1, out_or_temp);
2883 __ PoisonHeapReference(temp1);
2884 __ xchgl(temp1, field_address);
2885 if (!is_void) {
2886 __ UnpoisonHeapReference(temp1);
2887 __ movl(out_or_temp, temp1);
2888 }
2889 } else {
2890 __ xchgl(out_or_temp, field_address);
2891 }
2892 }
2893 }
2894
VisitUnsafeGetAndAddInt(HInvoke * invoke)2895 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddInt(HInvoke* invoke) {
2896 VisitJdkUnsafeGetAndAddInt(invoke);
2897 }
2898
VisitUnsafeGetAndAddLong(HInvoke * invoke)2899 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndAddLong(HInvoke* invoke) {
2900 VisitJdkUnsafeGetAndAddLong(invoke);
2901 }
2902
VisitUnsafeGetAndSetInt(HInvoke * invoke)2903 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetInt(HInvoke* invoke) {
2904 VisitJdkUnsafeGetAndSetInt(invoke);
2905 }
2906
VisitUnsafeGetAndSetLong(HInvoke * invoke)2907 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetLong(HInvoke* invoke) {
2908 VisitJdkUnsafeGetAndSetLong(invoke);
2909 }
2910
VisitUnsafeGetAndSetObject(HInvoke * invoke)2911 void IntrinsicCodeGeneratorX86_64::VisitUnsafeGetAndSetObject(HInvoke* invoke) {
2912 VisitJdkUnsafeGetAndSetReference(invoke);
2913 }
2914
VisitJdkUnsafeGetAndAddInt(HInvoke * invoke)2915 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddInt(HInvoke* invoke) {
2916 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kAdd);
2917 }
2918
VisitJdkUnsafeGetAndAddLong(HInvoke * invoke)2919 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndAddLong(HInvoke* invoke) {
2920 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kAdd);
2921 }
2922
VisitJdkUnsafeGetAndSetInt(HInvoke * invoke)2923 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetInt(HInvoke* invoke) {
2924 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt32, codegen_, GetAndUpdateOp::kSet);
2925 }
2926
VisitJdkUnsafeGetAndSetLong(HInvoke * invoke)2927 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetLong(HInvoke* invoke) {
2928 GenUnsafeGetAndUpdate(invoke, DataType::Type::kInt64, codegen_, GetAndUpdateOp::kSet);
2929 }
2930
VisitJdkUnsafeGetAndSetReference(HInvoke * invoke)2931 void IntrinsicCodeGeneratorX86_64::VisitJdkUnsafeGetAndSetReference(HInvoke* invoke) {
2932 GenUnsafeGetAndUpdate(invoke, DataType::Type::kReference, codegen_, GetAndUpdateOp::kSet);
2933 }
2934
VisitIntegerReverse(HInvoke * invoke)2935 void IntrinsicLocationsBuilderX86_64::VisitIntegerReverse(HInvoke* invoke) {
2936 LocationSummary* locations =
2937 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2938 locations->SetInAt(0, Location::RequiresRegister());
2939 locations->SetOut(Location::SameAsFirstInput());
2940 locations->AddTemp(Location::RequiresRegister());
2941 }
2942
SwapBits(CpuRegister reg,CpuRegister temp,int32_t shift,int32_t mask,X86_64Assembler * assembler)2943 static void SwapBits(CpuRegister reg, CpuRegister temp, int32_t shift, int32_t mask,
2944 X86_64Assembler* assembler) {
2945 Immediate imm_shift(shift);
2946 Immediate imm_mask(mask);
2947 __ movl(temp, reg);
2948 __ shrl(reg, imm_shift);
2949 __ andl(temp, imm_mask);
2950 __ andl(reg, imm_mask);
2951 __ shll(temp, imm_shift);
2952 __ orl(reg, temp);
2953 }
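// For reference, SwapBits implements the standard identity (a sketch, not emitted code):
//   reg = ((reg >> shift) & mask) | ((reg & mask) << shift)
// e.g. with shift == 1 and mask == 0x55555555 every pair of adjacent bits is swapped, which
// is one round of the bit-reversal sequence used in VisitIntegerReverse below.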
2954
VisitIntegerReverse(HInvoke * invoke)2955 void IntrinsicCodeGeneratorX86_64::VisitIntegerReverse(HInvoke* invoke) {
2956 X86_64Assembler* assembler = GetAssembler();
2957 LocationSummary* locations = invoke->GetLocations();
2958
2959 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
2960 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
2961
2962 /*
2963 * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
2964 * bit swapping to reverse the bits in a number x. Using bswap saves instructions
2965 * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2966 * x = bswap x
2967 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2968 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2969 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2970 */
2971 __ bswapl(reg);
2972 SwapBits(reg, temp, 1, 0x55555555, assembler);
2973 SwapBits(reg, temp, 2, 0x33333333, assembler);
2974 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2975 }
2976
VisitLongReverse(HInvoke * invoke)2977 void IntrinsicLocationsBuilderX86_64::VisitLongReverse(HInvoke* invoke) {
2978 LocationSummary* locations =
2979 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2980 locations->SetInAt(0, Location::RequiresRegister());
2981 locations->SetOut(Location::SameAsFirstInput());
2982 locations->AddRegisterTemps(2);
2983 }
2984
SwapBits64(CpuRegister reg,CpuRegister temp,CpuRegister temp_mask,int32_t shift,int64_t mask,X86_64Assembler * assembler)2985 static void SwapBits64(CpuRegister reg, CpuRegister temp, CpuRegister temp_mask,
2986 int32_t shift, int64_t mask, X86_64Assembler* assembler) {
2987 Immediate imm_shift(shift);
2988 __ movq(temp_mask, Immediate(mask));
2989 __ movq(temp, reg);
2990 __ shrq(reg, imm_shift);
2991 __ andq(temp, temp_mask);
2992 __ andq(reg, temp_mask);
2993 __ shlq(temp, imm_shift);
2994 __ orq(reg, temp);
2995 }
2996
VisitLongReverse(HInvoke * invoke)2997 void IntrinsicCodeGeneratorX86_64::VisitLongReverse(HInvoke* invoke) {
2998 X86_64Assembler* assembler = GetAssembler();
2999 LocationSummary* locations = invoke->GetLocations();
3000
3001 CpuRegister reg = locations->InAt(0).AsRegister<CpuRegister>();
3002 CpuRegister temp1 = locations->GetTemp(0).AsRegister<CpuRegister>();
3003 CpuRegister temp2 = locations->GetTemp(1).AsRegister<CpuRegister>();
3004
3005 /*
3006 * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
3007 * bit swapping to reverse the bits in a long number x. Using bswap saves instructions
3008 * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
3009 * x = bswap x
3010 * x = (x & 0x5555555555555555) << 1 | (x >> 1) & 0x5555555555555555;
3011 * x = (x & 0x3333333333333333) << 2 | (x >> 2) & 0x3333333333333333;
3012 * x = (x & 0x0F0F0F0F0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F0F0F0F0F;
3013 */
3014 __ bswapq(reg);
3015 SwapBits64(reg, temp1, temp2, 1, INT64_C(0x5555555555555555), assembler);
3016 SwapBits64(reg, temp1, temp2, 2, INT64_C(0x3333333333333333), assembler);
3017 SwapBits64(reg, temp1, temp2, 4, INT64_C(0x0f0f0f0f0f0f0f0f), assembler);
3018 }
3019
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86_64 * codegen,HInvoke * invoke)3020 static void CreateBitCountLocations(
3021 ArenaAllocator* allocator, CodeGeneratorX86_64* codegen, HInvoke* invoke) {
3022 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
3023 // Do nothing if there is no popcnt support. This results in generating
3024 // a call for the intrinsic rather than direct code.
3025 return;
3026 }
3027 LocationSummary* locations =
3028 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3029 locations->SetInAt(0, Location::Any());
3030 locations->SetOut(Location::RequiresRegister());
3031 }
3032
GenBitCount(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)3033 static void GenBitCount(X86_64Assembler* assembler,
3034 CodeGeneratorX86_64* codegen,
3035 HInvoke* invoke,
3036 bool is_long) {
3037 LocationSummary* locations = invoke->GetLocations();
3038 Location src = locations->InAt(0);
3039 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3040
3041 if (invoke->InputAt(0)->IsConstant()) {
3042 // Evaluate this at compile time.
3043 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3044 int32_t result = is_long
3045 ? POPCOUNT(static_cast<uint64_t>(value))
3046 : POPCOUNT(static_cast<uint32_t>(value));
3047 codegen->Load32BitValue(out, result);
3048 return;
3049 }
3050
3051 if (src.IsRegister()) {
3052 if (is_long) {
3053 __ popcntq(out, src.AsRegister<CpuRegister>());
3054 } else {
3055 __ popcntl(out, src.AsRegister<CpuRegister>());
3056 }
3057 } else if (is_long) {
3058 DCHECK(src.IsDoubleStackSlot());
3059 __ popcntq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3060 } else {
3061 DCHECK(src.IsStackSlot());
3062 __ popcntl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3063 }
3064 }
3065
VisitIntegerBitCount(HInvoke * invoke)3066 void IntrinsicLocationsBuilderX86_64::VisitIntegerBitCount(HInvoke* invoke) {
3067 CreateBitCountLocations(allocator_, codegen_, invoke);
3068 }
3069
VisitIntegerBitCount(HInvoke * invoke)3070 void IntrinsicCodeGeneratorX86_64::VisitIntegerBitCount(HInvoke* invoke) {
3071 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3072 }
3073
VisitLongBitCount(HInvoke * invoke)3074 void IntrinsicLocationsBuilderX86_64::VisitLongBitCount(HInvoke* invoke) {
3075 CreateBitCountLocations(allocator_, codegen_, invoke);
3076 }
3077
VisitLongBitCount(HInvoke * invoke)3078 void IntrinsicCodeGeneratorX86_64::VisitLongBitCount(HInvoke* invoke) {
3079 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3080 }
3081
CreateOneBitLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_high)3082 static void CreateOneBitLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_high) {
3083 LocationSummary* locations =
3084 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3085 locations->SetInAt(0, Location::Any());
3086 locations->SetOut(Location::RequiresRegister());
3087 locations->AddTemp(is_high ? Location::RegisterLocation(RCX) // needs CL
3088 : Location::RequiresRegister()); // any will do
3089 }
3090
GenOneBit(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_high,bool is_long)3091 static void GenOneBit(X86_64Assembler* assembler,
3092 CodeGeneratorX86_64* codegen,
3093 HInvoke* invoke,
3094 bool is_high, bool is_long) {
3095 LocationSummary* locations = invoke->GetLocations();
3096 Location src = locations->InAt(0);
3097 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3098
3099 if (invoke->InputAt(0)->IsConstant()) {
3100 // Evaluate this at compile time.
3101 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3102 if (value == 0) {
3103 __ xorl(out, out); // Clears upper bits too.
3104 return;
3105 }
3106 // Nonzero value.
3107 if (is_high) {
3108 value = is_long ? 63 - CLZ(static_cast<uint64_t>(value))
3109 : 31 - CLZ(static_cast<uint32_t>(value));
3110 } else {
3111 value = is_long ? CTZ(static_cast<uint64_t>(value))
3112 : CTZ(static_cast<uint32_t>(value));
3113 }
3114 if (is_long) {
3115 codegen->Load64BitValue(out, 1ULL << value);
3116 } else {
3117 codegen->Load32BitValue(out, 1 << value);
3118 }
3119 return;
3120 }
3121
3122 // Handle the non-constant cases.
3123 if (!is_high && codegen->GetInstructionSetFeatures().HasAVX2() &&
3124 src.IsRegister()) {
3125 __ blsi(out, src.AsRegister<CpuRegister>());
3126 } else {
3127 CpuRegister tmp = locations->GetTemp(0).AsRegister<CpuRegister>();
3128 if (is_high) {
3129 // Use architectural support: basically 1 << bsr.
3130 if (src.IsRegister()) {
3131 if (is_long) {
3132 __ bsrq(tmp, src.AsRegister<CpuRegister>());
3133 } else {
3134 __ bsrl(tmp, src.AsRegister<CpuRegister>());
3135 }
3136 } else if (is_long) {
3137 DCHECK(src.IsDoubleStackSlot());
3138 __ bsrq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3139 } else {
3140 DCHECK(src.IsStackSlot());
3141 __ bsrl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3142 }
3143 // BSR sets ZF if the input was zero.
3144 NearLabel is_zero, done;
3145 __ j(kEqual, &is_zero);
3146 __ movl(out, Immediate(1)); // Clears upper bits too.
3147 if (is_long) {
3148 __ shlq(out, tmp);
3149 } else {
3150 __ shll(out, tmp);
3151 }
3152 __ jmp(&done);
3153 __ Bind(&is_zero);
3154 __ xorl(out, out); // Clears upper bits too.
3155 __ Bind(&done);
3156 } else {
3157 // Copy input into temporary.
3158 if (src.IsRegister()) {
3159 if (is_long) {
3160 __ movq(tmp, src.AsRegister<CpuRegister>());
3161 } else {
3162 __ movl(tmp, src.AsRegister<CpuRegister>());
3163 }
3164 } else if (is_long) {
3165 DCHECK(src.IsDoubleStackSlot());
3166 __ movq(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3167 } else {
3168 DCHECK(src.IsStackSlot());
3169 __ movl(tmp, Address(CpuRegister(RSP), src.GetStackIndex()));
3170 }
3171 // Do the bit twiddling: basically tmp & -tmp;
3172 if (is_long) {
3173 __ movq(out, tmp);
3174 __ negq(tmp);
3175 __ andq(out, tmp);
3176 } else {
3177 __ movl(out, tmp);
3178 __ negl(tmp);
3179 __ andl(out, tmp);
3180 }
3181 }
3182 }
3183 }
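// For reference (descriptive only): for a non-zero x, highestOneBit(x) == 1 << bsr(x),
// where BSR yields the index of the most significant set bit, and lowestOneBit(x) is the
// classic two's-complement identity x & -x, which BLSI computes in a single instruction
// when the feature check above allows it.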
3184
VisitIntegerHighestOneBit(HInvoke * invoke)3185 void IntrinsicLocationsBuilderX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3186 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3187 }
3188
VisitIntegerHighestOneBit(HInvoke * invoke)3189 void IntrinsicCodeGeneratorX86_64::VisitIntegerHighestOneBit(HInvoke* invoke) {
3190 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ false);
3191 }
3192
VisitLongHighestOneBit(HInvoke * invoke)3193 void IntrinsicLocationsBuilderX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3194 CreateOneBitLocations(allocator_, invoke, /* is_high= */ true);
3195 }
3196
VisitLongHighestOneBit(HInvoke * invoke)3197 void IntrinsicCodeGeneratorX86_64::VisitLongHighestOneBit(HInvoke* invoke) {
3198 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ true, /* is_long= */ true);
3199 }
3200
VisitIntegerLowestOneBit(HInvoke * invoke)3201 void IntrinsicLocationsBuilderX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3202 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3203 }
3204
VisitIntegerLowestOneBit(HInvoke * invoke)3205 void IntrinsicCodeGeneratorX86_64::VisitIntegerLowestOneBit(HInvoke* invoke) {
3206 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ false);
3207 }
3208
VisitLongLowestOneBit(HInvoke * invoke)3209 void IntrinsicLocationsBuilderX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3210 CreateOneBitLocations(allocator_, invoke, /* is_high= */ false);
3211 }
3212
VisitLongLowestOneBit(HInvoke * invoke)3213 void IntrinsicCodeGeneratorX86_64::VisitLongLowestOneBit(HInvoke* invoke) {
3214 GenOneBit(GetAssembler(), codegen_, invoke, /* is_high= */ false, /* is_long= */ true);
3215 }
3216
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke)3217 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3218 LocationSummary* locations =
3219 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3220 locations->SetInAt(0, Location::Any());
3221 locations->SetOut(Location::RequiresRegister());
3222 }
3223
GenLeadingZeros(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)3224 static void GenLeadingZeros(X86_64Assembler* assembler,
3225 CodeGeneratorX86_64* codegen,
3226 HInvoke* invoke, bool is_long) {
3227 LocationSummary* locations = invoke->GetLocations();
3228 Location src = locations->InAt(0);
3229 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3230
3231 int zero_value_result = is_long ? 64 : 32;
3232 if (invoke->InputAt(0)->IsConstant()) {
3233 // Evaluate this at compile time.
3234 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3235 if (value == 0) {
3236 value = zero_value_result;
3237 } else {
3238 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
3239 }
3240 codegen->Load32BitValue(out, value);
3241 return;
3242 }
3243
3244 // Handle the non-constant cases.
3245 if (src.IsRegister()) {
3246 if (is_long) {
3247 __ bsrq(out, src.AsRegister<CpuRegister>());
3248 } else {
3249 __ bsrl(out, src.AsRegister<CpuRegister>());
3250 }
3251 } else if (is_long) {
3252 DCHECK(src.IsDoubleStackSlot());
3253 __ bsrq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3254 } else {
3255 DCHECK(src.IsStackSlot());
3256 __ bsrl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3257 }
3258
3259 // BSR sets ZF if the input was zero, and the output is undefined.
3260 NearLabel is_zero, done;
3261 __ j(kEqual, &is_zero);
3262
3263 // Correct the result from BSR to get the CLZ result.
3264 __ xorl(out, Immediate(zero_value_result - 1));
3265 __ jmp(&done);
3266
3267 // Fix the zero case with the expected result.
3268 __ Bind(&is_zero);
3269 __ movl(out, Immediate(zero_value_result));
3270
3271 __ Bind(&done);
3272 }
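// For reference (descriptive only): BSR returns the index i of the highest set bit, and
// CLZ == (width - 1) - i. Since width - 1 is 31 or 63 (all ones in the relevant bit range),
// that subtraction is equivalent to the XOR with `zero_value_result - 1` emitted above:
//   clz(x) == bsr(x) ^ (width - 1)   // for x != 0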
3273
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)3274 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3275 CreateLeadingZeroLocations(allocator_, invoke);
3276 }
3277
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)3278 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
3279 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3280 }
3281
VisitLongNumberOfLeadingZeros(HInvoke * invoke)3282 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3283 CreateLeadingZeroLocations(allocator_, invoke);
3284 }
3285
VisitLongNumberOfLeadingZeros(HInvoke * invoke)3286 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
3287 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3288 }
3289
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke)3290 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke) {
3291 LocationSummary* locations =
3292 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3293 locations->SetInAt(0, Location::Any());
3294 locations->SetOut(Location::RequiresRegister());
3295 }
3296
GenTrailingZeros(X86_64Assembler * assembler,CodeGeneratorX86_64 * codegen,HInvoke * invoke,bool is_long)3297 static void GenTrailingZeros(X86_64Assembler* assembler,
3298 CodeGeneratorX86_64* codegen,
3299 HInvoke* invoke, bool is_long) {
3300 LocationSummary* locations = invoke->GetLocations();
3301 Location src = locations->InAt(0);
3302 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3303
3304 int zero_value_result = is_long ? 64 : 32;
3305 if (invoke->InputAt(0)->IsConstant()) {
3306 // Evaluate this at compile time.
3307 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
3308 if (value == 0) {
3309 value = zero_value_result;
3310 } else {
3311 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
3312 }
3313 codegen->Load32BitValue(out, value);
3314 return;
3315 }
3316
3317 // Handle the non-constant cases.
3318 if (src.IsRegister()) {
3319 if (is_long) {
3320 __ bsfq(out, src.AsRegister<CpuRegister>());
3321 } else {
3322 __ bsfl(out, src.AsRegister<CpuRegister>());
3323 }
3324 } else if (is_long) {
3325 DCHECK(src.IsDoubleStackSlot());
3326 __ bsfq(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3327 } else {
3328 DCHECK(src.IsStackSlot());
3329 __ bsfl(out, Address(CpuRegister(RSP), src.GetStackIndex()));
3330 }
3331
3332 // BSF sets ZF if the input was zero, and the output is undefined.
3333 NearLabel done;
3334 __ j(kNotEqual, &done);
3335
3336 // Fix the zero case with the expected result.
3337 __ movl(out, Immediate(zero_value_result));
3338
3339 __ Bind(&done);
3340 }
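// For reference (descriptive only): BSF directly yields the index of the lowest set bit,
// which equals the number of trailing zeros for any non-zero input, so unlike the CLZ case
// above no correction is needed; only the zero input is patched to 32/64.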
3341
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3342 void IntrinsicLocationsBuilderX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3343 CreateTrailingZeroLocations(allocator_, invoke);
3344 }
3345
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)3346 void IntrinsicCodeGeneratorX86_64::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
3347 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
3348 }
3349
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3350 void IntrinsicLocationsBuilderX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3351 CreateTrailingZeroLocations(allocator_, invoke);
3352 }
3353
VisitLongNumberOfTrailingZeros(HInvoke * invoke)3354 void IntrinsicCodeGeneratorX86_64::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
3355 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
3356 }
3357
3358 #define VISIT_INTRINSIC(name, low, high, type, start_index) \
3359 void IntrinsicLocationsBuilderX86_64::Visit##name##ValueOf(HInvoke* invoke) { \
3360 InvokeRuntimeCallingConvention calling_convention; \
3361 IntrinsicVisitor::ComputeValueOfLocations( \
3362 invoke, \
3363 codegen_, \
3364 low, \
3365 (high) - (low) + 1, \
3366 Location::RegisterLocation(RAX), \
3367 Location::RegisterLocation(calling_convention.GetRegisterAt(0))); \
3368 } \
3369 void IntrinsicCodeGeneratorX86_64::Visit##name##ValueOf(HInvoke* invoke) { \
3370 IntrinsicVisitor::ValueOfInfo info = \
3371 IntrinsicVisitor::ComputeValueOfInfo(invoke, \
3372 codegen_->GetCompilerOptions(), \
3373 WellKnownClasses::java_lang_##name##_value, \
3374 low, \
3375 (high) - (low) + 1, \
3376 start_index); \
3377 HandleValueOf(invoke, info, type); \
3378 }
BOXED_TYPES(VISIT_INTRINSIC)3379 BOXED_TYPES(VISIT_INTRINSIC)
3380 #undef VISIT_INTRINSIC
3381
3382 template <typename T>
3383 static void Store(X86_64Assembler* assembler,
3384 DataType::Type primitive_type,
3385 const Address& address,
3386 const T& operand) {
3387 switch (primitive_type) {
3388 case DataType::Type::kInt8:
3389 case DataType::Type::kUint8: {
3390 __ movb(address, operand);
3391 break;
3392 }
3393 case DataType::Type::kInt16:
3394 case DataType::Type::kUint16: {
3395 __ movw(address, operand);
3396 break;
3397 }
3398 case DataType::Type::kInt32: {
3399 __ movl(address, operand);
3400 break;
3401 }
3402 default: {
3403 LOG(FATAL) << "Unrecognized ValueOf type " << primitive_type;
3404 }
3405 }
3406 }
3407
HandleValueOf(HInvoke * invoke,const IntrinsicVisitor::ValueOfInfo & info,DataType::Type type)3408 void IntrinsicCodeGeneratorX86_64::HandleValueOf(HInvoke* invoke,
3409 const IntrinsicVisitor::ValueOfInfo& info,
3410 DataType::Type type) {
3411 LocationSummary* locations = invoke->GetLocations();
3412 X86_64Assembler* assembler = GetAssembler();
3413
3414 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3415 InvokeRuntimeCallingConvention calling_convention;
3416 CpuRegister argument = CpuRegister(calling_convention.GetRegisterAt(0));
3417 auto allocate_instance = [&]() {
3418 codegen_->LoadIntrinsicDeclaringClass(argument, invoke);
3419 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke);
3420 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3421 };
3422 if (invoke->InputAt(0)->IsIntConstant()) {
3423 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3424 if (static_cast<uint32_t>(value - info.low) < info.length) {
3425 // Just embed the object in the code.
3426 DCHECK_NE(info.value_boot_image_reference, ValueOfInfo::kInvalidReference);
3427 codegen_->LoadBootImageAddress(out, info.value_boot_image_reference);
3428 } else {
3429 DCHECK(locations->CanCall());
3430 // Allocate and initialize a new object.
3431 // TODO: If we JIT, we could allocate the boxed value now, and store it in the
3432 // JIT object table.
3433 allocate_instance();
3434 Store(assembler, type, Address(out, info.value_offset), Immediate(value));
3435 }
3436 } else {
3437 DCHECK(locations->CanCall());
3438 CpuRegister in = locations->InAt(0).AsRegister<CpuRegister>();
3439 // Check bounds of our cache.
3440 __ leal(out, Address(in, -info.low));
3441 __ cmpl(out, Immediate(info.length));
3442 NearLabel allocate, done;
3443 __ j(kAboveEqual, &allocate);
3444 // If the value is within the bounds, load the boxed value directly from the array.
3445 DCHECK_NE(out.AsRegister(), argument.AsRegister());
3446 codegen_->LoadBootImageAddress(argument, info.array_data_boot_image_reference);
3447 static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3448 "Check heap reference size.");
3449 __ movl(out, Address(argument, out, TIMES_4, 0));
3450 __ MaybeUnpoisonHeapReference(out);
3451 __ jmp(&done);
3452 __ Bind(&allocate);
3453 // Otherwise allocate and initialize a new object.
3454 allocate_instance();
3455 Store(assembler, type, Address(out, info.value_offset), in);
3456 __ Bind(&done);
3457 }
3458 }
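// For reference, the cache bounds check above uses the usual unsigned-compare trick
// (a sketch, not emitted code):
//   if (static_cast<uint32_t>(value - info.low) < info.length) { /* use cached object */ }
// LEAL computes value - info.low and the unsigned "above or equal" branch folds the
// low <= value && value < low + length range check into a single comparison, mirroring the
// constant-input path earlier in this function.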
3459
VisitReferenceGetReferent(HInvoke * invoke)3460 void IntrinsicLocationsBuilderX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3461 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3462 }
3463
VisitReferenceGetReferent(HInvoke * invoke)3464 void IntrinsicCodeGeneratorX86_64::VisitReferenceGetReferent(HInvoke* invoke) {
3465 X86_64Assembler* assembler = GetAssembler();
3466 LocationSummary* locations = invoke->GetLocations();
3467
3468 Location obj = locations->InAt(0);
3469 Location out = locations->Out();
3470
3471 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86_64(invoke);
3472 codegen_->AddSlowPath(slow_path);
3473
3474 if (codegen_->EmitReadBarrier()) {
3475 // Check self->GetWeakRefAccessEnabled().
3476 ThreadOffset64 offset = Thread::WeakRefAccessEnabledOffset<kX86_64PointerSize>();
3477 __ gs()->cmpl(Address::Absolute(offset, /* no_rip= */ true),
3478 Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3479 __ j(kNotEqual, slow_path->GetEntryLabel());
3480 }
3481
3482 // Load the java.lang.ref.Reference class, use the output register as a temporary.
3483 codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<CpuRegister>(), invoke);
3484
3485 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3486 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3487 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3488 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3489 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3490 __ cmpw(Address(out.AsRegister<CpuRegister>(), disable_intrinsic_offset.Uint32Value()),
3491 Immediate(0));
3492 __ j(kNotEqual, slow_path->GetEntryLabel());
3493
3494 // Load the value from the field.
3495 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3496 if (codegen_->EmitBakerReadBarrier()) {
3497 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3498 out,
3499 obj.AsRegister<CpuRegister>(),
3500 referent_offset,
3501 /*needs_null_check=*/ true);
3502 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3503 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3504 } else {
3505 __ movl(out.AsRegister<CpuRegister>(), Address(obj.AsRegister<CpuRegister>(), referent_offset));
3506 codegen_->MaybeRecordImplicitNullCheck(invoke);
3507 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3508 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3509 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3510 }
3511 __ Bind(slow_path->GetExitLabel());
3512 }
3513
VisitReferenceRefersTo(HInvoke * invoke)3514 void IntrinsicLocationsBuilderX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3515 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke, codegen_);
3516 }
3517
VisitReferenceRefersTo(HInvoke * invoke)3518 void IntrinsicCodeGeneratorX86_64::VisitReferenceRefersTo(HInvoke* invoke) {
3519 X86_64Assembler* assembler = GetAssembler();
3520 LocationSummary* locations = invoke->GetLocations();
3521
3522 CpuRegister obj = locations->InAt(0).AsRegister<CpuRegister>();
3523 CpuRegister other = locations->InAt(1).AsRegister<CpuRegister>();
3524 CpuRegister out = locations->Out().AsRegister<CpuRegister>();
3525
3526 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3527 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3528
3529 __ movl(out, Address(obj, referent_offset));
3530 codegen_->MaybeRecordImplicitNullCheck(invoke);
3531 __ MaybeUnpoisonHeapReference(out);
3532 // Note that the fence is a no-op, thanks to the x86-64 memory model.
3533 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3534
3535 __ cmpl(out, other);
3536
3537 if (codegen_->EmitReadBarrier()) {
3538 DCHECK(kUseBakerReadBarrier);
3539
3540 NearLabel calculate_result;
3541 __ j(kEqual, &calculate_result); // ZF set if taken.
3542
3543 // Check if the loaded reference is null in a way that leaves ZF clear for null.
3544 __ cmpl(out, Immediate(1));
3545 __ j(kBelow, &calculate_result); // ZF clear if taken.
3546
3547 // For correct memory visibility we would need a barrier before loading the lock word,
3548 // but the barrier already emitted above for the volatile load is sufficient.
3549
3550 // Load the lockword and check if it is a forwarding address.
3551 static_assert(LockWord::kStateShift == 30u);
3552 static_assert(LockWord::kStateForwardingAddress == 3u);
3553 __ movl(out, Address(out, monitor_offset));
3554 __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3555 __ j(kBelow, &calculate_result); // ZF clear if taken.
3556
3557 // Extract the forwarding address and compare with `other`.
3558 __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3559 __ cmpl(out, other);
3560
3561 __ Bind(&calculate_result);
3562 }
3563
3564 // Convert ZF into the Boolean result.
3565 __ setcc(kEqual, out);
3566 __ movzxb(out, out);
3567 }
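// A note on the read-barrier path above (descriptive only): the lock word state lives in
// the top two bits (kStateShift == 30) and the forwarding-address state is 3, so any lock
// word that is >= 0xc0000000 as an unsigned value denotes a forwarding address. Shifting
// left by kForwardingAddressShift then reconstructs the to-space address, which is compared
// against `other` to handle a referent that has already been moved by the GC.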
3568
VisitThreadInterrupted(HInvoke * invoke)3569 void IntrinsicLocationsBuilderX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3570 LocationSummary* locations =
3571 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3572 locations->SetOut(Location::RequiresRegister());
3573 }
3574
VisitThreadInterrupted(HInvoke * invoke)3575 void IntrinsicCodeGeneratorX86_64::VisitThreadInterrupted(HInvoke* invoke) {
3576 X86_64Assembler* assembler = GetAssembler();
3577 CpuRegister out = invoke->GetLocations()->Out().AsRegister<CpuRegister>();
3578 Address address = Address::Absolute
3579 (Thread::InterruptedOffset<kX86_64PointerSize>().Int32Value(), /* no_rip= */ true);
3580 NearLabel done;
3581 __ gs()->movl(out, address);
3582 __ testl(out, out);
3583 __ j(kEqual, &done);
3584 __ gs()->movl(address, Immediate(0));
3585 codegen_->MemoryFence();
3586 __ Bind(&done);
3587 }
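// For reference (descriptive only): the sequence above reads the thread-local interrupted
// flag through the GS segment, and only when the flag is set does it clear the flag and
// emit a memory fence, i.e. it implements the get-and-clear behaviour of
// Thread.interrupted() without a runtime call on the common not-interrupted path.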
3588
VisitReachabilityFence(HInvoke * invoke)3589 void IntrinsicLocationsBuilderX86_64::VisitReachabilityFence(HInvoke* invoke) {
3590 LocationSummary* locations =
3591 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3592 locations->SetInAt(0, Location::Any());
3593 }
3594
VisitReachabilityFence(HInvoke * invoke)3595 void IntrinsicCodeGeneratorX86_64::VisitReachabilityFence([[maybe_unused]] HInvoke* invoke) {}
3596
CreateDivideUnsignedLocations(HInvoke * invoke,ArenaAllocator * allocator)3597 static void CreateDivideUnsignedLocations(HInvoke* invoke, ArenaAllocator* allocator) {
3598 LocationSummary* locations =
3599 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3600 locations->SetInAt(0, Location::RegisterLocation(RAX));
3601 locations->SetInAt(1, Location::RequiresRegister());
3602 locations->SetOut(Location::SameAsFirstInput());
3603 // Intel uses edx:eax (rdx:rax for 64-bit operands) as the dividend.
3604 locations->AddTemp(Location::RegisterLocation(RDX));
3605 }
3606
GenerateDivideUnsigned(HInvoke * invoke,CodeGeneratorX86_64 * codegen,DataType::Type data_type)3607 static void GenerateDivideUnsigned(HInvoke* invoke,
3608 CodeGeneratorX86_64* codegen,
3609 DataType::Type data_type) {
3610 LocationSummary* locations = invoke->GetLocations();
3611 Location out = locations->Out();
3612 Location first = locations->InAt(0);
3613 Location second = locations->InAt(1);
3614 CpuRegister rdx = locations->GetTemp(0).AsRegister<CpuRegister>();
3615 CpuRegister second_reg = second.AsRegister<CpuRegister>();
3616
3617 DCHECK_EQ(RAX, first.AsRegister<Register>());
3618 DCHECK_EQ(RAX, out.AsRegister<Register>());
3619 DCHECK_EQ(RDX, rdx.AsRegister());
3620
3621 // Check if the divisor is zero and bail out to the slow path to handle that case.
3622 auto* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86_64(invoke);
3623 codegen->AddSlowPath(slow_path);
3624
3625 X86_64Assembler* assembler = codegen->GetAssembler();
3626 if (data_type == DataType::Type::kInt32) {
3627 __ testl(second_reg, second_reg);
3628 __ j(kEqual, slow_path->GetEntryLabel());
3629 __ xorl(rdx, rdx);
3630 __ divl(second_reg);
3631 } else {
3632 DCHECK(data_type == DataType::Type::kInt64);
3633 __ testq(second_reg, second_reg);
3634 __ j(kEqual, slow_path->GetEntryLabel());
3635 __ xorq(rdx, rdx);
3636 __ divq(second_reg);
3637 }
3638 __ Bind(slow_path->GetExitLabel());
3639 }
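// For reference (descriptive only): DIV treats RDX:RAX (EDX:EAX for 32-bit operands) as the
// dividend, so zeroing RDX first makes the dividend the zero-extended value in RAX and
// yields an unsigned quotient in RAX, matching Integer/Long.divideUnsigned. A zero divisor
// branches to the intrinsic slow path, which falls back to the non-intrinsic call.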
3640
VisitIntegerDivideUnsigned(HInvoke * invoke)3641 void IntrinsicLocationsBuilderX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3642 CreateDivideUnsignedLocations(invoke, allocator_);
3643 }
3644
VisitIntegerDivideUnsigned(HInvoke * invoke)3645 void IntrinsicCodeGeneratorX86_64::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3646 GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt32);
3647 }
3648
VisitLongDivideUnsigned(HInvoke * invoke)3649 void IntrinsicLocationsBuilderX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3650 CreateDivideUnsignedLocations(invoke, allocator_);
3651 }
3652
VisitLongDivideUnsigned(HInvoke * invoke)3653 void IntrinsicCodeGeneratorX86_64::VisitLongDivideUnsigned(HInvoke* invoke) {
3654 GenerateDivideUnsigned(invoke, codegen_, DataType::Type::kInt64);
3655 }
3656
VisitMathMultiplyHigh(HInvoke * invoke)3657 void IntrinsicLocationsBuilderX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3658 LocationSummary* locations =
3659 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3660 locations->SetInAt(0, Location::RegisterLocation(RAX));
3661 locations->SetInAt(1, Location::RequiresRegister());
3662 locations->SetOut(Location::RegisterLocation(RDX));
3663 locations->AddTemp(Location::RegisterLocation(RAX));
3664 }
3665
VisitMathMultiplyHigh(HInvoke * invoke)3666 void IntrinsicCodeGeneratorX86_64::VisitMathMultiplyHigh(HInvoke* invoke) {
3667 X86_64Assembler* assembler = GetAssembler();
3668 LocationSummary* locations = invoke->GetLocations();
3669
3670 CpuRegister y = locations->InAt(1).AsRegister<CpuRegister>();
3671
3672 DCHECK_EQ(locations->InAt(0).AsRegister<Register>(), RAX);
3673 DCHECK_EQ(locations->Out().AsRegister<Register>(), RDX);
3674
3675 __ imulq(y);
3676 }
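// For reference (descriptive only): the one-operand IMUL above multiplies RAX by `y` and
// produces a full 128-bit signed product in RDX:RAX; the high 64 bits land in RDX, which is
// exactly the value Math.multiplyHigh is specified to return, hence the fixed input/output
// registers in the locations set up above.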
3677
3678 class VarHandleSlowPathX86_64 : public IntrinsicSlowPathX86_64 {
3679 public:
VarHandleSlowPathX86_64(HInvoke * invoke)3680 explicit VarHandleSlowPathX86_64(HInvoke* invoke)
3681 : IntrinsicSlowPathX86_64(invoke) {
3682 }
3683
3684 void SetVolatile(bool is_volatile) {
3685 is_volatile_ = is_volatile;
3686 }
3687
3688 void SetAtomic(bool is_atomic) {
3689 is_atomic_ = is_atomic;
3690 }
3691
3692 void SetNeedAnyStoreBarrier(bool need_any_store_barrier) {
3693 need_any_store_barrier_ = need_any_store_barrier;
3694 }
3695
3696 void SetNeedAnyAnyBarrier(bool need_any_any_barrier) {
3697 need_any_any_barrier_ = need_any_any_barrier;
3698 }
3699
3700 void SetGetAndUpdateOp(GetAndUpdateOp get_and_update_op) {
3701 get_and_update_op_ = get_and_update_op;
3702 }
3703
3704 Label* GetByteArrayViewCheckLabel() {
3705 return &byte_array_view_check_label_;
3706 }
3707
3708 Label* GetNativeByteOrderLabel() {
3709 return &native_byte_order_label_;
3710 }
3711
3712 void EmitNativeCode(CodeGenerator* codegen) override {
3713 if (GetByteArrayViewCheckLabel()->IsLinked()) {
3714 EmitByteArrayViewCode(down_cast<CodeGeneratorX86_64*>(codegen));
3715 }
3716 IntrinsicSlowPathX86_64::EmitNativeCode(codegen);
3717 }
3718
3719 private:
3720 HInvoke* GetInvoke() const {
3721 return GetInstruction()->AsInvoke();
3722 }
3723
3724 mirror::VarHandle::AccessModeTemplate GetAccessModeTemplate() const {
3725 return mirror::VarHandle::GetAccessModeTemplateByIntrinsic(GetInvoke()->GetIntrinsic());
3726 }
3727
3728 void EmitByteArrayViewCode(CodeGeneratorX86_64* codegen);
3729
3730 Label byte_array_view_check_label_;
3731 Label native_byte_order_label_;
3732
3733 // Arguments forwarded to specific methods.
3734 bool is_volatile_;
3735 bool is_atomic_;
3736 bool need_any_store_barrier_;
3737 bool need_any_any_barrier_;
3738 GetAndUpdateOp get_and_update_op_;
3739 };
3740
3741 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
3742 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
3743 X86_64Assembler* assembler = codegen->GetAssembler();
3744 LocationSummary* locations = invoke->GetLocations();
3745 DCHECK(locations->InAt(0).Equals(locations->Out()));
3746 XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
3747 XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
3748 XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
3749 if (invoke->GetType() == DataType::Type::kFloat32) {
3750 __ vfmadd213ss(left, right, accumulator);
3751 } else {
3752 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
3753 __ vfmadd213sd(left, right, accumulator);
3754 }
3755 }
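// Illustrative note: VFMADD213 computes dst = dst * src2 + src3 with a single rounding, so with
// `left` serving as both dst and first input the result is left * right + accumulator, matching
// the Math.fma contract (no intermediate rounding of the product).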
3756
3757 void IntrinsicCodeGeneratorX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3758 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3759 GenerateMathFma(invoke, codegen_);
3760 }
3761
3762 void IntrinsicLocationsBuilderX86_64::VisitMathFmaDouble(HInvoke* invoke) {
3763 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3764 CreateFPFPFPToFPCallLocations(allocator_, invoke);
3765 }
3766 }
3767
3768 void IntrinsicCodeGeneratorX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3769 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
3770 GenerateMathFma(invoke, codegen_);
3771 }
3772
3773 void IntrinsicLocationsBuilderX86_64::VisitMathFmaFloat(HInvoke* invoke) {
3774 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
3775 CreateFPFPFPToFPCallLocations(allocator_, invoke);
3776 }
3777 }
3778
3779 // Generate subtype check without read barriers.
3780 static void GenerateSubTypeObjectCheckNoReadBarrier(CodeGeneratorX86_64* codegen,
3781 SlowPathCode* slow_path,
3782 CpuRegister object,
3783 CpuRegister temp,
3784 Address type_address,
3785 bool object_can_be_null = true) {
3786 X86_64Assembler* assembler = codegen->GetAssembler();
3787
3788 const MemberOffset class_offset = mirror::Object::ClassOffset();
3789 const MemberOffset super_class_offset = mirror::Class::SuperClassOffset();
3790
3791 NearLabel check_type_compatibility, type_matched;
3792
3793 // If the object is null, there is no need to check the type.
3794 if (object_can_be_null) {
3795 __ testl(object, object);
3796 __ j(kZero, &type_matched);
3797 }
3798
3799 // Do not unpoison for in-memory comparison.
3800 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3801 __ movl(temp, Address(object, class_offset));
3802 __ Bind(&check_type_compatibility);
3803 __ cmpl(temp, type_address);
3804 __ j(kEqual, &type_matched);
3805 // Load the super class.
3806 __ MaybeUnpoisonHeapReference(temp);
3807 __ movl(temp, Address(temp, super_class_offset));
3808 // If the super class is null, we reached the root of the hierarchy without a match.
3809 // We let the slow path handle uncovered cases (e.g. interfaces).
3810 __ testl(temp, temp);
3811 __ j(kEqual, slow_path->GetEntryLabel());
3812 __ jmp(&check_type_compatibility);
3813 __ Bind(&type_matched);
3814 }
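// Informal sketch of the loop above (poisoning and the deliberately skipped read barrier elided):
//   mirror::Class* k = object->klass_;
//   while (k != *type_address) {
//     k = k->super_class_;
//     if (k == nullptr) goto slow_path;  // Possible false negative (e.g. interfaces).
//   }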
3815
3816 // Check access mode and the primitive type from VarHandle.varType.
3817 // Check reference arguments against the VarHandle.varType; for references this is a subclass
3818 // check without read barrier, so it can have false negatives which we handle in the slow path.
3819 static void GenerateVarHandleAccessModeAndVarTypeChecks(HInvoke* invoke,
3820 CodeGeneratorX86_64* codegen,
3821 VarHandleSlowPathX86_64* slow_path,
3822 DataType::Type type) {
3823 X86_64Assembler* assembler = codegen->GetAssembler();
3824
3825 LocationSummary* locations = invoke->GetLocations();
3826 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3827 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3828
3829 mirror::VarHandle::AccessMode access_mode =
3830 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3831 Primitive::Type primitive_type = DataTypeToPrimitive(type);
3832
3833 const MemberOffset var_type_offset = mirror::VarHandle::VarTypeOffset();
3834 const MemberOffset access_mode_bit_mask_offset = mirror::VarHandle::AccessModesBitMaskOffset();
3835 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3836
3837 // Check that the operation is permitted.
3838 __ testl(Address(varhandle, access_mode_bit_mask_offset),
3839 Immediate(1u << static_cast<uint32_t>(access_mode)));
3840 __ j(kZero, slow_path->GetEntryLabel());
3841
3842 // For primitive types, we do not need a read barrier when loading the reference here; it is
3843 // only used to load a constant field (the primitive type). For reference types, we deliberately
3844 // avoid the read barrier, letting the slow path handle the false negatives.
3845 __ movl(temp, Address(varhandle, var_type_offset));
3846 __ MaybeUnpoisonHeapReference(temp);
3847
3848 // Check the varType.primitiveType field against the type we're trying to use.
3849 __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
3850 __ j(kNotEqual, slow_path->GetEntryLabel());
3851
3852 if (type == DataType::Type::kReference) {
3853 // Check reference arguments against the varType.
3854 // False negatives due to varType being an interface or array type
3855 // or due to the missing read barrier are handled by the slow path.
3856 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3857 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
3858 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
3859 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
3860 HInstruction* arg = invoke->InputAt(arg_index);
3861 DCHECK_EQ(arg->GetType(), DataType::Type::kReference);
3862 if (!arg->IsNullConstant()) {
3863 CpuRegister arg_reg = invoke->GetLocations()->InAt(arg_index).AsRegister<CpuRegister>();
3864 Address type_addr(varhandle, var_type_offset);
3865 GenerateSubTypeObjectCheckNoReadBarrier(codegen, slow_path, arg_reg, temp, type_addr);
3866 }
3867 }
3868 }
3869 }
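// Illustrative note: accessModesBitMask has one bit per AccessMode ordinal, so e.g. a VarHandle
// for a final field has the set-family bits cleared and the TEST above routes such calls to the
// slow path, where the unsupported access mode is reported. The primitiveType comparison is an
// exact check; any mismatch is likewise resolved (or rejected) in the slow path.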
3870
3871 static void GenerateVarHandleStaticFieldCheck(HInvoke* invoke,
3872 CodeGeneratorX86_64* codegen,
3873 VarHandleSlowPathX86_64* slow_path) {
3874 X86_64Assembler* assembler = codegen->GetAssembler();
3875
3876 LocationSummary* locations = invoke->GetLocations();
3877 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3878
3879 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3880
3881 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3882 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3883 __ cmpl(Address(varhandle, coordinate_type0_offset), Immediate(0));
3884 __ j(kNotEqual, slow_path->GetEntryLabel());
3885 }
3886
3887 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3888 CodeGeneratorX86_64* codegen,
3889 VarHandleSlowPathX86_64* slow_path) {
3890 VarHandleOptimizations optimizations(invoke);
3891 X86_64Assembler* assembler = codegen->GetAssembler();
3892
3893 LocationSummary* locations = invoke->GetLocations();
3894 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3895 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3896 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3897
3898 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3899 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3900
3901 // Null-check the object.
3902 if (!optimizations.GetSkipObjectNullCheck()) {
3903 __ testl(object, object);
3904 __ j(kZero, slow_path->GetEntryLabel());
3905 }
3906
3907 if (!optimizations.GetUseKnownImageVarHandle()) {
3908 // Check that the VarHandle references an instance field by checking that
3909 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3910 // type compatibility check with the source object's type, which will fail for null.
3911 __ cmpl(Address(varhandle, coordinate_type1_offset), Immediate(0));
3912 __ j(kNotEqual, slow_path->GetEntryLabel());
3913
3914 // Check that the object has the correct type.
3915 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3916 GenerateSubTypeObjectCheckNoReadBarrier(codegen,
3917 slow_path,
3918 object,
3919 temp,
3920 Address(varhandle, coordinate_type0_offset),
3921 /*object_can_be_null=*/ false);
3922 }
3923 }
3924
3925 static void GenerateVarHandleArrayChecks(HInvoke* invoke,
3926 CodeGeneratorX86_64* codegen,
3927 VarHandleSlowPathX86_64* slow_path) {
3928 VarHandleOptimizations optimizations(invoke);
3929 X86_64Assembler* assembler = codegen->GetAssembler();
3930 LocationSummary* locations = invoke->GetLocations();
3931
3932 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
3933 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
3934 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
3935 DataType::Type value_type =
3936 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
3937 Primitive::Type primitive_type = DataTypeToPrimitive(value_type);
3938
3939 const MemberOffset coordinate_type0_offset = mirror::VarHandle::CoordinateType0Offset();
3940 const MemberOffset coordinate_type1_offset = mirror::VarHandle::CoordinateType1Offset();
3941 const MemberOffset component_type_offset = mirror::Class::ComponentTypeOffset();
3942 const MemberOffset primitive_type_offset = mirror::Class::PrimitiveTypeOffset();
3943 const MemberOffset class_offset = mirror::Object::ClassOffset();
3944 const MemberOffset array_length_offset = mirror::Array::LengthOffset();
3945
3946 // Null-check the object.
3947 if (!optimizations.GetSkipObjectNullCheck()) {
3948 __ testl(object, object);
3949 __ j(kZero, slow_path->GetEntryLabel());
3950 }
3951
3952 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
3953
3954 // Check that the VarHandle references an array, byte array view or ByteBuffer by checking
3955 // that coordinateType1 != null. If that's true, coordinateType1 shall be int.class and
3956 // coordinateType0 shall not be null but we do not explicitly verify that.
3957 // No need for read barrier or unpoisoning of coordinateType1 for comparison with null.
3958 __ cmpl(Address(varhandle, coordinate_type1_offset.Int32Value()), Immediate(0));
3959 __ j(kEqual, slow_path->GetEntryLabel());
3960
3961 // Check object class against componentType0.
3962 //
3963 // This is an exact check and we defer other cases to the runtime. This includes
3964 // conversion to array of superclass references, which is valid but subsequently
3965 // requires all update operations to check that the value can indeed be stored.
3966 // We do not want to perform such extra checks in the intrinsified code.
3967 //
3968 // We do this check without read barrier, so there can be false negatives which we
3969 // defer to the slow path. There shall be no false negatives for array classes in the
3970 // boot image (including Object[] and primitive arrays) because they are non-movable.
3971 __ movl(temp, Address(object, class_offset.Int32Value()));
3972 __ cmpl(temp, Address(varhandle, coordinate_type0_offset.Int32Value()));
3973 __ j(kNotEqual, slow_path->GetEntryLabel());
3974
3975 // Check that the coordinateType0 is an array type. We do not need a read barrier
3976 // for loading constant reference fields (or chains of them) for comparison with null,
3977 // nor for finally loading a constant primitive field (primitive type) below.
3978 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3979 __ movl(temp, Address(temp, component_type_offset.Int32Value()));
3980 codegen->GetAssembler()->MaybeUnpoisonHeapReference(temp);
3981 __ testl(temp, temp);
3982 __ j(kZero, slow_path->GetEntryLabel());
3983
3984 // Check that the array component type matches the primitive type.
3985 Label* slow_path_label;
3986 if (primitive_type == Primitive::kPrimNot) {
3987 slow_path_label = slow_path->GetEntryLabel();
3988 } else {
3989 // With the exception of `kPrimNot` (handled above), `kPrimByte` and `kPrimBoolean`,
3990 // we shall check for a byte array view in the slow path.
3991 // The check requires the ByteArrayViewVarHandle.class to be in the boot image,
3992 // so we cannot emit that if we're JITting without boot image.
3993 bool boot_image_available =
3994 codegen->GetCompilerOptions().IsBootImage() ||
3995 !Runtime::Current()->GetHeap()->GetBootImageSpaces().empty();
3996 bool can_be_view = (DataType::Size(value_type) != 1u) && boot_image_available;
3997 slow_path_label =
3998 can_be_view ? slow_path->GetByteArrayViewCheckLabel() : slow_path->GetEntryLabel();
3999 }
4000 __ cmpw(Address(temp, primitive_type_offset), Immediate(static_cast<uint16_t>(primitive_type)));
4001 __ j(kNotEqual, slow_path_label);
4002
4003 // Check for array index out of bounds.
4004 __ cmpl(index, Address(object, array_length_offset.Int32Value()));
4005 __ j(kAboveEqual, slow_path->GetEntryLabel());
4006 }
4007
4008 static void GenerateVarHandleCoordinateChecks(HInvoke* invoke,
4009 CodeGeneratorX86_64* codegen,
4010 VarHandleSlowPathX86_64* slow_path) {
4011 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4012 if (expected_coordinates_count == 0u) {
4013 GenerateVarHandleStaticFieldCheck(invoke, codegen, slow_path);
4014 } else if (expected_coordinates_count == 1u) {
4015 GenerateVarHandleInstanceFieldChecks(invoke, codegen, slow_path);
4016 } else {
4017 DCHECK_EQ(expected_coordinates_count, 2u);
4018 GenerateVarHandleArrayChecks(invoke, codegen, slow_path);
4019 }
4020 }
4021
4022 static VarHandleSlowPathX86_64* GenerateVarHandleChecks(HInvoke* invoke,
4023 CodeGeneratorX86_64* codegen,
4024 DataType::Type type) {
4025 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4026 VarHandleOptimizations optimizations(invoke);
4027 if (optimizations.GetUseKnownImageVarHandle()) {
4028 DCHECK_NE(expected_coordinates_count, 2u);
4029 if (expected_coordinates_count == 0u || optimizations.GetSkipObjectNullCheck()) {
4030 return nullptr;
4031 }
4032 }
4033
4034 VarHandleSlowPathX86_64* slow_path =
4035 new (codegen->GetScopedAllocator()) VarHandleSlowPathX86_64(invoke);
4036 codegen->AddSlowPath(slow_path);
4037
4038 if (!optimizations.GetUseKnownImageVarHandle()) {
4039 GenerateVarHandleAccessModeAndVarTypeChecks(invoke, codegen, slow_path, type);
4040 }
4041 GenerateVarHandleCoordinateChecks(invoke, codegen, slow_path);
4042
4043 return slow_path;
4044 }
4045
4046 struct VarHandleTarget {
4047 Register object; // The object holding the value to operate on.
4048 Register offset; // The offset of the value to operate on.
4049 };
4050
4051 static VarHandleTarget GetVarHandleTarget(HInvoke* invoke) {
4052 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4053 LocationSummary* locations = invoke->GetLocations();
4054
4055 VarHandleTarget target;
4056 // The temporary allocated for loading the offset.
4057 target.offset = locations->GetTemp(0).AsRegister<CpuRegister>().AsRegister();
4058 // The reference to the object that holds the value to operate on.
4059 target.object = (expected_coordinates_count == 0u)
4060 ? locations->GetTemp(1).AsRegister<CpuRegister>().AsRegister()
4061 : locations->InAt(1).AsRegister<CpuRegister>().AsRegister();
4062 return target;
4063 }
4064
4065 static void GenerateVarHandleTarget(HInvoke* invoke,
4066 const VarHandleTarget& target,
4067 CodeGeneratorX86_64* codegen) {
4068 LocationSummary* locations = invoke->GetLocations();
4069 X86_64Assembler* assembler = codegen->GetAssembler();
4070 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4071
4072 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
4073
4074 if (expected_coordinates_count <= 1u) {
4075 if (VarHandleOptimizations(invoke).GetUseKnownImageVarHandle()) {
4076 ScopedObjectAccess soa(Thread::Current());
4077 ArtField* target_field = GetImageVarHandleField(invoke);
4078 if (expected_coordinates_count == 0u) {
4079 ObjPtr<mirror::Class> declaring_class = target_field->GetDeclaringClass();
4080 __ movl(CpuRegister(target.object),
4081 Address::Absolute(CodeGeneratorX86_64::kPlaceholder32BitOffset, /*no_rip=*/ false));
4082 if (Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(declaring_class)) {
4083 codegen->RecordBootImageRelRoPatch(CodeGenerator::GetBootImageOffset(declaring_class));
4084 } else {
4085 codegen->RecordBootImageTypePatch(declaring_class->GetDexFile(),
4086 declaring_class->GetDexTypeIndex());
4087 }
4088 }
4089 __ movl(CpuRegister(target.offset), Immediate(target_field->GetOffset().Uint32Value()));
4090 } else {
4091 // For static fields, we need to fill the `target.object` with the declaring class,
4092 // so we can use `target.object` as temporary for the `ArtField*`. For instance fields,
4093 // we do not need the declaring class, so we can forget the `ArtField*` when
4094 // we load the `target.offset`, so use the `target.offset` to hold the `ArtField*`.
4095 CpuRegister field((expected_coordinates_count == 0) ? target.object : target.offset);
4096
4097 const MemberOffset art_field_offset = mirror::FieldVarHandle::ArtFieldOffset();
4098 const MemberOffset offset_offset = ArtField::OffsetOffset();
4099
4100 // Load the ArtField*, the offset and, if needed, declaring class.
4101 __ movq(field, Address(varhandle, art_field_offset));
4102 __ movl(CpuRegister(target.offset), Address(field, offset_offset));
4103 if (expected_coordinates_count == 0u) {
4104 InstructionCodeGeneratorX86_64* instr_codegen = codegen->GetInstructionCodegen();
4105 instr_codegen->GenerateGcRootFieldLoad(invoke,
4106 Location::RegisterLocation(target.object),
4107 Address(field, ArtField::DeclaringClassOffset()),
4108 /*fixup_label=*/nullptr,
4109 codegen->GetCompilerReadBarrierOption());
4110 }
4111 }
4112 } else {
4113 DCHECK_EQ(expected_coordinates_count, 2u);
4114
4115 DataType::Type value_type =
4116 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
4117 ScaleFactor scale = CodeGenerator::ScaleFactorForType(value_type);
4118 MemberOffset data_offset = mirror::Array::DataOffset(DataType::Size(value_type));
4119 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
4120
4121 // The effect of LEA is `target.offset = index * scale + data_offset`.
4122 __ leal(CpuRegister(target.offset), Address(index, scale, data_offset.Int32Value()));
4123 }
4124 }
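// Illustrative note: after this helper the accessors below always form the memory operand as
// Address(target.object, target.offset, TIMES_1, 0). For fields, target.offset is the field
// offset; for arrays it already folds in index * element_size + data_offset, so the same
// addressing mode covers all three coordinate shapes.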
4125
4126 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4127 // The only supported read barrier implementation is the Baker-style read barriers.
4128 if (codegen->EmitNonBakerReadBarrier()) {
4129 return false;
4130 }
4131
4132 VarHandleOptimizations optimizations(invoke);
4133 if (optimizations.GetDoNotIntrinsify()) {
4134 return false;
4135 }
4136
4137 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4138 DCHECK_LE(expected_coordinates_count, 2u); // Filtered by the `DoNotIntrinsify` flag above.
4139 return true;
4140 }
4141
4142 static LocationSummary* CreateVarHandleCommonLocations(HInvoke* invoke) {
4143 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4144 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4145 LocationSummary* locations = new (allocator) LocationSummary(
4146 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4147
4148 locations->SetInAt(0, Location::RequiresRegister());
4149 // Require coordinates in registers. These are the object holding the value
4150 // to operate on (except for static fields) and index (for arrays and views).
4151 for (size_t i = 0; i != expected_coordinates_count; ++i) {
4152 locations->SetInAt(/* VarHandle object */ 1u + i, Location::RequiresRegister());
4153 }
4154
4155 uint32_t arguments_start = /* VarHandle object */ 1u + expected_coordinates_count;
4156 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4157 for (size_t arg_index = arguments_start; arg_index != number_of_arguments; ++arg_index) {
4158 HInstruction* arg = invoke->InputAt(arg_index);
4159 if (DataType::IsFloatingPointType(arg->GetType())) {
4160 locations->SetInAt(arg_index, Location::FpuRegisterOrConstant(arg));
4161 } else {
4162 locations->SetInAt(arg_index, Location::RegisterOrConstant(arg));
4163 }
4164 }
4165
4166 // Add a temporary for offset.
4167 locations->AddTemp(Location::RequiresRegister());
4168
4169 if (expected_coordinates_count == 0u) {
4170 // Add a temporary to hold the declaring class.
4171 locations->AddTemp(Location::RequiresRegister());
4172 }
4173
4174 return locations;
4175 }
4176
4177 static void CreateVarHandleGetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4178 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4179 return;
4180 }
4181
4182 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4183 if (DataType::IsFloatingPointType(invoke->GetType())) {
4184 locations->SetOut(Location::RequiresFpuRegister());
4185 } else {
4186 locations->SetOut(Location::RequiresRegister());
4187 }
4188 }
4189
4190 static void GenerateVarHandleGet(HInvoke* invoke,
4191 CodeGeneratorX86_64* codegen,
4192 bool byte_swap = false) {
4193 DataType::Type type = invoke->GetType();
4194 DCHECK_NE(type, DataType::Type::kVoid);
4195
4196 LocationSummary* locations = invoke->GetLocations();
4197 X86_64Assembler* assembler = codegen->GetAssembler();
4198
4199 VarHandleTarget target = GetVarHandleTarget(invoke);
4200 VarHandleSlowPathX86_64* slow_path = nullptr;
4201 if (!byte_swap) {
4202 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4203 GenerateVarHandleTarget(invoke, target, codegen);
4204 if (slow_path != nullptr) {
4205 __ Bind(slow_path->GetNativeByteOrderLabel());
4206 }
4207 }
4208
4209 // Load the value from the field.
4210 Address src(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4211 Location out = locations->Out();
4212
4213 if (type == DataType::Type::kReference) {
4214 if (codegen->EmitReadBarrier()) {
4215 DCHECK(kUseBakerReadBarrier);
4216 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4217 invoke, out, CpuRegister(target.object), src, /* needs_null_check= */ false);
4218 } else {
4219 __ movl(out.AsRegister<CpuRegister>(), src);
4220 __ MaybeUnpoisonHeapReference(out.AsRegister<CpuRegister>());
4221 }
4222 DCHECK(!byte_swap);
4223 } else {
4224 codegen->LoadFromMemoryNoReference(type, out, src);
4225 if (byte_swap) {
4226 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4227 codegen->GetInstructionCodegen()->Bswap(out, type, &temp);
4228 }
4229 }
4230
4231 if (slow_path != nullptr) {
4232 DCHECK(!byte_swap);
4233 __ Bind(slow_path->GetExitLabel());
4234 }
4235 }
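// Illustrative sketch of the fast path for an instance-field int VarHandle, with all checks
// passing and `off` being the loaded field offset (a non-authoritative outline):
//   movl out, [obj + off]   // Plain load; getAcquire/getVolatile need no extra fence on x86-64.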
4236
4237 void IntrinsicLocationsBuilderX86_64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
4238 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4239 LocationSummary* locations = new (allocator)
4240 LocationSummary(invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
4241
4242 InvokeDexCallingConventionVisitorX86_64 calling_convention;
4243 locations->SetOut(calling_convention.GetReturnLocation(invoke->GetType()));
4244
4245 uint32_t number_of_args = invoke->GetNumberOfArguments();
4246
4247 // Accommodating LocationSummary for the underlying invoke-* call.
4248 for (uint32_t i = 1; i < number_of_args; ++i) {
4249 locations->SetInAt(i, calling_convention.GetNextLocation(invoke->InputAt(i)->GetType()));
4250 }
4251
4252 // Pass the MethodHandle object as the last parameter: the accessors implementation relies on it.
4253 DCHECK_EQ(invoke->InputAt(0)->GetType(), DataType::Type::kReference);
4254 Location receiver_mh_loc = calling_convention.GetNextLocation(DataType::Type::kReference);
4255 locations->SetInAt(0, receiver_mh_loc);
4256
4257 // The last input is MethodType object corresponding to the call-site.
4258 locations->SetInAt(number_of_args, Location::RequiresRegister());
4259
4260 locations->AddTemp(Location::RequiresRegister());
4261 // Hidden arg for invoke-interface.
4262 locations->AddTemp(Location::RegisterLocation(RAX));
4263
4264 if (!receiver_mh_loc.IsRegister()) {
4265 locations->AddTemp(Location::RequiresRegister());
4266 }
4267 }
4268
4269 void IntrinsicCodeGeneratorX86_64::VisitMethodHandleInvokeExact(HInvoke* invoke) {
4270 LocationSummary* locations = invoke->GetLocations();
4271 X86_64Assembler* assembler = codegen_->GetAssembler();
4272
4273 Location receiver_mh_loc = locations->InAt(0);
4274 CpuRegister method_handle = receiver_mh_loc.IsRegister()
4275 ? receiver_mh_loc.AsRegister<CpuRegister>()
4276 : locations->GetTemp(2).AsRegister<CpuRegister>();
4277
4278 if (!receiver_mh_loc.IsRegister()) {
4279 DCHECK(receiver_mh_loc.IsStackSlot());
4280 __ movl(method_handle, Address(CpuRegister(RSP), receiver_mh_loc.GetStackIndex()));
4281 }
4282
4283 SlowPathCode* slow_path =
4284 new (codegen_->GetScopedAllocator()) InvokePolymorphicSlowPathX86_64(invoke, method_handle);
4285 codegen_->AddSlowPath(slow_path);
4286
4287 CpuRegister call_site_type =
4288 locations->InAt(invoke->GetNumberOfArguments()).AsRegister<CpuRegister>();
4289
4290 CpuRegister temp = locations->GetTemp(0).AsRegister<CpuRegister>();
4291
4292 // Call site should match with MethodHandle's type.
4293 if (kPoisonHeapReferences) {
4294 // call_site_type should be left intact as it 1) might be in a callee-saved register and 2) is
4295 // known to the GC to contain a reference.
4296 __ movl(temp, call_site_type);
4297 __ PoisonHeapReference(temp);
4298 __ cmpl(temp, Address(method_handle, mirror::MethodHandle::MethodTypeOffset()));
4299 __ j(kNotEqual, slow_path->GetEntryLabel());
4300 } else {
4301 __ cmpl(call_site_type, Address(method_handle, mirror::MethodHandle::MethodTypeOffset()));
4302 __ j(kNotEqual, slow_path->GetEntryLabel());
4303 }
4304
4305 CpuRegister method = CpuRegister(kMethodRegisterArgument);
4306 __ movq(method, Address(method_handle, mirror::MethodHandle::ArtFieldOrMethodOffset()));
4307
4308 Label execute_target_method;
4309 Label method_dispatch;
4310 Label static_dispatch;
4311
4312 Address method_handle_kind = Address(method_handle, mirror::MethodHandle::HandleKindOffset());
4313
4314 __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::kFirstAccessorKind));
4315 __ j(kLess, &method_dispatch);
4316 __ movq(method, Address(method_handle, mirror::MethodHandleImpl::TargetOffset()));
4317 __ Jump(&execute_target_method);
4318
4319 __ Bind(&method_dispatch);
4320 if (invoke->AsInvokePolymorphic()->CanTargetInstanceMethod()) {
4321 CpuRegister receiver = locations->InAt(1).AsRegister<CpuRegister>();
4322
4323 // Receiver shouldn't be null in any of the following cases.
4324 __ testl(receiver, receiver);
4325 __ j(kEqual, slow_path->GetEntryLabel());
4326
4327 __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeDirect));
4328 // No dispatch is needed for invoke-direct.
4329 __ j(kEqual, &execute_target_method);
4330
4331 Label non_virtual_dispatch;
4332 // Handle invoke-virtual case.
4333 __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeVirtual));
4334 __ j(kNotEqual, &non_virtual_dispatch);
4335
4336 // Skip virtual dispatch if `method` is private.
4337 __ testl(Address(method, ArtMethod::AccessFlagsOffset()), Immediate(kAccPrivate));
4338 __ j(kNotZero, &execute_target_method);
4339
4340 __ movl(temp, Address(method, ArtMethod::DeclaringClassOffset()));
4341 __ cmpl(temp, Address(receiver, mirror::Object::ClassOffset()));
4342 // If method is defined in the receiver's class, execute it as it is.
4343 __ j(kEqual, &execute_target_method);
4344
4345 // MethodIndex is uint16_t.
4346 __ movzxw(temp, Address(method, ArtMethod::MethodIndexOffset()));
4347
4348 constexpr uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
4349 // Re-using method register for receiver class.
4350 __ movl(method, Address(receiver, class_offset));
4351 __ MaybeUnpoisonHeapReference(method);
4352
4353 constexpr uint32_t vtable_offset =
4354 mirror::Class::EmbeddedVTableOffset(art::PointerSize::k64).Int32Value();
4355 __ movq(method, Address(method, temp, TIMES_8, vtable_offset));
4356 __ Jump(&execute_target_method);
4357
4358 __ Bind(&non_virtual_dispatch);
4359 __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeInterface));
4360 __ j(kNotEqual, &static_dispatch);
4361
4362 __ movl(temp, Address(method, ArtMethod::AccessFlagsOffset()));
4363
4364 __ testl(temp, Immediate(kAccPrivate));
4365 __ j(kNotZero, &execute_target_method);
4366
4367 CpuRegister hidden_arg = locations->GetTemp(1).AsRegister<CpuRegister>();
4368 // Set the hidden argument.
4369 DCHECK_EQ(RAX, hidden_arg.AsRegister());
4370 __ movq(hidden_arg, method);
4371
4372 Label get_imt_index_from_method_index;
4373 Label do_imt_dispatch;
4374
4375 // Get IMT index.
4376 // Not doing the default conflict check, as the IMT index is set for all methods that have
4377 // the kAccAbstract bit.
4378 __ testl(temp, Immediate(kAccAbstract));
4379 __ j(kZero, &get_imt_index_from_method_index);
4380
4381 // imt_index_ is uint16_t.
4382 __ movzxw(temp, Address(method, ArtMethod::ImtIndexOffset()));
4383 __ Jump(&do_imt_dispatch);
4384
4385 // Default method, do method->GetMethodIndex() & (ImTable::kSizeTruncToPowerOfTwo - 1);
4386 __ Bind(&get_imt_index_from_method_index);
4387 __ movl(temp, Address(method, ArtMethod::MethodIndexOffset()));
4388 __ andl(temp, Immediate(ImTable::kSizeTruncToPowerOfTwo - 1));
4389
4390 __ Bind(&do_imt_dispatch);
4391 // Re-using `method` to store receiver class and ImTableEntry.
4392 __ movl(method, Address(receiver, mirror::Object::ClassOffset()));
4393 __ MaybeUnpoisonHeapReference(method);
4394
4395 __ movq(method, Address(method, mirror::Class::ImtPtrOffset(kX86_64PointerSize).Uint32Value()));
4396 // method = receiver->GetClass()->embedded_imtable_->Get(method_offset);
4397 __ movq(method, Address(method, temp, TIMES_8, /* disp= */ 0));
4398
4399 __ Jump(&execute_target_method);
4400 }
4401 __ Bind(&static_dispatch);
4402 __ cmpl(method_handle_kind, Immediate(mirror::MethodHandle::Kind::kInvokeStatic));
4403 __ j(kNotEqual, slow_path->GetEntryLabel());
4404 // MH's kind is invoke-static. The method can be called directly, hence fall-through.
4405
4406 __ Bind(&execute_target_method);
4407 __ call(Address(
4408 method,
4409 ArtMethod::EntryPointFromQuickCompiledCodeOffset(art::PointerSize::k64).SizeValue()));
4410 codegen_->RecordPcInfo(invoke, slow_path);
4411 __ Bind(slow_path->GetExitLabel());
4412 }
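// Informal dispatch summary: after the exact MethodType check, accessor kinds jump straight to
// the stored target; invoke-direct, private methods and methods declared in the receiver's class
// use the ArtMethod as-is; invoke-virtual goes through the vtable; invoke-interface goes through
// the IMT with RAX as the hidden argument; invoke-static falls through; anything else (e.g.
// invoke-super) is left to the slow path.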
4413
4414 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGet(HInvoke* invoke) {
4415 CreateVarHandleGetLocations(invoke, codegen_);
4416 }
4417
4418 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGet(HInvoke* invoke) {
4419 GenerateVarHandleGet(invoke, codegen_);
4420 }
4421
4422 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4423 CreateVarHandleGetLocations(invoke, codegen_);
4424 }
4425
4426 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAcquire(HInvoke* invoke) {
4427 // VarHandleGetAcquire is the same as VarHandleGet on x86-64 due to the x86 memory model.
4428 GenerateVarHandleGet(invoke, codegen_);
4429 }
4430
4431 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4432 CreateVarHandleGetLocations(invoke, codegen_);
4433 }
4434
4435 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetOpaque(HInvoke* invoke) {
4436 // VarHandleGetOpaque is the same as VarHandleGet on x86-64 due to the x86 memory model.
4437 GenerateVarHandleGet(invoke, codegen_);
4438 }
4439
4440 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4441 CreateVarHandleGetLocations(invoke, codegen_);
4442 }
4443
4444 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetVolatile(HInvoke* invoke) {
4445 // VarHandleGetVolatile is the same as VarHandleGet on x86-64 due to the x86 memory model.
4446 GenerateVarHandleGet(invoke, codegen_);
4447 }
4448
4449 static void CreateVarHandleSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4450 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4451 return;
4452 }
4453
4454 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4455
4456 // An extra temporary is used for the card in MarkGCCard and to move 64-bit constants to memory.
4457 locations->AddTemp(Location::RequiresRegister());
4458 }
4459
4460 static void GenerateVarHandleSet(HInvoke* invoke,
4461 CodeGeneratorX86_64* codegen,
4462 bool is_volatile,
4463 bool is_atomic,
4464 bool byte_swap = false) {
4465 X86_64Assembler* assembler = codegen->GetAssembler();
4466
4467 LocationSummary* locations = invoke->GetLocations();
4468 const uint32_t last_temp_index = locations->GetTempCount() - 1;
4469
4470 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4471 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4472
4473 VarHandleTarget target = GetVarHandleTarget(invoke);
4474 VarHandleSlowPathX86_64* slow_path = nullptr;
4475 if (!byte_swap) {
4476 slow_path = GenerateVarHandleChecks(invoke, codegen, value_type);
4477 GenerateVarHandleTarget(invoke, target, codegen);
4478 if (slow_path != nullptr) {
4479 slow_path->SetVolatile(is_volatile);
4480 slow_path->SetAtomic(is_atomic);
4481 __ Bind(slow_path->GetNativeByteOrderLabel());
4482 }
4483 }
4484
4485 switch (invoke->GetIntrinsic()) {
4486 case Intrinsics::kVarHandleSetRelease:
4487 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4488 break;
4489 case Intrinsics::kVarHandleSetVolatile:
4490 // setVolatile needs kAnyStore barrier, but HandleFieldSet takes care of that.
4491 break;
4492 default:
4493 // Other intrinsics don't need a barrier.
4494 break;
4495 }
4496
4497 Address dst(CpuRegister(target.object), CpuRegister(target.offset), TIMES_1, 0);
4498
4499 // Store the value to the field.
4500 codegen->GetInstructionCodegen()->HandleFieldSet(
4501 invoke,
4502 value_index,
4503 last_temp_index,
4504 value_type,
4505 dst,
4506 CpuRegister(target.object),
4507 is_volatile,
4508 is_atomic,
4509 /*value_can_be_null=*/true,
4510 byte_swap,
4511 // Value can be null, and this write barrier is not being relied on for other sets.
4512 value_type == DataType::Type::kReference ? WriteBarrierKind::kEmitNotBeingReliedOn :
4513 WriteBarrierKind::kDontEmit);
4514
4515 // setVolatile needs kAnyAny barrier, but HandleFieldSet takes care of that.
4516
4517 if (slow_path != nullptr) {
4518 DCHECK(!byte_swap);
4519 __ Bind(slow_path->GetExitLabel());
4520 }
4521 }
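// Memory-order note: under the x86-64 TSO model the kAnyStore barrier is effectively a no-op in
// emitted code, so plain, opaque and release stores need no fence here; only setVolatile needs a
// kAnyAny barrier after the store, which HandleFieldSet emits as noted above.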
4522
4523 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSet(HInvoke* invoke) {
4524 CreateVarHandleSetLocations(invoke, codegen_);
4525 }
4526
4527 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSet(HInvoke* invoke) {
4528 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4529 }
4530
4531 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4532 CreateVarHandleSetLocations(invoke, codegen_);
4533 }
4534
4535 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetOpaque(HInvoke* invoke) {
4536 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4537 }
4538
4539 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4540 CreateVarHandleSetLocations(invoke, codegen_);
4541 }
4542
4543 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetRelease(HInvoke* invoke) {
4544 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ false, /*is_atomic=*/ true);
4545 }
4546
4547 void IntrinsicLocationsBuilderX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4548 CreateVarHandleSetLocations(invoke, codegen_);
4549 }
4550
4551 void IntrinsicCodeGeneratorX86_64::VisitVarHandleSetVolatile(HInvoke* invoke) {
4552 GenerateVarHandleSet(invoke, codegen_, /*is_volatile=*/ true, /*is_atomic=*/ true);
4553 }
4554
4555 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke,
4556 CodeGeneratorX86_64* codegen) {
4557 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4558 return;
4559 }
4560
4561 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4562 uint32_t expected_value_index = number_of_arguments - 2;
4563 uint32_t new_value_index = number_of_arguments - 1;
4564 DataType::Type return_type = invoke->GetType();
4565 DataType::Type expected_type = GetDataTypeFromShorty(invoke, expected_value_index);
4566 DCHECK_EQ(expected_type, GetDataTypeFromShorty(invoke, new_value_index));
4567
4568 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4569
4570 if (DataType::IsFloatingPointType(return_type)) {
4571 locations->SetOut(Location::RequiresFpuRegister());
4572 } else {
4573 // Take advantage of the fact that CMPXCHG writes result to RAX.
4574 locations->SetOut(Location::RegisterLocation(RAX));
4575 }
4576
4577 if (DataType::IsFloatingPointType(expected_type)) {
4578 // RAX is needed to load the expected floating-point value into a register for CMPXCHG.
4579 locations->AddTemp(Location::RegisterLocation(RAX));
4580 // Another temporary is needed to load the new floating-point value into a register for CMPXCHG.
4581 locations->AddTemp(Location::RequiresRegister());
4582 } else {
4583 // Ensure that expected value is in RAX, as required by CMPXCHG.
4584 locations->SetInAt(expected_value_index, Location::RegisterLocation(RAX));
4585 locations->SetInAt(new_value_index, Location::RequiresRegister());
4586 if (expected_type == DataType::Type::kReference) {
4587 // Need two temporaries for MarkGCCard.
4588 locations->AddRegisterTemps(2);
4589 if (codegen->EmitReadBarrier()) {
4590 // Need three temporaries for GenerateReferenceLoadWithBakerReadBarrier.
4591 DCHECK(kUseBakerReadBarrier);
4592 locations->AddTemp(Location::RequiresRegister());
4593 }
4594 }
4595 // RAX is clobbered in CMPXCHG, but no need to mark it as temporary as it's the output register.
4596 DCHECK_EQ(RAX, locations->Out().AsRegister<Register>());
4597 }
4598 }
4599
4600 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke,
4601 CodeGeneratorX86_64* codegen,
4602 bool is_cmpxchg,
4603 bool byte_swap = false) {
4604 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
4605
4606 X86_64Assembler* assembler = codegen->GetAssembler();
4607 LocationSummary* locations = invoke->GetLocations();
4608
4609 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4610 uint32_t expected_value_index = number_of_arguments - 2;
4611 uint32_t new_value_index = number_of_arguments - 1;
4612 DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4613
4614 VarHandleSlowPathX86_64* slow_path = nullptr;
4615 VarHandleTarget target = GetVarHandleTarget(invoke);
4616 if (!byte_swap) {
4617 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
4618 GenerateVarHandleTarget(invoke, target, codegen);
4619 if (slow_path != nullptr) {
4620 __ Bind(slow_path->GetNativeByteOrderLabel());
4621 }
4622 }
4623
4624 uint32_t temp_count = locations->GetTempCount();
4625 GenCompareAndSetOrExchange(codegen,
4626 invoke,
4627 type,
4628 CpuRegister(target.object),
4629 CpuRegister(target.offset),
4630 /*temp1_index=*/ temp_count - 1,
4631 /*temp2_index=*/ temp_count - 2,
4632 /*temp3_index=*/ temp_count - 3,
4633 locations->InAt(new_value_index),
4634 locations->InAt(expected_value_index),
4635 locations->Out(),
4636 is_cmpxchg,
4637 byte_swap);
4638
4639 // We are using LOCK CMPXCHG in all cases because there is no CAS equivalent that has weak
4640 // failure semantics. LOCK CMPXCHG has full barrier semantics, so we don't need barriers.
4641
4642 if (slow_path != nullptr) {
4643 DCHECK(!byte_swap);
4644 __ Bind(slow_path->GetExitLabel());
4645 }
4646 }
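// Illustrative note: both flavours come down to LOCK CMPXCHG on the field with the expected value
// in RAX. compareAndSet-style intrinsics roughly materialize the resulting ZF as a boolean, while
// compareAndExchange-style ones return the old value that CMPXCHG leaves in RAX; the weak variants
// reuse the same strong instruction since x86 has no weak CAS.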
4647
4648 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4649 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4650 }
4651
4652 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4653 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4654 }
4655
4656 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4657 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4658 }
4659
4660 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4661 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4662 }
4663
4664 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4665 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4666 }
4667
4668 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4669 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4670 }
4671
4672 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4673 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4674 }
4675
4676 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4677 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4678 }
4679
4680 void IntrinsicLocationsBuilderX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4681 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4682 }
4683
4684 void IntrinsicCodeGeneratorX86_64::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4685 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ false);
4686 }
4687
4688 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4689 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4690 }
4691
4692 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4693 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4694 }
4695
4696 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4697 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4698 }
4699
4700 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4701 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4702 }
4703
4704 void IntrinsicLocationsBuilderX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4705 CreateVarHandleCompareAndSetOrExchangeLocations(invoke, codegen_);
4706 }
4707
4708 void IntrinsicCodeGeneratorX86_64::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4709 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_, /*is_cmpxchg=*/ true);
4710 }
4711
4712 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4713 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4714 return;
4715 }
4716
4717 // Get the type from the shorty as the invokes may not return a value.
4718 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4719 uint32_t new_value_index = number_of_arguments - 1;
4720 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4721 DataType::Type return_type = invoke->GetType();
4722 const bool is_void = return_type == DataType::Type::kVoid;
4723 DCHECK_IMPLIES(!is_void, return_type == value_type);
4724
4725 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4726
4727 if (DataType::IsFloatingPointType(value_type)) {
4728 // Only set the `out` register if it's needed. In the void case we don't use `out`.
4729 if (!is_void) {
4730 locations->SetOut(Location::RequiresFpuRegister());
4731 }
4732 // A temporary is needed to load the new floating-point value into a register for XCHG.
4733 locations->AddTemp(Location::RequiresRegister());
4734 } else {
4735 locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
4736 if (value_type == DataType::Type::kReference) {
4737 // Need two temporaries for MarkGCCard.
4738 locations->AddRegisterTemps(2);
4739 if (codegen->EmitReadBarrier()) {
4740 // Need a third temporary for GenerateReferenceLoadWithBakerReadBarrier.
4741 DCHECK(kUseBakerReadBarrier);
4742 locations->AddTemp(Location::RequiresRegister());
4743 }
4744 }
4745 // Only set the `out` register if it's needed. In the void case we can still use RAX in the
4746 // same manner, as it is marked as a temp register.
4747 if (is_void) {
4748 locations->AddTemp(Location::RegisterLocation(RAX));
4749 } else {
4750 // Use the same register for both the new value and output to take advantage of XCHG.
4751 // It doesn't have to be RAX, but we need to choose one register to make sure it's the same.
4752 locations->SetOut(Location::RegisterLocation(RAX));
4753 }
4754 }
4755 }
4756
4757 static void GenerateVarHandleGetAndSet(HInvoke* invoke,
4758 CodeGeneratorX86_64* codegen,
4759 Location value,
4760 DataType::Type type,
4761 Address field_addr,
4762 CpuRegister ref,
4763 bool byte_swap) {
4764 X86_64Assembler* assembler = codegen->GetAssembler();
4765 LocationSummary* locations = invoke->GetLocations();
4766 Location out = locations->Out();
4767 uint32_t temp_count = locations->GetTempCount();
4768 DataType::Type return_type = invoke->GetType();
4769 const bool is_void = return_type == DataType::Type::kVoid;
4770 DCHECK_IMPLIES(!is_void, return_type == type);
4771
4772 if (DataType::IsFloatingPointType(type)) {
4773 // `getAndSet` for floating-point types: move the new FP value into a register, atomically
4774 // exchange it with the field, and move the old value into the output FP register.
4775 Location temp = locations->GetTemp(temp_count - 1);
4776 codegen->Move(temp, value);
4777 bool is64bit = (type == DataType::Type::kFloat64);
4778 DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
4779 if (byte_swap) {
4780 codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4781 }
4782 if (is64bit) {
4783 __ xchgq(temp.AsRegister<CpuRegister>(), field_addr);
4784 } else {
4785 __ xchgl(temp.AsRegister<CpuRegister>(), field_addr);
4786 }
4787 if (byte_swap) {
4788 codegen->GetInstructionCodegen()->Bswap(temp, bswap_type);
4789 }
4790 if (!is_void) {
4791 MoveIntToFP(
4792 out.AsFpuRegister<XmmRegister>(), temp.AsRegister<CpuRegister>(), is64bit, assembler);
4793 }
4794 } else if (type == DataType::Type::kReference) {
4795 // `getAndSet` for references: load reference and atomically exchange it with the field.
4796 // Output register is the same as the one holding new value, so no need to move the result.
4797 DCHECK(!byte_swap);
4798
4799 // In the void case, we have an extra temp register, which is used to signal the register
4800 // allocator that we are clobbering RAX.
4801 const uint32_t extra_temp = is_void ? 1u : 0u;
4802 DCHECK_IMPLIES(is_void,
4803 locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
4804
4805 CpuRegister temp1 = locations->GetTemp(temp_count - extra_temp - 1u).AsRegister<CpuRegister>();
4806 CpuRegister temp2 = locations->GetTemp(temp_count - extra_temp - 2u).AsRegister<CpuRegister>();
4807 CpuRegister valreg = value.AsRegister<CpuRegister>();
4808
4809 if (codegen->EmitBakerReadBarrier()) {
4810 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4811 invoke,
4812 locations->GetTemp(temp_count - extra_temp - 3u),
4813 ref,
4814 field_addr,
4815 /*needs_null_check=*/false,
4816 /*always_update_field=*/true,
4817 &temp1,
4818 &temp2);
4819 }
4820 codegen->MarkGCCard(temp1, temp2, ref);
4821
4822 DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
4823 if (kPoisonHeapReferences) {
4824 // Use a temp to avoid poisoning base of the field address, which might happen if `valreg` is
4825 // the same as `target.object` (for code like `vh.getAndSet(obj, obj)`).
4826 __ movl(temp1, valreg);
4827 __ PoisonHeapReference(temp1);
4828 __ xchgl(temp1, field_addr);
4829 if (!is_void) {
4830 __ UnpoisonHeapReference(temp1);
4831 __ movl(valreg, temp1);
4832 }
4833 } else {
4834 __ xchgl(valreg, field_addr);
4835 }
4836 } else {
4837 // `getAndSet` for integral types: atomically exchange the new value with the field. Output
4838 // register is the same as the one holding new value. Do sign extend / zero extend as needed.
4839 if (byte_swap) {
4840 codegen->GetInstructionCodegen()->Bswap(value, type);
4841 }
4842 CpuRegister valreg = value.AsRegister<CpuRegister>();
4843 DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
4844 switch (type) {
4845 case DataType::Type::kBool:
4846 case DataType::Type::kUint8:
4847 __ xchgb(valreg, field_addr);
4848 if (!is_void) {
4849 __ movzxb(valreg, valreg);
4850 }
4851 break;
4852 case DataType::Type::kInt8:
4853 __ xchgb(valreg, field_addr);
4854 if (!is_void) {
4855 __ movsxb(valreg, valreg);
4856 }
4857 break;
4858 case DataType::Type::kUint16:
4859 __ xchgw(valreg, field_addr);
4860 if (!is_void) {
4861 __ movzxw(valreg, valreg);
4862 }
4863 break;
4864 case DataType::Type::kInt16:
4865 __ xchgw(valreg, field_addr);
4866 if (!is_void) {
4867 __ movsxw(valreg, valreg);
4868 }
4869 break;
4870 case DataType::Type::kInt32:
4871 case DataType::Type::kUint32:
4872 __ xchgl(valreg, field_addr);
4873 break;
4874 case DataType::Type::kInt64:
4875 case DataType::Type::kUint64:
4876 __ xchgq(valreg, field_addr);
4877 break;
4878 default:
4879 LOG(FATAL) << "unexpected type in getAndSet intrinsic: " << type;
4880 UNREACHABLE();
4881 }
4882 if (byte_swap) {
4883 codegen->GetInstructionCodegen()->Bswap(value, type);
4884 }
4885 }
4886 }
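// Illustrative note: XCHG with a memory operand has an implicit LOCK prefix, so a single
// xchg{b,w,l,q} both performs the atomic swap and acts as a full barrier, which is why no
// explicit fences are emitted for the getAndSet family here.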
4887
4888 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
4889 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
4890 return;
4891 }
4892
4893 // Get the type from the shorty as the invokes may not return a value.
4894 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4895 uint32_t new_value_index = number_of_arguments - 1;
4896 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
4897 DataType::Type return_type = invoke->GetType();
4898 const bool is_void = return_type == DataType::Type::kVoid;
4899 DCHECK_IMPLIES(!is_void, return_type == value_type);
4900
4901 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
4902
4903 DCHECK_NE(DataType::Type::kReference, value_type);
4904 DCHECK(!DataType::IsFloatingPointType(value_type));
4905 // A temporary to compute the bitwise operation on the old and the new values.
4906 locations->AddTemp(Location::RequiresRegister());
4907 // We need value to be either in a register, or a 32-bit constant (as there are no arithmetic
4908 // instructions that accept a 64-bit immediate on x86_64).
4909 locations->SetInAt(new_value_index,
4910 DataType::Is64BitType(value_type) ?
4911 Location::RequiresRegister() :
4912 Location::RegisterOrConstant(invoke->InputAt(new_value_index)));
4913 if (is_void) {
4914 // RAX is used as a temporary even when we do not output it, so reserve it. This has to be
4915 // requested before the other temporary since there is a variable number of temp registers and
4916 // the other temp register is expected to be the last one.
4917 locations->AddTemp(Location::RegisterLocation(RAX));
4918 } else {
4919 // Output is in RAX to accommodate CMPXCHG. It is also used as a temporary.
4920 locations->SetOut(Location::RegisterLocation(RAX));
4921 }
4922 }
4923
4924 static void GenerateVarHandleGetAndOp(HInvoke* invoke,
4925 CodeGeneratorX86_64* codegen,
4926 Location value,
4927 DataType::Type type,
4928 Address field_addr,
4929 GetAndUpdateOp get_and_update_op,
4930 bool byte_swap) {
4931 X86_64Assembler* assembler = codegen->GetAssembler();
4932 LocationSummary* locations = invoke->GetLocations();
4933 // In the void case, we have an extra temp register, which is used to signal the register
4934 // allocator that we are clobbering RAX.
4935 const bool is_void = invoke->GetType() == DataType::Type::kVoid;
4936 const uint32_t extra_temp = is_void ? 1u : 0u;
4937 const uint32_t temp_count = locations->GetTempCount();
4938 DCHECK_IMPLIES(is_void,
4939 locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
4940 Location temp_loc = locations->GetTemp(temp_count - extra_temp - 1u);
4941 Location rax_loc = Location::RegisterLocation(RAX);
4942 DCHECK_IMPLIES(!is_void, locations->Out().Equals(rax_loc));
4943 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
4944 bool is64Bit = DataType::Is64BitType(type);
4945
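// A sketch of the loop below: load the old value into RAX, compute `old OP value` in a
// temporary, then LOCK CMPXCHG the temporary into the field; if another thread changed the
// field in the meantime, CMPXCHG fails and we retry.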
4946 NearLabel retry;
4947 __ Bind(&retry);
4948
4949 // Load field value into RAX and copy it into a temporary register for the operation.
4950 codegen->LoadFromMemoryNoReference(type, rax_loc, field_addr);
4951 codegen->Move(temp_loc, rax_loc);
4952 if (byte_swap) {
4953 // Byte swap the temporary, since we need to perform the operation in native endianness.
4954 codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
4955 }
4956
4957 DCHECK_IMPLIES(value.IsConstant(), !is64Bit);
4958 int32_t const_value = value.IsConstant()
4959 ? CodeGenerator::GetInt32ValueOf(value.GetConstant())
4960 : 0;
4961
4962 // Use 32-bit registers for 8/16/32-bit types to save on the REX prefix.
4963 switch (get_and_update_op) {
4964 case GetAndUpdateOp::kAdd:
4965 DCHECK(byte_swap); // The non-byte-swapping path should use a faster XADD instruction.
4966 if (is64Bit) {
4967 __ addq(temp, value.AsRegister<CpuRegister>());
4968 } else if (value.IsConstant()) {
4969 __ addl(temp, Immediate(const_value));
4970 } else {
4971 __ addl(temp, value.AsRegister<CpuRegister>());
4972 }
4973 break;
4974 case GetAndUpdateOp::kBitwiseAnd:
4975 if (is64Bit) {
4976 __ andq(temp, value.AsRegister<CpuRegister>());
4977 } else if (value.IsConstant()) {
4978 __ andl(temp, Immediate(const_value));
4979 } else {
4980 __ andl(temp, value.AsRegister<CpuRegister>());
4981 }
4982 break;
4983 case GetAndUpdateOp::kBitwiseOr:
4984 if (is64Bit) {
4985 __ orq(temp, value.AsRegister<CpuRegister>());
4986 } else if (value.IsConstant()) {
4987 __ orl(temp, Immediate(const_value));
4988 } else {
4989 __ orl(temp, value.AsRegister<CpuRegister>());
4990 }
4991 break;
4992 case GetAndUpdateOp::kBitwiseXor:
4993 if (is64Bit) {
4994 __ xorq(temp, value.AsRegister<CpuRegister>());
4995 } else if (value.IsConstant()) {
4996 __ xorl(temp, Immediate(const_value));
4997 } else {
4998 __ xorl(temp, value.AsRegister<CpuRegister>());
4999 }
5000 break;
5001 default:
5002 LOG(FATAL) << "unexpected operation";
5003 UNREACHABLE();
5004 }
5005
5006 if (byte_swap) {
5007 // RAX still contains the original value, but we need to byte swap the temporary back.
5008 codegen->GetInstructionCodegen()->Bswap(temp_loc, type);
5009 }
5010
5011 switch (type) {
5012 case DataType::Type::kBool:
5013 case DataType::Type::kUint8:
5014 case DataType::Type::kInt8:
5015 __ LockCmpxchgb(field_addr, temp);
5016 break;
5017 case DataType::Type::kUint16:
5018 case DataType::Type::kInt16:
5019 __ LockCmpxchgw(field_addr, temp);
5020 break;
5021 case DataType::Type::kInt32:
5022 case DataType::Type::kUint32:
5023 __ LockCmpxchgl(field_addr, temp);
5024 break;
5025 case DataType::Type::kInt64:
5026 case DataType::Type::kUint64:
5027 __ LockCmpxchgq(field_addr, temp);
5028 break;
5029 default:
5030 LOG(FATAL) << "unexpected type in getAndBitwiseOp intrinsic";
5031 UNREACHABLE();
5032 }
5033
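// LOCK CMPXCHG sets ZF on success; ZF clear means the field was concurrently modified, so retry.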
5034 __ j(kNotZero, &retry);
5035
5036 // The result is in RAX after CMPXCHG. Byte swap if necessary, but do not sign/zero extend,
5037 // as it has already been done by `LoadFromMemoryNoReference` above (and not altered by CMPXCHG).
5038 if (byte_swap) {
5039 codegen->GetInstructionCodegen()->Bswap(rax_loc, type);
5040 }
5041 }
5042
5043 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke, CodeGeneratorX86_64* codegen) {
5044 if (!HasVarHandleIntrinsicImplementation(invoke, codegen)) {
5045 return;
5046 }
5047
5048 // Get the type from the shorty as the invokes may not return a value.
5049 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
5050 uint32_t new_value_index = number_of_arguments - 1;
5051 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
5052 DataType::Type return_type = invoke->GetType();
5053 const bool is_void = return_type == DataType::Type::kVoid;
5054 DCHECK_IMPLIES(!is_void, return_type == value_type);
5055
5056 LocationSummary* locations = CreateVarHandleCommonLocations(invoke);
5057
5058 if (DataType::IsFloatingPointType(value_type)) {
5059 // Only set the `out` register if it's needed. In the void case we don't use `out`.
5060 if (!is_void) {
5061 locations->SetOut(Location::RequiresFpuRegister());
5062 }
5063 // Require that the new FP value is in a register (and not a constant) for ADDSS/ADDSD.
5064 locations->SetInAt(new_value_index, Location::RequiresFpuRegister());
5065 // CMPXCHG clobbers RAX.
5066 locations->AddTemp(Location::RegisterLocation(RAX));
5067 // An FP temporary to load the old value from the field and perform FP addition.
5068 locations->AddTemp(Location::RequiresFpuRegister());
5069 // A temporary to hold the new value for CMPXCHG.
5070 locations->AddTemp(Location::RequiresRegister());
5071 } else {
5072 DCHECK_NE(value_type, DataType::Type::kReference);
5073 locations->SetInAt(new_value_index, Location::RegisterLocation(RAX));
5074 if (GetExpectedVarHandleCoordinatesCount(invoke) == 2) {
5075 // For byte array views with non-native endianness we need extra BSWAP operations, so we
5076 // cannot use XADD and have to fall back to a generic implementation based on CMPXCHG. In that
5077 // case we need two temporary registers: one to hold the value instead of RAX (which may get
5078 // clobbered by repeated CMPXCHG) and one for performing the operation. At compile time we
5079 // cannot distinguish this case from arrays or native-endian byte array views.
5080 locations->AddRegisterTemps(2);
5081 }
5082 // Only set the `out` register if it's needed. In the void case we can still use RAX in the
5083 // same manner, since it is marked as a temp register.
5084 if (is_void) {
5085 locations->AddTemp(Location::RegisterLocation(RAX));
5086 } else {
5087 // Use the same register for both the new value and output to take advantage of XADD.
5088 // It should be RAX, because the byte-swapping path of GenerateVarHandleGetAndAdd falls
5089 // back to GenerateVarHandleGetAndOp, which expects the output in RAX.
5090 locations->SetOut(Location::RegisterLocation(RAX));
5091 }
5092 }
5093 }
5094
5095 static void GenerateVarHandleGetAndAdd(HInvoke* invoke,
5096 CodeGeneratorX86_64* codegen,
5097 Location value,
5098 DataType::Type type,
5099 Address field_addr,
5100 bool byte_swap) {
5101 X86_64Assembler* assembler = codegen->GetAssembler();
5102 LocationSummary* locations = invoke->GetLocations();
5103 Location out = locations->Out();
5104 uint32_t temp_count = locations->GetTempCount();
5105
5106 DataType::Type return_type = invoke->GetType();
5107 const bool is_void = return_type == DataType::Type::kVoid;
5108 DCHECK_IMPLIES(!is_void, return_type == type);
5109
5110 if (DataType::IsFloatingPointType(type)) {
5111 if (byte_swap) {
5112 // This code should never be executed: it is the case of a byte array view (since it requires
5113 // a byte swap), and varhandles for byte array views support numeric atomic update access modes
5114 // only for int and long, but not for floating-point types (see the javadoc comments for
5115 // java.lang.invoke.MethodHandles.byteArrayViewVarHandle()). However, the ART varhandle
5116 // implementation for byte array views treats floating-point types as numeric types in
5117 // ByteArrayViewVarHandle::Access(). Therefore we do generate intrinsic code, but it always
5118 // fails the access mode check at runtime before reaching this point. The illegal instruction
5119 // UD2 ensures that if control flow gets here by mistake, we will notice.
5120 __ ud2();
5121 }
5122
5123 // `getAndAdd` for floating-point types: load the old FP value into a temporary FP register and
5124 // into RAX for CMPXCHG, add the new FP value to the old one, move the sum to a non-FP temporary
5125 // for CMPXCHG and loop until CMPXCHG succeeds. Move the result from RAX to the output FP register.
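// For illustration, a Java-level call that takes this path might look like
// `double old = (double) vh.getAndAdd(obj, 1.5);` (hypothetical handle and receiver names).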
5126 bool is64bit = (type == DataType::Type::kFloat64);
5127 DataType::Type bswap_type = is64bit ? DataType::Type::kUint64 : DataType::Type::kUint32;
5128 XmmRegister fptemp = locations->GetTemp(temp_count - 2).AsFpuRegister<XmmRegister>();
5129 Location rax_loc = Location::RegisterLocation(RAX);
5130 Location temp_loc = locations->GetTemp(temp_count - 1);
5131 CpuRegister temp = temp_loc.AsRegister<CpuRegister>();
5132
5133 NearLabel retry;
5134 __ Bind(&retry);
5135
5136 // Read the value from memory into an FP register and copy it into RAX.
5137 if (is64bit) {
5138 __ movsd(fptemp, field_addr);
5139 } else {
5140 __ movss(fptemp, field_addr);
5141 }
5142 MoveFPToInt(CpuRegister(RAX), fptemp, is64bit, assembler);
5143 // If necessary, byte swap RAX and update the value in FP register to also be byte-swapped.
5144 if (byte_swap) {
5145 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5146 MoveIntToFP(fptemp, CpuRegister(RAX), is64bit, assembler);
5147 }
5148 // Perform the FP addition and move it to a temporary register to prepare for CMPXCHG.
5149 if (is64bit) {
5150 __ addsd(fptemp, value.AsFpuRegister<XmmRegister>());
5151 } else {
5152 __ addss(fptemp, value.AsFpuRegister<XmmRegister>());
5153 }
5154 MoveFPToInt(temp, fptemp, is64bit, assembler);
5155 // If necessary, byte swap the new value in the temporary and RAX back to memory byte order
5156 // before CMPXCHG.
5156 if (byte_swap) {
5157 codegen->GetInstructionCodegen()->Bswap(temp_loc, bswap_type);
5158 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5159 }
5160 if (is64bit) {
5161 __ LockCmpxchgq(field_addr, temp);
5162 } else {
5163 __ LockCmpxchgl(field_addr, temp);
5164 }
5165
5166 __ j(kNotZero, &retry);
5167
5168 // The old value is in RAX, byte swap if necessary.
5169 if (byte_swap) {
5170 codegen->GetInstructionCodegen()->Bswap(rax_loc, bswap_type);
5171 }
5172 if (!is_void) {
5173 MoveIntToFP(out.AsFpuRegister<XmmRegister>(), CpuRegister(RAX), is64bit, assembler);
5174 }
5175 } else {
5176 if (byte_swap) {
5177 // We cannot use XADD since we need to byte-swap the old value when reading it from memory,
5178 // and then byte-swap the sum before writing it to memory. So fall back to the slower generic
5179 // implementation that is also used for bitwise operations.
5180 // Move the value from RAX to a temporary register, as RAX may get clobbered by repeated CMPXCHG.
5181 DCHECK_EQ(GetExpectedVarHandleCoordinatesCount(invoke), 2u);
5182 // In the void case, we have an extra temp register, which is used to signal the register
5183 // allocator that we are clobbering RAX.
5184 const uint32_t extra_temp = is_void ? 1u : 0u;
5185 DCHECK_IMPLIES(is_void,
5186 locations->GetTemp(temp_count - 1u).Equals(Location::RegisterLocation(RAX)));
5187 Location temp = locations->GetTemp(temp_count - extra_temp - 2u);
5188 codegen->Move(temp, value);
5189 GenerateVarHandleGetAndOp(
5190 invoke, codegen, temp, type, field_addr, GetAndUpdateOp::kAdd, byte_swap);
5191 } else {
5192 // `getAndAdd` for integral types: atomically exchange the new value with the field and add
5193 // the old value to the field. The output register is the same as the one holding the new
5194 // value. Do sign extend / zero extend as needed.
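// LOCK XADD semantics (Intel SDM): TEMP := DEST + SRC; SRC := DEST; DEST := TEMP, performed
// atomically, so `valreg` ends up holding the old field value.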
5195 CpuRegister valreg = value.AsRegister<CpuRegister>();
5196 DCHECK_IMPLIES(!is_void, valreg == out.AsRegister<CpuRegister>());
5197 switch (type) {
5198 case DataType::Type::kBool:
5199 case DataType::Type::kUint8:
5200 __ LockXaddb(field_addr, valreg);
5201 if (!is_void) {
5202 __ movzxb(valreg, valreg);
5203 }
5204 break;
5205 case DataType::Type::kInt8:
5206 __ LockXaddb(field_addr, valreg);
5207 if (!is_void) {
5208 __ movsxb(valreg, valreg);
5209 }
5210 break;
5211 case DataType::Type::kUint16:
5212 __ LockXaddw(field_addr, valreg);
5213 if (!is_void) {
5214 __ movzxw(valreg, valreg);
5215 }
5216 break;
5217 case DataType::Type::kInt16:
5218 __ LockXaddw(field_addr, valreg);
5219 if (!is_void) {
5220 __ movsxw(valreg, valreg);
5221 }
5222 break;
5223 case DataType::Type::kInt32:
5224 case DataType::Type::kUint32:
5225 __ LockXaddl(field_addr, valreg);
5226 break;
5227 case DataType::Type::kInt64:
5228 case DataType::Type::kUint64:
5229 __ LockXaddq(field_addr, valreg);
5230 break;
5231 default:
5232 LOG(FATAL) << "unexpected type in getAndAdd intrinsic";
5233 UNREACHABLE();
5234 }
5235 }
5236 }
5237 }
5238
5239 static void GenerateVarHandleGetAndUpdate(HInvoke* invoke,
5240 CodeGeneratorX86_64* codegen,
5241 GetAndUpdateOp get_and_update_op,
5242 bool need_any_store_barrier,
5243 bool need_any_any_barrier,
5244 bool byte_swap = false) {
5245 DCHECK_IMPLIES(codegen->EmitReadBarrier(), kUseBakerReadBarrier);
5246
5247 X86_64Assembler* assembler = codegen->GetAssembler();
5248 LocationSummary* locations = invoke->GetLocations();
5249
5250 // Get the type from the shorty as the invokes may not return a value.
5251 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
5252 Location value = locations->InAt(number_of_arguments - 1);
5253 DataType::Type type = GetDataTypeFromShorty(invoke, number_of_arguments - 1);
5254
5255 VarHandleSlowPathX86_64* slow_path = nullptr;
5256 VarHandleTarget target = GetVarHandleTarget(invoke);
5257 if (!byte_swap) {
5258 slow_path = GenerateVarHandleChecks(invoke, codegen, type);
5259 GenerateVarHandleTarget(invoke, target, codegen);
5260 if (slow_path != nullptr) {
5261 slow_path->SetGetAndUpdateOp(get_and_update_op);
5262 slow_path->SetNeedAnyStoreBarrier(need_any_store_barrier);
5263 slow_path->SetNeedAnyAnyBarrier(need_any_any_barrier);
5264 __ Bind(slow_path->GetNativeByteOrderLabel());
5265 }
5266 }
5267
5268 CpuRegister ref(target.object);
5269 Address field_addr(ref, CpuRegister(target.offset), TIMES_1, 0);
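// Note: by this point `target.offset` should already hold the full offset from `target.object`
// (field offset, or data offset plus scaled index for arrays and byte array views), hence the
// TIMES_1 scale and zero displacement.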
5270
5271 if (need_any_store_barrier) {
5272 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
5273 }
5274
5275 switch (get_and_update_op) {
5276 case GetAndUpdateOp::kSet:
5277 GenerateVarHandleGetAndSet(invoke, codegen, value, type, field_addr, ref, byte_swap);
5278 break;
5279 case GetAndUpdateOp::kAdd:
5280 GenerateVarHandleGetAndAdd(invoke, codegen, value, type, field_addr, byte_swap);
5281 break;
5282 case GetAndUpdateOp::kBitwiseAnd:
5283 case GetAndUpdateOp::kBitwiseOr:
5284 case GetAndUpdateOp::kBitwiseXor:
5285 GenerateVarHandleGetAndOp(
5286 invoke, codegen, value, type, field_addr, get_and_update_op, byte_swap);
5287 break;
5288 }
5289
5290 if (need_any_any_barrier) {
5291 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyAny);
5292 }
5293
5294 if (slow_path != nullptr) {
5295 DCHECK(!byte_swap);
5296 __ Bind(slow_path->GetExitLabel());
5297 }
5298 }
5299
5300 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5301 CreateVarHandleGetAndSetLocations(invoke, codegen_);
5302 }
5303
5304 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSet(HInvoke* invoke) {
5305 // `getAndSet` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5306 GenerateVarHandleGetAndUpdate(invoke,
5307 codegen_,
5308 GetAndUpdateOp::kSet,
5309 /*need_any_store_barrier=*/ true,
5310 /*need_any_any_barrier=*/ true);
5311 }
5312
5313 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5314 CreateVarHandleGetAndSetLocations(invoke, codegen_);
5315 }
5316
5317 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
5318 // `getAndSetAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
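// (On x86-64, ordinary loads already have acquire semantics and ordinary stores already have
// release semantics, which is why no fence is required here.)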
5319 GenerateVarHandleGetAndUpdate(invoke,
5320 codegen_,
5321 GetAndUpdateOp::kSet,
5322 /*need_any_store_barrier=*/ false,
5323 /*need_any_any_barrier=*/ false);
5324 }
5325
5326 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5327 CreateVarHandleGetAndSetLocations(invoke, codegen_);
5328 }
5329
5330 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
5331 // `getAndSetRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5332 GenerateVarHandleGetAndUpdate(invoke,
5333 codegen_,
5334 GetAndUpdateOp::kSet,
5335 /*need_any_store_barrier=*/ true,
5336 /*need_any_any_barrier=*/ false);
5337 }
5338
5339 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5340 CreateVarHandleGetAndAddLocations(invoke, codegen_);
5341 }
5342
5343 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAdd(HInvoke* invoke) {
5344 // `getAndAdd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5345 GenerateVarHandleGetAndUpdate(invoke,
5346 codegen_,
5347 GetAndUpdateOp::kAdd,
5348 /*need_any_store_barrier=*/ true,
5349 /*need_any_any_barrier=*/ true);
5350 }
5351
5352 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5353 CreateVarHandleGetAndAddLocations(invoke, codegen_);
5354 }
5355
5356 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
5357 // `getAndAddAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5358 GenerateVarHandleGetAndUpdate(invoke,
5359 codegen_,
5360 GetAndUpdateOp::kAdd,
5361 /*need_any_store_barrier=*/ false,
5362 /*need_any_any_barrier=*/ false);
5363 }
5364
5365 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5366 CreateVarHandleGetAndAddLocations(invoke, codegen_);
5367 }
5368
5369 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
5370 // `getAndAddRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5371 GenerateVarHandleGetAndUpdate(invoke,
5372 codegen_,
5373 GetAndUpdateOp::kAdd,
5374 /*need_any_store_barrier=*/ true,
5375 /*need_any_any_barrier=*/ false);
5376 }
5377
5378 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5379 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5380 }
5381
5382 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
5383 // `getAndBitwiseAnd` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5384 GenerateVarHandleGetAndUpdate(invoke,
5385 codegen_,
5386 GetAndUpdateOp::kBitwiseAnd,
5387 /*need_any_store_barrier=*/ true,
5388 /*need_any_any_barrier=*/ true);
5389 }
5390
5391 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5392 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5393 }
5394
5395 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
5396 // `getAndBitwiseAndAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5397 GenerateVarHandleGetAndUpdate(invoke,
5398 codegen_,
5399 GetAndUpdateOp::kBitwiseAnd,
5400 /*need_any_store_barrier=*/ false,
5401 /*need_any_any_barrier=*/ false);
5402 }
5403
5404 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5405 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5406 }
5407
5408 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
5409 // `getAndBitwiseAndRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5410 GenerateVarHandleGetAndUpdate(invoke,
5411 codegen_,
5412 GetAndUpdateOp::kBitwiseAnd,
5413 /*need_any_store_barrier=*/ true,
5414 /*need_any_any_barrier=*/ false);
5415 }
5416
5417 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5418 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5419 }
5420
5421 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
5422 // `getAndBitwiseOr` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5423 GenerateVarHandleGetAndUpdate(invoke,
5424 codegen_,
5425 GetAndUpdateOp::kBitwiseOr,
5426 /*need_any_store_barrier=*/ true,
5427 /*need_any_any_barrier=*/ true);
5428 }
5429
5430 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5431 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5432 }
5433
5434 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
5435 // `getAndBitwiseOrAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5436 GenerateVarHandleGetAndUpdate(invoke,
5437 codegen_,
5438 GetAndUpdateOp::kBitwiseOr,
5439 /*need_any_store_barrier=*/ false,
5440 /*need_any_any_barrier=*/ false);
5441 }
5442
5443 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5444 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5445 }
5446
5447 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
5448 // `getAndBitwiseOrRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5449 GenerateVarHandleGetAndUpdate(invoke,
5450 codegen_,
5451 GetAndUpdateOp::kBitwiseOr,
5452 /*need_any_store_barrier=*/ true,
5453 /*need_any_any_barrier=*/ false);
5454 }
5455
5456 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5457 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5458 }
5459
5460 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
5461 // `getAndBitwiseXor` has `getVolatile` + `setVolatile` semantics, so it needs both barriers.
5462 GenerateVarHandleGetAndUpdate(invoke,
5463 codegen_,
5464 GetAndUpdateOp::kBitwiseXor,
5465 /*need_any_store_barrier=*/ true,
5466 /*need_any_any_barrier=*/ true);
5467 }
5468
5469 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5470 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5471 }
5472
5473 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
5474 // `getAndBitwiseXorAcquire` has `getAcquire` + `set` semantics, so it doesn't need any barriers.
5475 GenerateVarHandleGetAndUpdate(invoke,
5476 codegen_,
5477 GetAndUpdateOp::kBitwiseXor,
5478 /*need_any_store_barrier=*/ false,
5479 /*need_any_any_barrier=*/ false);
5480 }
5481
5482 void IntrinsicLocationsBuilderX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5483 CreateVarHandleGetAndBitwiseOpLocations(invoke, codegen_);
5484 }
5485
5486 void IntrinsicCodeGeneratorX86_64::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
5487 // `getAndBitwiseXorRelease` has `get` + `setRelease` semantics, so it needs `kAnyStore` barrier.
5488 GenerateVarHandleGetAndUpdate(invoke,
5489 codegen_,
5490 GetAndUpdateOp::kBitwiseXor,
5491 /*need_any_store_barrier=*/ true,
5492 /*need_any_any_barrier=*/ false);
5493 }
5494
5495 void VarHandleSlowPathX86_64::EmitByteArrayViewCode(CodeGeneratorX86_64* codegen) {
5496 DCHECK(GetByteArrayViewCheckLabel()->IsLinked());
5497 X86_64Assembler* assembler = codegen->GetAssembler();
5498
5499 HInvoke* invoke = GetInvoke();
5500 LocationSummary* locations = invoke->GetLocations();
5501 mirror::VarHandle::AccessModeTemplate access_mode_template = GetAccessModeTemplate();
5502 DataType::Type value_type =
5503 GetVarHandleExpectedValueType(invoke, /*expected_coordinates_count=*/ 2u);
5504 DCHECK_NE(value_type, DataType::Type::kReference);
5505 size_t size = DataType::Size(value_type);
5506 DCHECK_GT(size, 1u);
5507
5508 CpuRegister varhandle = locations->InAt(0).AsRegister<CpuRegister>();
5509 CpuRegister object = locations->InAt(1).AsRegister<CpuRegister>();
5510 CpuRegister index = locations->InAt(2).AsRegister<CpuRegister>();
5511 CpuRegister temp = locations->GetTemp(locations->GetTempCount() - 1u).AsRegister<CpuRegister>();
5512
5513 MemberOffset class_offset = mirror::Object::ClassOffset();
5514 MemberOffset array_length_offset = mirror::Array::LengthOffset();
5515 MemberOffset data_offset = mirror::Array::DataOffset(Primitive::kPrimByte);
5516 MemberOffset native_byte_order_offset = mirror::ByteArrayViewVarHandle::NativeByteOrderOffset();
5517
5518 VarHandleTarget target = GetVarHandleTarget(invoke);
5519
5520 __ Bind(GetByteArrayViewCheckLabel());
5521
5522 // The main path checked that coordinateType0 is an array class that matches the class of
5523 // the actual coordinate argument, but its component type does not match the value type.
5524 // Check if the `varhandle` references a ByteArrayViewVarHandle instance.
5525 codegen->LoadClassRootForIntrinsic(temp, ClassRoot::kJavaLangInvokeByteArrayViewVarHandle);
5526 assembler->MaybePoisonHeapReference(temp);
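// The class field of the `varhandle` object is stored as a (possibly poisoned) heap reference,
// so poison the loaded class root to make the comparison below valid.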
5527 __ cmpl(temp, Address(varhandle, class_offset.Int32Value()));
5528 __ j(kNotEqual, GetEntryLabel());
5529
5530 // Check for array index out of bounds.
5531 __ movl(temp, Address(object, array_length_offset.Int32Value()));
5532 // SUB sets flags in the same way as CMP.
5533 __ subl(temp, index);
5534 __ j(kBelowEqual, GetEntryLabel());
5535 // The difference between index and array length must be enough for the `value_type` size.
5536 __ cmpl(temp, Immediate(size));
5537 __ j(kBelow, GetEntryLabel());
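// Together, the two checks above require `index + size <= length` for the multi-byte access.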
5538
5539 // Construct the target.
5540 __ leal(CpuRegister(target.offset), Address(index, TIMES_1, data_offset.Int32Value()));
5541
5542 // Alignment check. For unaligned access, go to the runtime.
5543 DCHECK(IsPowerOfTwo(size));
5544 __ testl(CpuRegister(target.offset), Immediate(size - 1u));
5545 __ j(kNotZero, GetEntryLabel());
5546
5547 // Byte order check. For native byte order return to the main path.
5548 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kSet &&
5549 IsZeroBitPattern(invoke->InputAt(invoke->GetNumberOfArguments() - 1u))) {
5550 // There is no reason to differentiate between native byte order and byte-swap
5551 // for setting a zero bit pattern. Just return to the main path.
5552 __ jmp(GetNativeByteOrderLabel());
5553 return;
5554 }
5555 __ cmpl(Address(varhandle, native_byte_order_offset.Int32Value()), Immediate(0));
5556 __ j(kNotEqual, GetNativeByteOrderLabel());
5557
5558 switch (access_mode_template) {
5559 case mirror::VarHandle::AccessModeTemplate::kGet:
5560 GenerateVarHandleGet(invoke, codegen, /*byte_swap=*/ true);
5561 break;
5562 case mirror::VarHandle::AccessModeTemplate::kSet:
5563 GenerateVarHandleSet(invoke, codegen, is_volatile_, is_atomic_, /*byte_swap=*/ true);
5564 break;
5565 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
5566 GenerateVarHandleCompareAndSetOrExchange(
5567 invoke, codegen, /*is_cmpxchg=*/ false, /*byte_swap=*/ true);
5568 break;
5569 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange:
5570 GenerateVarHandleCompareAndSetOrExchange(
5571 invoke, codegen, /*is_cmpxchg=*/ true, /*byte_swap=*/ true);
5572 break;
5573 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate:
5574 GenerateVarHandleGetAndUpdate(invoke,
5575 codegen,
5576 get_and_update_op_,
5577 need_any_store_barrier_,
5578 need_any_any_barrier_,
5579 /*byte_swap=*/ true);
5580 break;
5581 }
5582
5583 __ jmp(GetExitLabel());
5584 }
5585
5586 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86_64, Name)
5587 UNIMPLEMENTED_INTRINSIC_LIST_X86_64(MARK_UNIMPLEMENTED);
5588 #undef MARK_UNIMPLEMENTED
5589
5590 UNREACHABLE_INTRINSICS(X86_64)
5591
5592 #undef __
5593
5594 } // namespace x86_64
5595 } // namespace art
5596