1 /*
2 * Copyright (C) 2015 The Android Open Source Project
3 *
4 * Licensed under the Apache License, Version 2.0 (the "License");
5 * you may not use this file except in compliance with the License.
6 * You may obtain a copy of the License at
7 *
8 * http://www.apache.org/licenses/LICENSE-2.0
9 *
10 * Unless required by applicable law or agreed to in writing, software
11 * distributed under the License is distributed on an "AS IS" BASIS,
12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13 * See the License for the specific language governing permissions and
14 * limitations under the License.
15 */
16
17 #include "intrinsics_x86.h"
18
19 #include <limits>
20
21 #include "arch/x86/instruction_set_features_x86.h"
22 #include "art_method.h"
23 #include "base/bit_utils.h"
24 #include "code_generator_x86.h"
25 #include "data_type-inl.h"
26 #include "entrypoints/quick/quick_entrypoints.h"
27 #include "heap_poisoning.h"
28 #include "intrinsics.h"
29 #include "intrinsics_utils.h"
30 #include "lock_word.h"
31 #include "mirror/array-inl.h"
32 #include "mirror/object_array-inl.h"
33 #include "mirror/reference.h"
34 #include "mirror/string.h"
35 #include "mirror/var_handle.h"
36 #include "scoped_thread_state_change-inl.h"
37 #include "thread-current-inl.h"
38 #include "utils/x86/assembler_x86.h"
39 #include "utils/x86/constants_x86.h"
40
41 namespace art HIDDEN {
42
43 namespace x86 {
44
IntrinsicLocationsBuilderX86(CodeGeneratorX86 * codegen)45 IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
46 : allocator_(codegen->GetGraph()->GetAllocator()),
47 codegen_(codegen) {
48 }
49
50
GetAssembler()51 X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
52 return down_cast<X86Assembler*>(codegen_->GetAssembler());
53 }
54
GetAllocator()55 ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
56 return codegen_->GetGraph()->GetAllocator();
57 }
58
TryDispatch(HInvoke * invoke)59 bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
60 Dispatch(invoke);
61 LocationSummary* res = invoke->GetLocations();
62 if (res == nullptr) {
63 return false;
64 }
65 return res->Intrinsified();
66 }
67
68 using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;
69
70 // NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
71 #define __ down_cast<X86Assembler*>(codegen->GetAssembler())-> // NOLINT
72
73 // Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
74 class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
75 public:
ReadBarrierSystemArrayCopySlowPathX86(HInstruction * instruction)76 explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
77 : SlowPathCode(instruction) {
78 DCHECK(gUseReadBarrier);
79 DCHECK(kUseBakerReadBarrier);
80 }
81
EmitNativeCode(CodeGenerator * codegen)82 void EmitNativeCode(CodeGenerator* codegen) override {
83 CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
84 LocationSummary* locations = instruction_->GetLocations();
85 DCHECK(locations->CanCall());
86 DCHECK(instruction_->IsInvokeStaticOrDirect())
87 << "Unexpected instruction in read barrier arraycopy slow path: "
88 << instruction_->DebugName();
89 DCHECK(instruction_->GetLocations()->Intrinsified());
90 DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);
91
92 int32_t element_size = DataType::Size(DataType::Type::kReference);
93 uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();
94
95 Register src = locations->InAt(0).AsRegister<Register>();
96 Location src_pos = locations->InAt(1);
97 Register dest = locations->InAt(2).AsRegister<Register>();
98 Location dest_pos = locations->InAt(3);
99 Location length = locations->InAt(4);
100 Location temp1_loc = locations->GetTemp(0);
101 Register temp1 = temp1_loc.AsRegister<Register>();
102 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
103 Register temp3 = locations->GetTemp(2).AsRegister<Register>();
104
105 __ Bind(GetEntryLabel());
106 // In this code path, registers `temp1`, `temp2`, and `temp3`
107 // (resp.) are not used for the base source address, the base
108 // destination address, and the end source address (resp.), as in
109 // other SystemArrayCopy intrinsic code paths. Instead they are
110 // (resp.) used for:
111 // - the loop index (`i`);
112 // - the source index (`src_index`) and the loaded (source)
113 // reference (`value`); and
114 // - the destination index (`dest_index`).
115
116 // i = 0
117 __ xorl(temp1, temp1);
118 NearLabel loop;
119 __ Bind(&loop);
120 // value = src_array[i + src_pos]
121 if (src_pos.IsConstant()) {
122 int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
123 int32_t adjusted_offset = offset + constant * element_size;
124 __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
125 } else {
126 __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
127 __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
128 }
129 __ MaybeUnpoisonHeapReference(temp2);
130 // TODO: Inline the mark bit check before calling the runtime?
131 // value = ReadBarrier::Mark(value)
132 // No need to save live registers; it's taken care of by the
133 // entrypoint. Also, there is no need to update the stack mask,
134 // as this runtime call will not trigger a garbage collection.
135 // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
136 // explanations.)
137 DCHECK_NE(temp2, ESP);
138 DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
139 int32_t entry_point_offset = Thread::ReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
140 // This runtime call does not require a stack map.
141 x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
142 __ MaybePoisonHeapReference(temp2);
143 // dest_array[i + dest_pos] = value
144 if (dest_pos.IsConstant()) {
145 int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
146 int32_t adjusted_offset = offset + constant * element_size;
147 __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
148 } else {
149 __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
150 __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
151 }
152 // ++i
153 __ addl(temp1, Immediate(1));
154 // if (i != length) goto loop
155 x86_codegen->GenerateIntCompare(temp1_loc, length);
156 __ j(kNotEqual, &loop);
157 __ jmp(GetExitLabel());
158 }
159
GetDescription() const160 const char* GetDescription() const override { return "ReadBarrierSystemArrayCopySlowPathX86"; }
161
162 private:
163 DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
164 };
165
166 #undef __
167
168 #define __ assembler->
169
CreateFPToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is64bit)170 static void CreateFPToIntLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
171 LocationSummary* locations =
172 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
173 locations->SetInAt(0, Location::RequiresFpuRegister());
174 locations->SetOut(Location::RequiresRegister());
175 if (is64bit) {
176 locations->AddTemp(Location::RequiresFpuRegister());
177 }
178 }
179
CreateIntToFPLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is64bit)180 static void CreateIntToFPLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is64bit) {
181 LocationSummary* locations =
182 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
183 locations->SetInAt(0, Location::RequiresRegister());
184 locations->SetOut(Location::RequiresFpuRegister());
185 if (is64bit) {
186 locations->AddTemp(Location::RequiresFpuRegister());
187 locations->AddTemp(Location::RequiresFpuRegister());
188 }
189 }
190
MoveFPToInt(LocationSummary * locations,bool is64bit,X86Assembler * assembler)191 static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
192 Location input = locations->InAt(0);
193 Location output = locations->Out();
194 if (is64bit) {
195 // Need to use the temporary.
196 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
197 __ movsd(temp, input.AsFpuRegister<XmmRegister>());
198 __ movd(output.AsRegisterPairLow<Register>(), temp);
199 __ psrlq(temp, Immediate(32));
200 __ movd(output.AsRegisterPairHigh<Register>(), temp);
201 } else {
202 __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
203 }
204 }
205
MoveIntToFP(LocationSummary * locations,bool is64bit,X86Assembler * assembler)206 static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
207 Location input = locations->InAt(0);
208 Location output = locations->Out();
209 if (is64bit) {
210 // Need to use the temporary.
211 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
212 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
213 __ movd(temp1, input.AsRegisterPairLow<Register>());
214 __ movd(temp2, input.AsRegisterPairHigh<Register>());
215 __ punpckldq(temp1, temp2);
216 __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
217 } else {
218 __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
219 }
220 }
221
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)222 void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
223 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ true);
224 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)225 void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
226 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ true);
227 }
228
VisitDoubleDoubleToRawLongBits(HInvoke * invoke)229 void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
230 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
231 }
VisitDoubleLongBitsToDouble(HInvoke * invoke)232 void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
233 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ true, GetAssembler());
234 }
235
VisitFloatFloatToRawIntBits(HInvoke * invoke)236 void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
237 CreateFPToIntLocations(allocator_, invoke, /* is64bit= */ false);
238 }
VisitFloatIntBitsToFloat(HInvoke * invoke)239 void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
240 CreateIntToFPLocations(allocator_, invoke, /* is64bit= */ false);
241 }
242
VisitFloatFloatToRawIntBits(HInvoke * invoke)243 void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
244 MoveFPToInt(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
245 }
VisitFloatIntBitsToFloat(HInvoke * invoke)246 void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
247 MoveIntToFP(invoke->GetLocations(), /* is64bit= */ false, GetAssembler());
248 }
249
CreateIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)250 static void CreateIntToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
251 LocationSummary* locations =
252 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
253 locations->SetInAt(0, Location::RequiresRegister());
254 locations->SetOut(Location::SameAsFirstInput());
255 }
256
CreateLongToIntLocations(ArenaAllocator * allocator,HInvoke * invoke)257 static void CreateLongToIntLocations(ArenaAllocator* allocator, HInvoke* invoke) {
258 LocationSummary* locations =
259 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
260 locations->SetInAt(0, Location::RequiresRegister());
261 locations->SetOut(Location::RequiresRegister());
262 }
263
CreateLongToLongLocations(ArenaAllocator * allocator,HInvoke * invoke)264 static void CreateLongToLongLocations(ArenaAllocator* allocator, HInvoke* invoke) {
265 LocationSummary* locations =
266 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
267 locations->SetInAt(0, Location::RequiresRegister());
268 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
269 }
270
GenReverseBytes(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)271 static void GenReverseBytes(LocationSummary* locations,
272 DataType::Type size,
273 X86Assembler* assembler) {
274 Register out = locations->Out().AsRegister<Register>();
275
276 switch (size) {
277 case DataType::Type::kInt16:
278 // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
279 __ bswapl(out);
280 __ sarl(out, Immediate(16));
281 break;
282 case DataType::Type::kInt32:
283 __ bswapl(out);
284 break;
285 default:
286 LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
287 UNREACHABLE();
288 }
289 }
290
VisitIntegerReverseBytes(HInvoke * invoke)291 void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
292 CreateIntToIntLocations(allocator_, invoke);
293 }
294
VisitIntegerReverseBytes(HInvoke * invoke)295 void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
296 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
297 }
298
VisitLongReverseBytes(HInvoke * invoke)299 void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
300 CreateLongToLongLocations(allocator_, invoke);
301 }
302
VisitLongReverseBytes(HInvoke * invoke)303 void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
304 LocationSummary* locations = invoke->GetLocations();
305 Location input = locations->InAt(0);
306 Register input_lo = input.AsRegisterPairLow<Register>();
307 Register input_hi = input.AsRegisterPairHigh<Register>();
308 Location output = locations->Out();
309 Register output_lo = output.AsRegisterPairLow<Register>();
310 Register output_hi = output.AsRegisterPairHigh<Register>();
311
312 X86Assembler* assembler = GetAssembler();
313 // Assign the inputs to the outputs, mixing low/high.
314 __ movl(output_lo, input_hi);
315 __ movl(output_hi, input_lo);
316 __ bswapl(output_lo);
317 __ bswapl(output_hi);
318 }
319
VisitShortReverseBytes(HInvoke * invoke)320 void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
321 CreateIntToIntLocations(allocator_, invoke);
322 }
323
VisitShortReverseBytes(HInvoke * invoke)324 void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
325 GenReverseBytes(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
326 }
327
CreateFPToFPLocations(ArenaAllocator * allocator,HInvoke * invoke)328 static void CreateFPToFPLocations(ArenaAllocator* allocator, HInvoke* invoke) {
329 LocationSummary* locations =
330 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
331 locations->SetInAt(0, Location::RequiresFpuRegister());
332 locations->SetOut(Location::RequiresFpuRegister());
333 }
334
VisitMathSqrt(HInvoke * invoke)335 void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
336 CreateFPToFPLocations(allocator_, invoke);
337 }
338
VisitMathSqrt(HInvoke * invoke)339 void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
340 LocationSummary* locations = invoke->GetLocations();
341 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
342 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
343
344 GetAssembler()->sqrtsd(out, in);
345 }
346
CreateSSE41FPToFPLocations(ArenaAllocator * allocator,HInvoke * invoke,CodeGeneratorX86 * codegen)347 static void CreateSSE41FPToFPLocations(ArenaAllocator* allocator,
348 HInvoke* invoke,
349 CodeGeneratorX86* codegen) {
350 // Do we have instruction support?
351 if (!codegen->GetInstructionSetFeatures().HasSSE4_1()) {
352 return;
353 }
354
355 CreateFPToFPLocations(allocator, invoke);
356 }
357
GenSSE41FPToFPIntrinsic(HInvoke * invoke,X86Assembler * assembler,int round_mode)358 static void GenSSE41FPToFPIntrinsic(HInvoke* invoke, X86Assembler* assembler, int round_mode) {
359 LocationSummary* locations = invoke->GetLocations();
360 DCHECK(!locations->WillCall());
361 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
362 XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
363 __ roundsd(out, in, Immediate(round_mode));
364 }
365
VisitMathCeil(HInvoke * invoke)366 void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
367 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
368 }
369
VisitMathCeil(HInvoke * invoke)370 void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
371 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 2);
372 }
373
VisitMathFloor(HInvoke * invoke)374 void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
375 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
376 }
377
VisitMathFloor(HInvoke * invoke)378 void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
379 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 1);
380 }
381
VisitMathRint(HInvoke * invoke)382 void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
383 CreateSSE41FPToFPLocations(allocator_, invoke, codegen_);
384 }
385
VisitMathRint(HInvoke * invoke)386 void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
387 GenSSE41FPToFPIntrinsic(invoke, GetAssembler(), 0);
388 }
389
VisitMathRoundFloat(HInvoke * invoke)390 void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
391 // Do we have instruction support?
392 if (!codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
393 return;
394 }
395
396 HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
397 DCHECK(static_or_direct != nullptr);
398 LocationSummary* locations =
399 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
400 locations->SetInAt(0, Location::RequiresFpuRegister());
401 if (static_or_direct->HasSpecialInput() &&
402 invoke->InputAt(
403 static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
404 locations->SetInAt(1, Location::RequiresRegister());
405 }
406 locations->SetOut(Location::RequiresRegister());
407 locations->AddTemp(Location::RequiresFpuRegister());
408 locations->AddTemp(Location::RequiresFpuRegister());
409 }
410
VisitMathRoundFloat(HInvoke * invoke)411 void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
412 LocationSummary* locations = invoke->GetLocations();
413 DCHECK(!locations->WillCall());
414
415 XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
416 XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
417 XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
418 Register out = locations->Out().AsRegister<Register>();
419 NearLabel skip_incr, done;
420 X86Assembler* assembler = GetAssembler();
421
422 // Since no direct x86 rounding instruction matches the required semantics,
423 // this intrinsic is implemented as follows:
424 // result = floor(in);
425 // if (in - result >= 0.5f)
426 // result = result + 1.0f;
427 __ movss(t2, in);
428 __ roundss(t1, in, Immediate(1));
429 __ subss(t2, t1);
430 if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
431 // Direct constant area available.
432 HX86ComputeBaseMethodAddress* method_address =
433 invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
434 Register constant_area = locations->InAt(1).AsRegister<Register>();
435 __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
436 method_address,
437 constant_area));
438 __ j(kBelow, &skip_incr);
439 __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
440 method_address,
441 constant_area));
442 __ Bind(&skip_incr);
443 } else {
444 // No constant area: go through stack.
445 __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
446 __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
447 __ comiss(t2, Address(ESP, 4));
448 __ j(kBelow, &skip_incr);
449 __ addss(t1, Address(ESP, 0));
450 __ Bind(&skip_incr);
451 __ addl(ESP, Immediate(8));
452 }
453
454 // Final conversion to an integer. Unfortunately this also does not have a
455 // direct x86 instruction, since NaN should map to 0 and large positive
456 // values need to be clipped to the extreme value.
457 __ movl(out, Immediate(kPrimIntMax));
458 __ cvtsi2ss(t2, out);
459 __ comiss(t1, t2);
460 __ j(kAboveEqual, &done); // clipped to max (already in out), does not jump on unordered
461 __ movl(out, Immediate(0)); // does not change flags
462 __ j(kUnordered, &done); // NaN mapped to 0 (just moved in out)
463 __ cvttss2si(out, t1);
464 __ Bind(&done);
465 }
466
CreateFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)467 static void CreateFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
468 LocationSummary* locations =
469 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
470 InvokeRuntimeCallingConvention calling_convention;
471 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
472 locations->SetOut(Location::FpuRegisterLocation(XMM0));
473 }
474
GenFPToFPCall(HInvoke * invoke,CodeGeneratorX86 * codegen,QuickEntrypointEnum entry)475 static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
476 LocationSummary* locations = invoke->GetLocations();
477 DCHECK(locations->WillCall());
478 DCHECK(invoke->IsInvokeStaticOrDirect());
479 X86Assembler* assembler = codegen->GetAssembler();
480
481 // We need some place to pass the parameters.
482 __ subl(ESP, Immediate(16));
483 __ cfi().AdjustCFAOffset(16);
484
485 // Pass the parameters at the bottom of the stack.
486 __ movsd(Address(ESP, 0), XMM0);
487
488 // If we have a second parameter, pass it next.
489 if (invoke->GetNumberOfArguments() == 2) {
490 __ movsd(Address(ESP, 8), XMM1);
491 }
492
493 // Now do the actual call.
494 codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());
495
496 // Extract the return value from the FP stack.
497 __ fstpl(Address(ESP, 0));
498 __ movsd(XMM0, Address(ESP, 0));
499
500 // And clean up the stack.
501 __ addl(ESP, Immediate(16));
502 __ cfi().AdjustCFAOffset(-16);
503 }
504
CreateLowestOneBitLocations(ArenaAllocator * allocator,bool is_long,HInvoke * invoke)505 static void CreateLowestOneBitLocations(ArenaAllocator* allocator, bool is_long, HInvoke* invoke) {
506 LocationSummary* locations =
507 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
508 if (is_long) {
509 locations->SetInAt(0, Location::RequiresRegister());
510 } else {
511 locations->SetInAt(0, Location::Any());
512 }
513 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
514 }
515
GenLowestOneBit(X86Assembler * assembler,CodeGeneratorX86 * codegen,bool is_long,HInvoke * invoke)516 static void GenLowestOneBit(X86Assembler* assembler,
517 CodeGeneratorX86* codegen,
518 bool is_long,
519 HInvoke* invoke) {
520 LocationSummary* locations = invoke->GetLocations();
521 Location src = locations->InAt(0);
522 Location out_loc = locations->Out();
523
524 if (invoke->InputAt(0)->IsConstant()) {
525 // Evaluate this at compile time.
526 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
527 if (value == 0) {
528 if (is_long) {
529 __ xorl(out_loc.AsRegisterPairLow<Register>(), out_loc.AsRegisterPairLow<Register>());
530 __ xorl(out_loc.AsRegisterPairHigh<Register>(), out_loc.AsRegisterPairHigh<Register>());
531 } else {
532 __ xorl(out_loc.AsRegister<Register>(), out_loc.AsRegister<Register>());
533 }
534 return;
535 }
536 // Nonzero value.
537 value = is_long ? CTZ(static_cast<uint64_t>(value))
538 : CTZ(static_cast<uint32_t>(value));
539 if (is_long) {
540 if (value >= 32) {
541 int shift = value-32;
542 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 0);
543 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 1 << shift);
544 } else {
545 codegen->Load32BitValue(out_loc.AsRegisterPairLow<Register>(), 1 << value);
546 codegen->Load32BitValue(out_loc.AsRegisterPairHigh<Register>(), 0);
547 }
548 } else {
549 codegen->Load32BitValue(out_loc.AsRegister<Register>(), 1 << value);
550 }
551 return;
552 }
553 // Handle non constant case
554 if (is_long) {
555 DCHECK(src.IsRegisterPair());
556 Register src_lo = src.AsRegisterPairLow<Register>();
557 Register src_hi = src.AsRegisterPairHigh<Register>();
558
559 Register out_lo = out_loc.AsRegisterPairLow<Register>();
560 Register out_hi = out_loc.AsRegisterPairHigh<Register>();
561
562 __ movl(out_lo, src_lo);
563 __ movl(out_hi, src_hi);
564
565 __ negl(out_lo);
566 __ adcl(out_hi, Immediate(0));
567 __ negl(out_hi);
568
569 __ andl(out_lo, src_lo);
570 __ andl(out_hi, src_hi);
571 } else {
572 if (codegen->GetInstructionSetFeatures().HasAVX2() && src.IsRegister()) {
573 Register out = out_loc.AsRegister<Register>();
574 __ blsi(out, src.AsRegister<Register>());
575 } else {
576 Register out = out_loc.AsRegister<Register>();
577 // Do tmp & -tmp
578 if (src.IsRegister()) {
579 __ movl(out, src.AsRegister<Register>());
580 } else {
581 DCHECK(src.IsStackSlot());
582 __ movl(out, Address(ESP, src.GetStackIndex()));
583 }
584 __ negl(out);
585
586 if (src.IsRegister()) {
587 __ andl(out, src.AsRegister<Register>());
588 } else {
589 __ andl(out, Address(ESP, src.GetStackIndex()));
590 }
591 }
592 }
593 }
594
VisitMathCos(HInvoke * invoke)595 void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
596 CreateFPToFPCallLocations(allocator_, invoke);
597 }
598
VisitMathCos(HInvoke * invoke)599 void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
600 GenFPToFPCall(invoke, codegen_, kQuickCos);
601 }
602
VisitMathSin(HInvoke * invoke)603 void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
604 CreateFPToFPCallLocations(allocator_, invoke);
605 }
606
VisitMathSin(HInvoke * invoke)607 void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
608 GenFPToFPCall(invoke, codegen_, kQuickSin);
609 }
610
VisitMathAcos(HInvoke * invoke)611 void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
612 CreateFPToFPCallLocations(allocator_, invoke);
613 }
614
VisitMathAcos(HInvoke * invoke)615 void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
616 GenFPToFPCall(invoke, codegen_, kQuickAcos);
617 }
618
VisitMathAsin(HInvoke * invoke)619 void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
620 CreateFPToFPCallLocations(allocator_, invoke);
621 }
622
VisitMathAsin(HInvoke * invoke)623 void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
624 GenFPToFPCall(invoke, codegen_, kQuickAsin);
625 }
626
VisitMathAtan(HInvoke * invoke)627 void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
628 CreateFPToFPCallLocations(allocator_, invoke);
629 }
630
VisitMathAtan(HInvoke * invoke)631 void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
632 GenFPToFPCall(invoke, codegen_, kQuickAtan);
633 }
634
VisitMathCbrt(HInvoke * invoke)635 void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
636 CreateFPToFPCallLocations(allocator_, invoke);
637 }
638
VisitMathCbrt(HInvoke * invoke)639 void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
640 GenFPToFPCall(invoke, codegen_, kQuickCbrt);
641 }
642
VisitMathCosh(HInvoke * invoke)643 void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
644 CreateFPToFPCallLocations(allocator_, invoke);
645 }
646
VisitMathCosh(HInvoke * invoke)647 void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
648 GenFPToFPCall(invoke, codegen_, kQuickCosh);
649 }
650
VisitMathExp(HInvoke * invoke)651 void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
652 CreateFPToFPCallLocations(allocator_, invoke);
653 }
654
VisitMathExp(HInvoke * invoke)655 void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
656 GenFPToFPCall(invoke, codegen_, kQuickExp);
657 }
658
VisitMathExpm1(HInvoke * invoke)659 void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
660 CreateFPToFPCallLocations(allocator_, invoke);
661 }
662
VisitMathExpm1(HInvoke * invoke)663 void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
664 GenFPToFPCall(invoke, codegen_, kQuickExpm1);
665 }
666
VisitMathLog(HInvoke * invoke)667 void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
668 CreateFPToFPCallLocations(allocator_, invoke);
669 }
670
VisitMathLog(HInvoke * invoke)671 void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
672 GenFPToFPCall(invoke, codegen_, kQuickLog);
673 }
674
VisitMathLog10(HInvoke * invoke)675 void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
676 CreateFPToFPCallLocations(allocator_, invoke);
677 }
678
VisitMathLog10(HInvoke * invoke)679 void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
680 GenFPToFPCall(invoke, codegen_, kQuickLog10);
681 }
682
VisitMathSinh(HInvoke * invoke)683 void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
684 CreateFPToFPCallLocations(allocator_, invoke);
685 }
686
VisitMathSinh(HInvoke * invoke)687 void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
688 GenFPToFPCall(invoke, codegen_, kQuickSinh);
689 }
690
VisitMathTan(HInvoke * invoke)691 void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
692 CreateFPToFPCallLocations(allocator_, invoke);
693 }
694
VisitMathTan(HInvoke * invoke)695 void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
696 GenFPToFPCall(invoke, codegen_, kQuickTan);
697 }
698
VisitMathTanh(HInvoke * invoke)699 void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
700 CreateFPToFPCallLocations(allocator_, invoke);
701 }
702
VisitMathTanh(HInvoke * invoke)703 void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
704 GenFPToFPCall(invoke, codegen_, kQuickTanh);
705 }
706
VisitIntegerLowestOneBit(HInvoke * invoke)707 void IntrinsicLocationsBuilderX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
708 CreateLowestOneBitLocations(allocator_, /*is_long=*/ false, invoke);
709 }
VisitIntegerLowestOneBit(HInvoke * invoke)710 void IntrinsicCodeGeneratorX86::VisitIntegerLowestOneBit(HInvoke* invoke) {
711 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ false, invoke);
712 }
713
VisitLongLowestOneBit(HInvoke * invoke)714 void IntrinsicLocationsBuilderX86::VisitLongLowestOneBit(HInvoke* invoke) {
715 CreateLowestOneBitLocations(allocator_, /*is_long=*/ true, invoke);
716 }
717
VisitLongLowestOneBit(HInvoke * invoke)718 void IntrinsicCodeGeneratorX86::VisitLongLowestOneBit(HInvoke* invoke) {
719 GenLowestOneBit(GetAssembler(), codegen_, /*is_long=*/ true, invoke);
720 }
721
CreateFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)722 static void CreateFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
723 LocationSummary* locations =
724 new (allocator) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
725 InvokeRuntimeCallingConvention calling_convention;
726 locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
727 locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
728 locations->SetOut(Location::FpuRegisterLocation(XMM0));
729 }
730
CreateFPFPFPToFPCallLocations(ArenaAllocator * allocator,HInvoke * invoke)731 static void CreateFPFPFPToFPCallLocations(ArenaAllocator* allocator, HInvoke* invoke) {
732 DCHECK_EQ(invoke->GetNumberOfArguments(), 3U);
733 LocationSummary* locations =
734 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
735 InvokeRuntimeCallingConvention calling_convention;
736 locations->SetInAt(0, Location::RequiresFpuRegister());
737 locations->SetInAt(1, Location::RequiresFpuRegister());
738 locations->SetInAt(2, Location::RequiresFpuRegister());
739 locations->SetOut(Location::SameAsFirstInput());
740 }
741
VisitMathAtan2(HInvoke * invoke)742 void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
743 CreateFPFPToFPCallLocations(allocator_, invoke);
744 }
745
VisitMathAtan2(HInvoke * invoke)746 void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
747 GenFPToFPCall(invoke, codegen_, kQuickAtan2);
748 }
749
VisitMathPow(HInvoke * invoke)750 void IntrinsicLocationsBuilderX86::VisitMathPow(HInvoke* invoke) {
751 CreateFPFPToFPCallLocations(allocator_, invoke);
752 }
753
VisitMathPow(HInvoke * invoke)754 void IntrinsicCodeGeneratorX86::VisitMathPow(HInvoke* invoke) {
755 GenFPToFPCall(invoke, codegen_, kQuickPow);
756 }
757
VisitMathHypot(HInvoke * invoke)758 void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
759 CreateFPFPToFPCallLocations(allocator_, invoke);
760 }
761
VisitMathHypot(HInvoke * invoke)762 void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
763 GenFPToFPCall(invoke, codegen_, kQuickHypot);
764 }
765
VisitMathNextAfter(HInvoke * invoke)766 void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
767 CreateFPFPToFPCallLocations(allocator_, invoke);
768 }
769
VisitMathNextAfter(HInvoke * invoke)770 void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
771 GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
772 }
773
CreateSystemArrayCopyLocations(HInvoke * invoke)774 static void CreateSystemArrayCopyLocations(HInvoke* invoke) {
775 // We need at least two of the positions or length to be an integer constant,
776 // or else we won't have enough free registers.
777 HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
778 HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
779 HIntConstant* length = invoke->InputAt(4)->AsIntConstant();
780
781 int num_constants =
782 ((src_pos != nullptr) ? 1 : 0)
783 + ((dest_pos != nullptr) ? 1 : 0)
784 + ((length != nullptr) ? 1 : 0);
785
786 if (num_constants < 2) {
787 // Not enough free registers.
788 return;
789 }
790
791 // As long as we are checking, we might as well check to see if the src and dest
792 // positions are >= 0.
793 if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
794 (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
795 // We will have to fail anyways.
796 return;
797 }
798
799 // And since we are already checking, check the length too.
800 if (length != nullptr) {
801 int32_t len = length->GetValue();
802 if (len < 0) {
803 // Just call as normal.
804 return;
805 }
806 }
807
808 // Okay, it is safe to generate inline code.
809 LocationSummary* locations =
810 new (invoke->GetBlock()->GetGraph()->GetAllocator())
811 LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
812 // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
813 locations->SetInAt(0, Location::RequiresRegister());
814 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
815 locations->SetInAt(2, Location::RequiresRegister());
816 locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
817 locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));
818
819 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
820 locations->AddTemp(Location::RegisterLocation(ESI));
821 locations->AddTemp(Location::RegisterLocation(EDI));
822 locations->AddTemp(Location::RegisterLocation(ECX));
823 }
824
CheckPosition(X86Assembler * assembler,Location pos,Register input,Location length,SlowPathCode * slow_path,Register temp,bool length_is_input_length=false)825 static void CheckPosition(X86Assembler* assembler,
826 Location pos,
827 Register input,
828 Location length,
829 SlowPathCode* slow_path,
830 Register temp,
831 bool length_is_input_length = false) {
832 // Where is the length in the Array?
833 const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();
834
835 if (pos.IsConstant()) {
836 int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
837 if (pos_const == 0) {
838 if (!length_is_input_length) {
839 // Check that length(input) >= length.
840 if (length.IsConstant()) {
841 __ cmpl(Address(input, length_offset),
842 Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
843 } else {
844 __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
845 }
846 __ j(kLess, slow_path->GetEntryLabel());
847 }
848 } else {
849 // Check that length(input) >= pos.
850 __ movl(temp, Address(input, length_offset));
851 __ subl(temp, Immediate(pos_const));
852 __ j(kLess, slow_path->GetEntryLabel());
853
854 // Check that (length(input) - pos) >= length.
855 if (length.IsConstant()) {
856 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
857 } else {
858 __ cmpl(temp, length.AsRegister<Register>());
859 }
860 __ j(kLess, slow_path->GetEntryLabel());
861 }
862 } else if (length_is_input_length) {
863 // The only way the copy can succeed is if pos is zero.
864 Register pos_reg = pos.AsRegister<Register>();
865 __ testl(pos_reg, pos_reg);
866 __ j(kNotEqual, slow_path->GetEntryLabel());
867 } else {
868 // Check that pos >= 0.
869 Register pos_reg = pos.AsRegister<Register>();
870 __ testl(pos_reg, pos_reg);
871 __ j(kLess, slow_path->GetEntryLabel());
872
873 // Check that pos <= length(input).
874 __ cmpl(Address(input, length_offset), pos_reg);
875 __ j(kLess, slow_path->GetEntryLabel());
876
877 // Check that (length(input) - pos) >= length.
878 __ movl(temp, Address(input, length_offset));
879 __ subl(temp, pos_reg);
880 if (length.IsConstant()) {
881 __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
882 } else {
883 __ cmpl(temp, length.AsRegister<Register>());
884 }
885 __ j(kLess, slow_path->GetEntryLabel());
886 }
887 }
888
SystemArrayCopyPrimitive(HInvoke * invoke,X86Assembler * assembler,CodeGeneratorX86 * codegen,DataType::Type type)889 static void SystemArrayCopyPrimitive(HInvoke* invoke,
890 X86Assembler* assembler,
891 CodeGeneratorX86* codegen,
892 DataType::Type type) {
893 LocationSummary* locations = invoke->GetLocations();
894 Register src = locations->InAt(0).AsRegister<Register>();
895 Location src_pos = locations->InAt(1);
896 Register dest = locations->InAt(2).AsRegister<Register>();
897 Location dest_pos = locations->InAt(3);
898 Location length = locations->InAt(4);
899
900 // Temporaries that we need for MOVSB/W/L.
901 Register src_base = locations->GetTemp(0).AsRegister<Register>();
902 DCHECK_EQ(src_base, ESI);
903 Register dest_base = locations->GetTemp(1).AsRegister<Register>();
904 DCHECK_EQ(dest_base, EDI);
905 Register count = locations->GetTemp(2).AsRegister<Register>();
906 DCHECK_EQ(count, ECX);
907
908 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
909 codegen->AddSlowPath(slow_path);
910
911 // Bail out if the source and destination are the same (to handle overlap).
912 __ cmpl(src, dest);
913 __ j(kEqual, slow_path->GetEntryLabel());
914
915 // Bail out if the source is null.
916 __ testl(src, src);
917 __ j(kEqual, slow_path->GetEntryLabel());
918
919 // Bail out if the destination is null.
920 __ testl(dest, dest);
921 __ j(kEqual, slow_path->GetEntryLabel());
922
923 // If the length is negative, bail out.
924 // We have already checked in the LocationsBuilder for the constant case.
925 if (!length.IsConstant()) {
926 __ cmpl(length.AsRegister<Register>(), length.AsRegister<Register>());
927 __ j(kLess, slow_path->GetEntryLabel());
928 }
929
930 // We need the count in ECX.
931 if (length.IsConstant()) {
932 __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
933 } else {
934 __ movl(count, length.AsRegister<Register>());
935 }
936
937 // Validity checks: source. Use src_base as a temporary register.
938 CheckPosition(assembler, src_pos, src, Location::RegisterLocation(count), slow_path, src_base);
939
940 // Validity checks: dest. Use src_base as a temporary register.
941 CheckPosition(assembler, dest_pos, dest, Location::RegisterLocation(count), slow_path, src_base);
942
943 // Okay, everything checks out. Finally time to do the copy.
944 // Check assumption that sizeof(Char) is 2 (used in scaling below).
945 const size_t data_size = DataType::Size(type);
946 const ScaleFactor scale_factor = CodeGenerator::ScaleFactorForType(type);
947 const uint32_t data_offset = mirror::Array::DataOffset(data_size).Uint32Value();
948
949 if (src_pos.IsConstant()) {
950 int32_t src_pos_const = src_pos.GetConstant()->AsIntConstant()->GetValue();
951 __ leal(src_base, Address(src, data_size * src_pos_const + data_offset));
952 } else {
953 __ leal(src_base, Address(src, src_pos.AsRegister<Register>(), scale_factor, data_offset));
954 }
955 if (dest_pos.IsConstant()) {
956 int32_t dest_pos_const = dest_pos.GetConstant()->AsIntConstant()->GetValue();
957 __ leal(dest_base, Address(dest, data_size * dest_pos_const + data_offset));
958 } else {
959 __ leal(dest_base, Address(dest, dest_pos.AsRegister<Register>(), scale_factor, data_offset));
960 }
961
962 // Do the move.
963 switch (type) {
964 case DataType::Type::kInt8:
965 __ rep_movsb();
966 break;
967 case DataType::Type::kUint16:
968 __ rep_movsw();
969 break;
970 case DataType::Type::kInt32:
971 __ rep_movsl();
972 break;
973 default:
974 LOG(FATAL) << "Unexpected data type for intrinsic";
975 }
976 __ Bind(slow_path->GetExitLabel());
977 }
978
VisitSystemArrayCopyChar(HInvoke * invoke)979 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
980 CreateSystemArrayCopyLocations(invoke);
981 }
982
VisitSystemArrayCopyChar(HInvoke * invoke)983 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
984 X86Assembler* assembler = GetAssembler();
985 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kUint16);
986 }
987
VisitSystemArrayCopyByte(HInvoke * invoke)988 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
989 X86Assembler* assembler = GetAssembler();
990 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt8);
991 }
992
VisitSystemArrayCopyByte(HInvoke * invoke)993 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyByte(HInvoke* invoke) {
994 CreateSystemArrayCopyLocations(invoke);
995 }
996
VisitSystemArrayCopyInt(HInvoke * invoke)997 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
998 X86Assembler* assembler = GetAssembler();
999 SystemArrayCopyPrimitive(invoke, assembler, codegen_, DataType::Type::kInt32);
1000 }
1001
VisitSystemArrayCopyInt(HInvoke * invoke)1002 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyInt(HInvoke* invoke) {
1003 CreateSystemArrayCopyLocations(invoke);
1004 }
1005
VisitStringCompareTo(HInvoke * invoke)1006 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
1007 // The inputs plus one temp.
1008 LocationSummary* locations = new (allocator_) LocationSummary(
1009 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1010 InvokeRuntimeCallingConvention calling_convention;
1011 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1012 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1013 locations->SetOut(Location::RegisterLocation(EAX));
1014 }
1015
VisitStringCompareTo(HInvoke * invoke)1016 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1017 X86Assembler* assembler = GetAssembler();
1018 LocationSummary* locations = invoke->GetLocations();
1019
1020 // Note that the null check must have been done earlier.
1021 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1022
1023 Register argument = locations->InAt(1).AsRegister<Register>();
1024 __ testl(argument, argument);
1025 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1026 codegen_->AddSlowPath(slow_path);
1027 __ j(kEqual, slow_path->GetEntryLabel());
1028
1029 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1030 __ Bind(slow_path->GetExitLabel());
1031 }
1032
VisitStringEquals(HInvoke * invoke)1033 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1034 LocationSummary* locations =
1035 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1036 locations->SetInAt(0, Location::RequiresRegister());
1037 locations->SetInAt(1, Location::RequiresRegister());
1038
1039 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1040 locations->AddTemp(Location::RegisterLocation(ECX));
1041 locations->AddTemp(Location::RegisterLocation(EDI));
1042
1043 // Set output, ESI needed for repe_cmpsl instruction anyways.
1044 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1045 }
1046
VisitStringEquals(HInvoke * invoke)1047 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1048 X86Assembler* assembler = GetAssembler();
1049 LocationSummary* locations = invoke->GetLocations();
1050
1051 Register str = locations->InAt(0).AsRegister<Register>();
1052 Register arg = locations->InAt(1).AsRegister<Register>();
1053 Register ecx = locations->GetTemp(0).AsRegister<Register>();
1054 Register edi = locations->GetTemp(1).AsRegister<Register>();
1055 Register esi = locations->Out().AsRegister<Register>();
1056
1057 NearLabel end, return_true, return_false;
1058
1059 // Get offsets of count, value, and class fields within a string object.
1060 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1061 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1062 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1063
1064 // Note that the null check must have been done earlier.
1065 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1066
1067 StringEqualsOptimizations optimizations(invoke);
1068 if (!optimizations.GetArgumentNotNull()) {
1069 // Check if input is null, return false if it is.
1070 __ testl(arg, arg);
1071 __ j(kEqual, &return_false);
1072 }
1073
1074 if (!optimizations.GetArgumentIsString()) {
1075 // Instanceof check for the argument by comparing class fields.
1076 // All string objects must have the same type since String cannot be subclassed.
1077 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1078 // If the argument is a string object, its class field must be equal to receiver's class field.
1079 //
1080 // As the String class is expected to be non-movable, we can read the class
1081 // field from String.equals' arguments without read barriers.
1082 AssertNonMovableStringClass();
1083 // Also, because we use the loaded class references only to compare them, we
1084 // don't need to unpoison them.
1085 // /* HeapReference<Class> */ ecx = str->klass_
1086 __ movl(ecx, Address(str, class_offset));
1087 // if (ecx != /* HeapReference<Class> */ arg->klass_) return false
1088 __ cmpl(ecx, Address(arg, class_offset));
1089 __ j(kNotEqual, &return_false);
1090 }
1091
1092 // Reference equality check, return true if same reference.
1093 __ cmpl(str, arg);
1094 __ j(kEqual, &return_true);
1095
1096 // Load length and compression flag of receiver string.
1097 __ movl(ecx, Address(str, count_offset));
1098 // Check if lengths and compression flags are equal, return false if they're not.
1099 // Two identical strings will always have same compression style since
1100 // compression style is decided on alloc.
1101 __ cmpl(ecx, Address(arg, count_offset));
1102 __ j(kNotEqual, &return_false);
1103 // Return true if strings are empty. Even with string compression `count == 0` means empty.
1104 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1105 "Expecting 0=compressed, 1=uncompressed");
1106 __ jecxz(&return_true);
1107
1108 if (mirror::kUseStringCompression) {
1109 NearLabel string_uncompressed;
1110 // Extract length and differentiate between both compressed or both uncompressed.
1111 // Different compression style is cut above.
1112 __ shrl(ecx, Immediate(1));
1113 __ j(kCarrySet, &string_uncompressed);
1114 // Divide string length by 2, rounding up, and continue as if uncompressed.
1115 __ addl(ecx, Immediate(1));
1116 __ shrl(ecx, Immediate(1));
1117 __ Bind(&string_uncompressed);
1118 }
1119 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1120 __ leal(esi, Address(str, value_offset));
1121 __ leal(edi, Address(arg, value_offset));
1122
1123 // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1124 // divisible by 2.
1125 __ addl(ecx, Immediate(1));
1126 __ shrl(ecx, Immediate(1));
1127
1128 // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1129 // or 4 characters (compressed) at a time.
1130 DCHECK_ALIGNED(value_offset, 4);
1131 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1132
1133 // Loop to compare strings two characters at a time starting at the beginning of the string.
1134 __ repe_cmpsl();
1135 // If strings are not equal, zero flag will be cleared.
1136 __ j(kNotEqual, &return_false);
1137
1138 // Return true and exit the function.
1139 // If loop does not result in returning false, we return true.
1140 __ Bind(&return_true);
1141 __ movl(esi, Immediate(1));
1142 __ jmp(&end);
1143
1144 // Return false and exit the function.
1145 __ Bind(&return_false);
1146 __ xorl(esi, esi);
1147 __ Bind(&end);
1148 }
1149
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1150 static void CreateStringIndexOfLocations(HInvoke* invoke,
1151 ArenaAllocator* allocator,
1152 bool start_at_zero) {
1153 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1154 LocationSummary::kCallOnSlowPath,
1155 kIntrinsified);
1156 // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1157 locations->SetInAt(0, Location::RegisterLocation(EDI));
1158 // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1159 // allocator to do that, anyways. We can still do the constant check by checking the parameter
1160 // of the instruction explicitly.
1161 // Note: This works as we don't clobber EAX anywhere.
1162 locations->SetInAt(1, Location::RegisterLocation(EAX));
1163 if (!start_at_zero) {
1164 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1165 }
1166 // As we clobber EDI during execution anyways, also use it as the output.
1167 locations->SetOut(Location::SameAsFirstInput());
1168
1169 // repne scasw uses ECX as the counter.
1170 locations->AddTemp(Location::RegisterLocation(ECX));
1171 // Need another temporary to be able to compute the result.
1172 locations->AddTemp(Location::RequiresRegister());
1173 if (mirror::kUseStringCompression) {
1174 // Need another temporary to be able to save unflagged string length.
1175 locations->AddTemp(Location::RequiresRegister());
1176 }
1177 }
1178
GenerateStringIndexOf(HInvoke * invoke,X86Assembler * assembler,CodeGeneratorX86 * codegen,bool start_at_zero)1179 static void GenerateStringIndexOf(HInvoke* invoke,
1180 X86Assembler* assembler,
1181 CodeGeneratorX86* codegen,
1182 bool start_at_zero) {
1183 LocationSummary* locations = invoke->GetLocations();
1184
1185 // Note that the null check must have been done earlier.
1186 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1187
1188 Register string_obj = locations->InAt(0).AsRegister<Register>();
1189 Register search_value = locations->InAt(1).AsRegister<Register>();
1190 Register counter = locations->GetTemp(0).AsRegister<Register>();
1191 Register string_length = locations->GetTemp(1).AsRegister<Register>();
1192 Register out = locations->Out().AsRegister<Register>();
1193 // Only used when string compression feature is on.
1194 Register string_length_flagged;
1195
1196 // Check our assumptions for registers.
1197 DCHECK_EQ(string_obj, EDI);
1198 DCHECK_EQ(search_value, EAX);
1199 DCHECK_EQ(counter, ECX);
1200 DCHECK_EQ(out, EDI);
1201
1202 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1203 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1204 SlowPathCode* slow_path = nullptr;
1205 HInstruction* code_point = invoke->InputAt(1);
1206 if (code_point->IsIntConstant()) {
1207 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1208 std::numeric_limits<uint16_t>::max()) {
1209 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1210 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1211 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1212 codegen->AddSlowPath(slow_path);
1213 __ jmp(slow_path->GetEntryLabel());
1214 __ Bind(slow_path->GetExitLabel());
1215 return;
1216 }
1217 } else if (code_point->GetType() != DataType::Type::kUint16) {
1218 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1219 slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1220 codegen->AddSlowPath(slow_path);
1221 __ j(kAbove, slow_path->GetEntryLabel());
1222 }
1223
1224 // From here down, we know that we are looking for a char that fits in 16 bits.
1225 // Location of reference to data array within the String object.
1226 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1227 // Location of count within the String object.
1228 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1229
1230 // Load the count field of the string containing the length and compression flag.
1231 __ movl(string_length, Address(string_obj, count_offset));
1232
1233 // Do a zero-length check. Even with string compression `count == 0` means empty.
1234 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1235 "Expecting 0=compressed, 1=uncompressed");
1236 // TODO: Support jecxz.
1237 NearLabel not_found_label;
1238 __ testl(string_length, string_length);
1239 __ j(kEqual, ¬_found_label);
1240
1241 if (mirror::kUseStringCompression) {
1242 string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1243 __ movl(string_length_flagged, string_length);
1244 // Extract the length and shift out the least significant bit used as compression flag.
1245 __ shrl(string_length, Immediate(1));
1246 }
1247
1248 if (start_at_zero) {
1249 // Number of chars to scan is the same as the string length.
1250 __ movl(counter, string_length);
1251
1252 // Move to the start of the string.
1253 __ addl(string_obj, Immediate(value_offset));
1254 } else {
1255 Register start_index = locations->InAt(2).AsRegister<Register>();
1256
1257 // Do a start_index check.
1258 __ cmpl(start_index, string_length);
1259 __ j(kGreaterEqual, ¬_found_label);
1260
1261 // Ensure we have a start index >= 0;
1262 __ xorl(counter, counter);
1263 __ cmpl(start_index, Immediate(0));
1264 __ cmovl(kGreater, counter, start_index);
1265
1266 if (mirror::kUseStringCompression) {
1267 NearLabel modify_counter, offset_uncompressed_label;
1268 __ testl(string_length_flagged, Immediate(1));
1269 __ j(kNotZero, &offset_uncompressed_label);
1270 // Move to the start of the string: string_obj + value_offset + start_index.
1271 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1272 __ jmp(&modify_counter);
1273
1274 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1275 __ Bind(&offset_uncompressed_label);
1276 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1277
1278 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1279 // compare.
1280 __ Bind(&modify_counter);
1281 } else {
1282 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1283 }
1284 __ negl(counter);
1285 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1286 }
1287
1288 if (mirror::kUseStringCompression) {
1289 NearLabel uncompressed_string_comparison;
1290 NearLabel comparison_done;
1291 __ testl(string_length_flagged, Immediate(1));
1292 __ j(kNotZero, &uncompressed_string_comparison);
1293
1294 // Check if EAX (search_value) is ASCII.
1295 __ cmpl(search_value, Immediate(127));
1296 __ j(kGreater, ¬_found_label);
1297 // Comparing byte-per-byte.
1298 __ repne_scasb();
1299 __ jmp(&comparison_done);
1300
1301 // Everything is set up for repne scasw:
1302 // * Comparison address in EDI.
1303 // * Counter in ECX.
1304 __ Bind(&uncompressed_string_comparison);
1305 __ repne_scasw();
1306 __ Bind(&comparison_done);
1307 } else {
1308 __ repne_scasw();
1309 }
1310 // Did we find a match?
1311 __ j(kNotEqual, &not_found_label);
1312
1313 // Yes, we matched. Compute the index of the result.
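// `repne scasw` (or `scasb`) decrements ECX once per code unit scanned, including the match,
// so string_length - counter points one past the match; subtracting one gives the index.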
1314 __ subl(string_length, counter);
1315 __ leal(out, Address(string_length, -1));
1316
1317 NearLabel done;
1318 __ jmp(&done);
1319
1320 // Failed to match; return -1.
1321 __ Bind(&not_found_label);
1322 __ movl(out, Immediate(-1));
1323
1324 // And join up at the end.
1325 __ Bind(&done);
1326 if (slow_path != nullptr) {
1327 __ Bind(slow_path->GetExitLabel());
1328 }
1329 }
1330
VisitStringIndexOf(HInvoke * invoke)1331 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1332 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ true);
1333 }
1334
VisitStringIndexOf(HInvoke * invoke)1335 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1336 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ true);
1337 }
1338
VisitStringIndexOfAfter(HInvoke * invoke)1339 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1340 CreateStringIndexOfLocations(invoke, allocator_, /* start_at_zero= */ false);
1341 }
1342
VisitStringIndexOfAfter(HInvoke * invoke)1343 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1344 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, /* start_at_zero= */ false);
1345 }
1346
VisitStringNewStringFromBytes(HInvoke * invoke)1347 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1348 LocationSummary* locations = new (allocator_) LocationSummary(
1349 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1350 InvokeRuntimeCallingConvention calling_convention;
1351 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1352 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1353 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1354 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1355 locations->SetOut(Location::RegisterLocation(EAX));
1356 }
1357
VisitStringNewStringFromBytes(HInvoke * invoke)1358 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1359 X86Assembler* assembler = GetAssembler();
1360 LocationSummary* locations = invoke->GetLocations();
1361
1362 Register byte_array = locations->InAt(0).AsRegister<Register>();
1363 __ testl(byte_array, byte_array);
1364 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1365 codegen_->AddSlowPath(slow_path);
1366 __ j(kEqual, slow_path->GetEntryLabel());
1367
1368 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1369 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1370 __ Bind(slow_path->GetExitLabel());
1371 }
1372
VisitStringNewStringFromChars(HInvoke * invoke)1373 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1374 LocationSummary* locations =
1375 new (allocator_) LocationSummary(invoke, LocationSummary::kCallOnMainOnly, kIntrinsified);
1376 InvokeRuntimeCallingConvention calling_convention;
1377 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1378 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1379 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1380 locations->SetOut(Location::RegisterLocation(EAX));
1381 }
1382
VisitStringNewStringFromChars(HInvoke * invoke)1383 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1384 // No need to emit code checking whether `locations->InAt(2)` is a null
1385 // pointer, as callers of the native method
1386 //
1387 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1388 //
1389 // all include a null check on `data` before calling that method.
1390 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1391 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1392 }
1393
VisitStringNewStringFromString(HInvoke * invoke)1394 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1395 LocationSummary* locations = new (allocator_) LocationSummary(
1396 invoke, LocationSummary::kCallOnMainAndSlowPath, kIntrinsified);
1397 InvokeRuntimeCallingConvention calling_convention;
1398 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1399 locations->SetOut(Location::RegisterLocation(EAX));
1400 }
1401
VisitStringNewStringFromString(HInvoke * invoke)1402 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1403 X86Assembler* assembler = GetAssembler();
1404 LocationSummary* locations = invoke->GetLocations();
1405
1406 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1407 __ testl(string_to_copy, string_to_copy);
1408 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
1409 codegen_->AddSlowPath(slow_path);
1410 __ j(kEqual, slow_path->GetEntryLabel());
1411
1412 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1413 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1414 __ Bind(slow_path->GetExitLabel());
1415 }
1416
VisitStringGetCharsNoCheck(HInvoke * invoke)1417 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1418 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1419 LocationSummary* locations =
1420 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1421 locations->SetInAt(0, Location::RequiresRegister());
1422 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1423 // Place srcEnd in ECX to save a move below.
1424 locations->SetInAt(2, Location::RegisterLocation(ECX));
1425 locations->SetInAt(3, Location::RequiresRegister());
1426 locations->SetInAt(4, Location::RequiresRegister());
1427
1428 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1429 // We don't have enough registers to also grab ECX, so handle below.
1430 locations->AddTemp(Location::RegisterLocation(ESI));
1431 locations->AddTemp(Location::RegisterLocation(EDI));
1432 }
1433
VisitStringGetCharsNoCheck(HInvoke * invoke)1434 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1435 X86Assembler* assembler = GetAssembler();
1436 LocationSummary* locations = invoke->GetLocations();
1437
1438 size_t char_component_size = DataType::Size(DataType::Type::kUint16);
1439 // Location of data in char array buffer.
1440 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1441 // Location of char array data in string.
1442 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1443
1444 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1445 Register obj = locations->InAt(0).AsRegister<Register>();
1446 Location srcBegin = locations->InAt(1);
1447 int srcBegin_value =
1448 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1449 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1450 Register dst = locations->InAt(3).AsRegister<Register>();
1451 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1452
1453 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1454 const size_t char_size = DataType::Size(DataType::Type::kUint16);
1455 DCHECK_EQ(char_size, 2u);
1456
1457 // Compute the number of chars (words) to move.
1458 // Save ECX, since we don't know if it will be used later.
1459 __ pushl(ECX);
1460 int stack_adjust = kX86WordSize;
1461 __ cfi().AdjustCFAOffset(stack_adjust);
1462 DCHECK_EQ(srcEnd, ECX);
1463 if (srcBegin.IsConstant()) {
1464 __ subl(ECX, Immediate(srcBegin_value));
1465 } else {
1466 DCHECK(srcBegin.IsRegister());
1467 __ subl(ECX, srcBegin.AsRegister<Register>());
1468 }
1469
1470 NearLabel done;
1471 if (mirror::kUseStringCompression) {
1472 // Location of count in string
1473 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1474 const size_t c_char_size = DataType::Size(DataType::Type::kInt8);
1475 DCHECK_EQ(c_char_size, 1u);
1476 __ pushl(EAX);
1477 __ cfi().AdjustCFAOffset(stack_adjust);
1478
1479 NearLabel copy_loop, copy_uncompressed;
1480 __ testl(Address(obj, count_offset), Immediate(1));
1481 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1482 "Expecting 0=compressed, 1=uncompressed");
1483 __ j(kNotZero, &copy_uncompressed);
1484 // Compute the address of the source string by adding the number of chars from
1485 // the source beginning to the value offset of a string.
1486 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1487
1488 // Start the loop to copy String's value to Array of Char.
1489 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1490 __ Bind(&copy_loop);
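// Exit the loop once the remaining count in ECX reaches zero.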
1491 __ jecxz(&done);
1492 // Use EAX temporary (convert byte from ESI to word).
1493 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1494 __ movzxb(EAX, Address(ESI, 0));
1495 __ movw(Address(EDI, 0), EAX);
1496 __ leal(EDI, Address(EDI, char_size));
1497 __ leal(ESI, Address(ESI, c_char_size));
1498 // TODO: Add support for LOOP to X86Assembler.
1499 __ subl(ECX, Immediate(1));
1500 __ jmp(&copy_loop);
1501 __ Bind(&copy_uncompressed);
1502 }
1503
1504 // Do the copy for uncompressed string.
1505 // Compute the address of the destination buffer.
1506 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1507 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1508 __ rep_movsw();
1509
1510 __ Bind(&done);
1511 if (mirror::kUseStringCompression) {
1512 // Restore EAX.
1513 __ popl(EAX);
1514 __ cfi().AdjustCFAOffset(-stack_adjust);
1515 }
1516 // Restore ECX.
1517 __ popl(ECX);
1518 __ cfi().AdjustCFAOffset(-stack_adjust);
1519 }
1520
GenPeek(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1521 static void GenPeek(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1522 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1523 Location out_loc = locations->Out();
1524 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1525 // to avoid a SIGBUS.
1526 switch (size) {
1527 case DataType::Type::kInt8:
1528 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1529 break;
1530 case DataType::Type::kInt16:
1531 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1532 break;
1533 case DataType::Type::kInt32:
1534 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1535 break;
1536 case DataType::Type::kInt64:
1537 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1538 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1539 break;
1540 default:
1541 LOG(FATAL) << "Type not recognized for peek: " << size;
1542 UNREACHABLE();
1543 }
1544 }
1545
VisitMemoryPeekByte(HInvoke * invoke)1546 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1547 CreateLongToIntLocations(allocator_, invoke);
1548 }
1549
VisitMemoryPeekByte(HInvoke * invoke)1550 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1551 GenPeek(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1552 }
1553
VisitMemoryPeekIntNative(HInvoke * invoke)1554 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1555 CreateLongToIntLocations(allocator_, invoke);
1556 }
1557
VisitMemoryPeekIntNative(HInvoke * invoke)1558 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1559 GenPeek(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1560 }
1561
VisitMemoryPeekLongNative(HInvoke * invoke)1562 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1563 CreateLongToLongLocations(allocator_, invoke);
1564 }
1565
VisitMemoryPeekLongNative(HInvoke * invoke)1566 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1567 GenPeek(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1568 }
1569
VisitMemoryPeekShortNative(HInvoke * invoke)1570 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1571 CreateLongToIntLocations(allocator_, invoke);
1572 }
1573
VisitMemoryPeekShortNative(HInvoke * invoke)1574 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1575 GenPeek(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1576 }
1577
CreateLongIntToVoidLocations(ArenaAllocator * allocator,DataType::Type size,HInvoke * invoke)1578 static void CreateLongIntToVoidLocations(ArenaAllocator* allocator,
1579 DataType::Type size,
1580 HInvoke* invoke) {
1581 LocationSummary* locations =
1582 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1583 locations->SetInAt(0, Location::RequiresRegister());
1584 HInstruction* value = invoke->InputAt(1);
1585 if (size == DataType::Type::kInt8) {
1586 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1587 } else {
1588 locations->SetInAt(1, Location::RegisterOrConstant(value));
1589 }
1590 }
1591
GenPoke(LocationSummary * locations,DataType::Type size,X86Assembler * assembler)1592 static void GenPoke(LocationSummary* locations, DataType::Type size, X86Assembler* assembler) {
1593 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1594 Location value_loc = locations->InAt(1);
1595 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1596 // to avoid a SIGBUS.
1597 switch (size) {
1598 case DataType::Type::kInt8:
1599 if (value_loc.IsConstant()) {
1600 __ movb(Address(address, 0),
1601 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1602 } else {
1603 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1604 }
1605 break;
1606 case DataType::Type::kInt16:
1607 if (value_loc.IsConstant()) {
1608 __ movw(Address(address, 0),
1609 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1610 } else {
1611 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1612 }
1613 break;
1614 case DataType::Type::kInt32:
1615 if (value_loc.IsConstant()) {
1616 __ movl(Address(address, 0),
1617 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1618 } else {
1619 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1620 }
1621 break;
1622 case DataType::Type::kInt64:
1623 if (value_loc.IsConstant()) {
1624 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1625 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1626 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1627 } else {
1628 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1629 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1630 }
1631 break;
1632 default:
1633 LOG(FATAL) << "Type not recognized for poke: " << size;
1634 UNREACHABLE();
1635 }
1636 }
1637
VisitMemoryPokeByte(HInvoke * invoke)1638 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1639 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt8, invoke);
1640 }
1641
VisitMemoryPokeByte(HInvoke * invoke)1642 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1643 GenPoke(invoke->GetLocations(), DataType::Type::kInt8, GetAssembler());
1644 }
1645
VisitMemoryPokeIntNative(HInvoke * invoke)1646 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1647 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt32, invoke);
1648 }
1649
VisitMemoryPokeIntNative(HInvoke * invoke)1650 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1651 GenPoke(invoke->GetLocations(), DataType::Type::kInt32, GetAssembler());
1652 }
1653
VisitMemoryPokeLongNative(HInvoke * invoke)1654 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1655 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt64, invoke);
1656 }
1657
VisitMemoryPokeLongNative(HInvoke * invoke)1658 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1659 GenPoke(invoke->GetLocations(), DataType::Type::kInt64, GetAssembler());
1660 }
1661
VisitMemoryPokeShortNative(HInvoke * invoke)1662 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1663 CreateLongIntToVoidLocations(allocator_, DataType::Type::kInt16, invoke);
1664 }
1665
VisitMemoryPokeShortNative(HInvoke * invoke)1666 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1667 GenPoke(invoke->GetLocations(), DataType::Type::kInt16, GetAssembler());
1668 }
1669
VisitThreadCurrentThread(HInvoke * invoke)1670 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
1671 LocationSummary* locations =
1672 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1673 locations->SetOut(Location::RequiresRegister());
1674 }
1675
VisitThreadCurrentThread(HInvoke * invoke)1676 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
1677 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
1678 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
1679 }
1680
GenUnsafeGet(HInvoke * invoke,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1681 static void GenUnsafeGet(HInvoke* invoke,
1682 DataType::Type type,
1683 bool is_volatile,
1684 CodeGeneratorX86* codegen) {
1685 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1686 LocationSummary* locations = invoke->GetLocations();
1687 Location base_loc = locations->InAt(1);
1688 Register base = base_loc.AsRegister<Register>();
1689 Location offset_loc = locations->InAt(2);
1690 Register offset = offset_loc.AsRegisterPairLow<Register>();
1691 Location output_loc = locations->Out();
1692
1693 switch (type) {
1694 case DataType::Type::kInt32: {
1695 Register output = output_loc.AsRegister<Register>();
1696 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1697 break;
1698 }
1699
1700 case DataType::Type::kReference: {
1701 Register output = output_loc.AsRegister<Register>();
1702 if (gUseReadBarrier) {
1703 if (kUseBakerReadBarrier) {
1704 Address src(base, offset, ScaleFactor::TIMES_1, 0);
1705 codegen->GenerateReferenceLoadWithBakerReadBarrier(
1706 invoke, output_loc, base, src, /* needs_null_check= */ false);
1707 } else {
1708 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1709 codegen->GenerateReadBarrierSlow(
1710 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
1711 }
1712 } else {
1713 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
1714 __ MaybeUnpoisonHeapReference(output);
1715 }
1716 break;
1717 }
1718
1719 case DataType::Type::kInt64: {
1720 Register output_lo = output_loc.AsRegisterPairLow<Register>();
1721 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
1722 if (is_volatile) {
1723 // Need to use a XMM to read atomically.
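// Two separate 32-bit loads could observe a torn value if another thread updates the
// field concurrently; movsd reads both halves in a single 8-byte access.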
1724 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
1725 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
1726 __ movd(output_lo, temp);
1727 __ psrlq(temp, Immediate(32));
1728 __ movd(output_hi, temp);
1729 } else {
1730 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
1731 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
1732 }
1733 }
1734 break;
1735
1736 default:
1737 LOG(FATAL) << "Unsupported op size " << type;
1738 UNREACHABLE();
1739 }
1740 }
1741
UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic)1742 static bool UnsafeGetIntrinsicOnCallList(Intrinsics intrinsic) {
1743 switch (intrinsic) {
1744 case Intrinsics::kUnsafeGetObject:
1745 case Intrinsics::kUnsafeGetObjectVolatile:
1746 case Intrinsics::kJdkUnsafeGetObject:
1747 case Intrinsics::kJdkUnsafeGetObjectVolatile:
1748 case Intrinsics::kJdkUnsafeGetObjectAcquire:
1749 return true;
1750 default:
1751 break;
1752 }
1753 return false;
1754 }
1755
CreateIntIntIntToIntLocations(ArenaAllocator * allocator,HInvoke * invoke,DataType::Type type,bool is_volatile)1756 static void CreateIntIntIntToIntLocations(ArenaAllocator* allocator,
1757 HInvoke* invoke,
1758 DataType::Type type,
1759 bool is_volatile) {
1760 bool can_call = gUseReadBarrier && UnsafeGetIntrinsicOnCallList(invoke->GetIntrinsic());
1761 LocationSummary* locations =
1762 new (allocator) LocationSummary(invoke,
1763 can_call
1764 ? LocationSummary::kCallOnSlowPath
1765 : LocationSummary::kNoCall,
1766 kIntrinsified);
1767 if (can_call && kUseBakerReadBarrier) {
1768 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
1769 }
1770 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1771 locations->SetInAt(1, Location::RequiresRegister());
1772 locations->SetInAt(2, Location::RequiresRegister());
1773 if (type == DataType::Type::kInt64) {
1774 if (is_volatile) {
1775 // Need to use XMM to read volatile.
1776 locations->AddTemp(Location::RequiresFpuRegister());
1777 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
1778 } else {
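// Without the XMM temporary, the low half of the output is written while `base` and
// `offset` are still needed for the second 32-bit load, so the output register pair
// must not be shared with the inputs.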
1779 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
1780 }
1781 } else {
1782 locations->SetOut(Location::RequiresRegister(),
1783 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
1784 }
1785 }
1786
VisitUnsafeGet(HInvoke * invoke)1787 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
1788 VisitJdkUnsafeGet(invoke);
1789 }
VisitUnsafeGetVolatile(HInvoke * invoke)1790 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1791 VisitJdkUnsafeGetVolatile(invoke);
1792 }
VisitUnsafeGetLong(HInvoke * invoke)1793 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
1794 VisitJdkUnsafeGetLong(invoke);
1795 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1796 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1797 VisitJdkUnsafeGetLongVolatile(invoke);
1798 }
VisitUnsafeGetObject(HInvoke * invoke)1799 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
1800 VisitJdkUnsafeGetObject(invoke);
1801 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1802 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1803 VisitJdkUnsafeGetObjectVolatile(invoke);
1804 }
1805
1806
VisitUnsafeGet(HInvoke * invoke)1807 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
1808 VisitJdkUnsafeGet(invoke);
1809 }
VisitUnsafeGetVolatile(HInvoke * invoke)1810 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
1811 VisitJdkUnsafeGetVolatile(invoke);
1812 }
VisitUnsafeGetLong(HInvoke * invoke)1813 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
1814 VisitJdkUnsafeGetLong(invoke);
1815 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)1816 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
1817 VisitJdkUnsafeGetLongVolatile(invoke);
1818 }
VisitUnsafeGetObject(HInvoke * invoke)1819 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
1820 VisitJdkUnsafeGetObject(invoke);
1821 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)1822 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
1823 VisitJdkUnsafeGetObjectVolatile(invoke);
1824 }
1825
1826
VisitJdkUnsafeGet(HInvoke * invoke)1827 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1828 CreateIntIntIntToIntLocations(
1829 allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ false);
1830 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1831 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1832 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ true);
1833 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1834 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1835 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt32, /*is_volatile=*/ true);
1836 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1837 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1838 CreateIntIntIntToIntLocations(
1839 allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ false);
1840 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1841 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1842 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ true);
1843 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1844 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1845 CreateIntIntIntToIntLocations(allocator_, invoke, DataType::Type::kInt64, /*is_volatile=*/ true);
1846 }
VisitJdkUnsafeGetObject(HInvoke * invoke)1847 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObject(HInvoke* invoke) {
1848 CreateIntIntIntToIntLocations(
1849 allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ false);
1850 }
VisitJdkUnsafeGetObjectVolatile(HInvoke * invoke)1851 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
1852 CreateIntIntIntToIntLocations(
1853 allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ true);
1854 }
VisitJdkUnsafeGetObjectAcquire(HInvoke * invoke)1855 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
1856 CreateIntIntIntToIntLocations(
1857 allocator_, invoke, DataType::Type::kReference, /*is_volatile=*/ true);
1858 }
1859
VisitJdkUnsafeGet(HInvoke * invoke)1860 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGet(HInvoke* invoke) {
1861 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
1862 }
VisitJdkUnsafeGetVolatile(HInvoke * invoke)1863 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetVolatile(HInvoke* invoke) {
1864 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1865 }
VisitJdkUnsafeGetAcquire(HInvoke * invoke)1866 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetAcquire(HInvoke* invoke) {
1867 GenUnsafeGet(invoke, DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
1868 }
VisitJdkUnsafeGetLong(HInvoke * invoke)1869 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLong(HInvoke* invoke) {
1870 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
1871 }
VisitJdkUnsafeGetLongVolatile(HInvoke * invoke)1872 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongVolatile(HInvoke* invoke) {
1873 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1874 }
VisitJdkUnsafeGetLongAcquire(HInvoke * invoke)1875 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetLongAcquire(HInvoke* invoke) {
1876 GenUnsafeGet(invoke, DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
1877 }
VisitJdkUnsafeGetObject(HInvoke * invoke)1878 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObject(HInvoke* invoke) {
1879 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
1880 }
VisitJdkUnsafeGetObjectVolatile(HInvoke * invoke)1881 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObjectVolatile(HInvoke* invoke) {
1882 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1883 }
VisitJdkUnsafeGetObjectAcquire(HInvoke * invoke)1884 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeGetObjectAcquire(HInvoke* invoke) {
1885 GenUnsafeGet(invoke, DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
1886 }
1887
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke,bool is_volatile)1888 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* allocator,
1889 DataType::Type type,
1890 HInvoke* invoke,
1891 bool is_volatile) {
1892 LocationSummary* locations =
1893 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
1894 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
1895 locations->SetInAt(1, Location::RequiresRegister());
1896 locations->SetInAt(2, Location::RequiresRegister());
1897 locations->SetInAt(3, Location::RequiresRegister());
1898 if (type == DataType::Type::kReference) {
1899 // Need temp registers for card-marking.
1900 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
1901 // Ensure the value is in a byte register.
1902 locations->AddTemp(Location::RegisterLocation(ECX));
1903 } else if (type == DataType::Type::kInt64 && is_volatile) {
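// A volatile 64-bit store is emitted as a single 8-byte SSE store (see GenUnsafePut);
// two XMM temporaries are needed to pack the register pair.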
1904 locations->AddTemp(Location::RequiresFpuRegister());
1905 locations->AddTemp(Location::RequiresFpuRegister());
1906 }
1907 }
1908
VisitUnsafePut(HInvoke * invoke)1909 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
1910 VisitJdkUnsafePut(invoke);
1911 }
VisitUnsafePutOrdered(HInvoke * invoke)1912 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
1913 VisitJdkUnsafePutOrdered(invoke);
1914 }
VisitUnsafePutVolatile(HInvoke * invoke)1915 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
1916 VisitJdkUnsafePutVolatile(invoke);
1917 }
VisitUnsafePutObject(HInvoke * invoke)1918 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
1919 VisitJdkUnsafePutObject(invoke);
1920 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)1921 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
1922 VisitJdkUnsafePutObjectOrdered(invoke);
1923 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)1924 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
1925 VisitJdkUnsafePutObjectVolatile(invoke);
1926 }
VisitUnsafePutLong(HInvoke * invoke)1927 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
1928 VisitJdkUnsafePutLong(invoke);
1929 }
VisitUnsafePutLongOrdered(HInvoke * invoke)1930 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
1931 VisitJdkUnsafePutLongOrdered(invoke);
1932 }
VisitUnsafePutLongVolatile(HInvoke * invoke)1933 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
1934 VisitJdkUnsafePutLongVolatile(invoke);
1935 }
1936
VisitJdkUnsafePut(HInvoke * invoke)1937 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePut(HInvoke* invoke) {
1938 CreateIntIntIntIntToVoidPlusTempsLocations(
1939 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
1940 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)1941 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
1942 CreateIntIntIntIntToVoidPlusTempsLocations(
1943 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ false);
1944 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)1945 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
1946 CreateIntIntIntIntToVoidPlusTempsLocations(
1947 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
1948 }
VisitJdkUnsafePutRelease(HInvoke * invoke)1949 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
1950 CreateIntIntIntIntToVoidPlusTempsLocations(
1951 allocator_, DataType::Type::kInt32, invoke, /*is_volatile=*/ true);
1952 }
VisitJdkUnsafePutObject(HInvoke * invoke)1953 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObject(HInvoke* invoke) {
1954 CreateIntIntIntIntToVoidPlusTempsLocations(
1955 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
1956 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)1957 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
1958 CreateIntIntIntIntToVoidPlusTempsLocations(
1959 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ false);
1960 }
VisitJdkUnsafePutObjectVolatile(HInvoke * invoke)1961 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
1962 CreateIntIntIntIntToVoidPlusTempsLocations(
1963 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
1964 }
VisitJdkUnsafePutObjectRelease(HInvoke * invoke)1965 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
1966 CreateIntIntIntIntToVoidPlusTempsLocations(
1967 allocator_, DataType::Type::kReference, invoke, /*is_volatile=*/ true);
1968 }
VisitJdkUnsafePutLong(HInvoke * invoke)1969 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
1970 CreateIntIntIntIntToVoidPlusTempsLocations(
1971 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
1972 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)1973 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
1974 CreateIntIntIntIntToVoidPlusTempsLocations(
1975 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ false);
1976 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)1977 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
1978 CreateIntIntIntIntToVoidPlusTempsLocations(
1979 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
1980 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)1981 void IntrinsicLocationsBuilderX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
1982 CreateIntIntIntIntToVoidPlusTempsLocations(
1983 allocator_, DataType::Type::kInt64, invoke, /*is_volatile=*/ true);
1984 }
1985
1986 // Ordered puts need no special handling: they only require an AnyStore barrier, which the
1987 // x86 memory model already provides.
GenUnsafePut(LocationSummary * locations,DataType::Type type,bool is_volatile,CodeGeneratorX86 * codegen)1988 static void GenUnsafePut(LocationSummary* locations,
1989 DataType::Type type,
1990 bool is_volatile,
1991 CodeGeneratorX86* codegen) {
1992 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
1993 Register base = locations->InAt(1).AsRegister<Register>();
1994 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
1995 Location value_loc = locations->InAt(3);
1996
1997 if (type == DataType::Type::kInt64) {
1998 Register value_lo = value_loc.AsRegisterPairLow<Register>();
1999 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2000 if (is_volatile) {
2001 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2002 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
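// Pack the low and high halves into one XMM register so that the store below writes
// all 8 bytes in a single access.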
2003 __ movd(temp1, value_lo);
2004 __ movd(temp2, value_hi);
2005 __ punpckldq(temp1, temp2);
2006 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2007 } else {
2008 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2009 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2010 }
2011 } else if (kPoisonHeapReferences && type == DataType::Type::kReference) {
2012 Register temp = locations->GetTemp(0).AsRegister<Register>();
2013 __ movl(temp, value_loc.AsRegister<Register>());
2014 __ PoisonHeapReference(temp);
2015 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2016 } else {
2017 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2018 }
2019
2020 if (is_volatile) {
2021 codegen->MemoryFence();
2022 }
2023
2024 if (type == DataType::Type::kReference) {
2025 bool value_can_be_null = true; // TODO: Worth finding out this information?
2026 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2027 locations->GetTemp(1).AsRegister<Register>(),
2028 base,
2029 value_loc.AsRegister<Register>(),
2030 value_can_be_null);
2031 }
2032 }
2033
VisitUnsafePut(HInvoke * invoke)2034 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
2035 VisitJdkUnsafePut(invoke);
2036 }
VisitUnsafePutOrdered(HInvoke * invoke)2037 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2038 VisitJdkUnsafePutOrdered(invoke);
2039 }
VisitUnsafePutVolatile(HInvoke * invoke)2040 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2041 VisitJdkUnsafePutVolatile(invoke);
2042 }
VisitUnsafePutObject(HInvoke * invoke)2043 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
2044 VisitJdkUnsafePutObject(invoke);
2045 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2046 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2047 VisitJdkUnsafePutObjectOrdered(invoke);
2048 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2049 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2050 VisitJdkUnsafePutObjectVolatile(invoke);
2051 }
VisitUnsafePutLong(HInvoke * invoke)2052 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
2053 VisitJdkUnsafePutLong(invoke);
2054 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2055 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2056 VisitJdkUnsafePutLongOrdered(invoke);
2057 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2058 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2059 VisitJdkUnsafePutLongVolatile(invoke);
2060 }
2061
VisitJdkUnsafePut(HInvoke * invoke)2062 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePut(HInvoke* invoke) {
2063 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2064 }
VisitJdkUnsafePutOrdered(HInvoke * invoke)2065 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutOrdered(HInvoke* invoke) {
2066 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ false, codegen_);
2067 }
VisitJdkUnsafePutVolatile(HInvoke * invoke)2068 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutVolatile(HInvoke* invoke) {
2069 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2070 }
VisitJdkUnsafePutRelease(HInvoke * invoke)2071 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutRelease(HInvoke* invoke) {
2072 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt32, /*is_volatile=*/ true, codegen_);
2073 }
VisitJdkUnsafePutObject(HInvoke * invoke)2074 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObject(HInvoke* invoke) {
2075 GenUnsafePut(
2076 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2077 }
VisitJdkUnsafePutObjectOrdered(HInvoke * invoke)2078 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectOrdered(HInvoke* invoke) {
2079 GenUnsafePut(
2080 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ false, codegen_);
2081 }
VisitJdkUnsafePutObjectVolatile(HInvoke * invoke)2082 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectVolatile(HInvoke* invoke) {
2083 GenUnsafePut(
2084 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2085 }
VisitJdkUnsafePutObjectRelease(HInvoke * invoke)2086 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutObjectRelease(HInvoke* invoke) {
2087 GenUnsafePut(
2088 invoke->GetLocations(), DataType::Type::kReference, /*is_volatile=*/ true, codegen_);
2089 }
VisitJdkUnsafePutLong(HInvoke * invoke)2090 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLong(HInvoke* invoke) {
2091 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2092 }
VisitJdkUnsafePutLongOrdered(HInvoke * invoke)2093 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongOrdered(HInvoke* invoke) {
2094 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ false, codegen_);
2095 }
VisitJdkUnsafePutLongVolatile(HInvoke * invoke)2096 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongVolatile(HInvoke* invoke) {
2097 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2098 }
VisitJdkUnsafePutLongRelease(HInvoke * invoke)2099 void IntrinsicCodeGeneratorX86::VisitJdkUnsafePutLongRelease(HInvoke* invoke) {
2100 GenUnsafePut(invoke->GetLocations(), DataType::Type::kInt64, /*is_volatile=*/ true, codegen_);
2101 }
2102
CreateIntIntIntIntIntToInt(ArenaAllocator * allocator,DataType::Type type,HInvoke * invoke)2103 static void CreateIntIntIntIntIntToInt(ArenaAllocator* allocator,
2104 DataType::Type type,
2105 HInvoke* invoke) {
2106 const bool can_call = gUseReadBarrier &&
2107 kUseBakerReadBarrier &&
2108 IsUnsafeCASObject(invoke);
2109 LocationSummary* locations =
2110 new (allocator) LocationSummary(invoke,
2111 can_call
2112 ? LocationSummary::kCallOnSlowPath
2113 : LocationSummary::kNoCall,
2114 kIntrinsified);
2115 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2116 locations->SetInAt(1, Location::RequiresRegister());
2117 // Offset is a long, but in 32 bit mode, we only need the low word.
2118 // Can we update the invoke here to remove a TypeConvert to Long?
2119 locations->SetInAt(2, Location::RequiresRegister());
2120 // Expected value must be in EAX or EDX:EAX.
2121 // For long, new value must be in ECX:EBX.
2122 if (type == DataType::Type::kInt64) {
2123 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2124 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2125 } else {
2126 locations->SetInAt(3, Location::RegisterLocation(EAX));
2127 locations->SetInAt(4, Location::RequiresRegister());
2128 }
2129
2130 // Force a byte register for the output.
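// EAX works for both uses of the result: CMPXCHG leaves the old value in EAX, and the
// setb conversion to a boolean needs a byte-addressable register.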
2131 locations->SetOut(Location::RegisterLocation(EAX));
2132 if (type == DataType::Type::kReference) {
2133 // Need temporary registers for card-marking, and possibly for
2134 // (Baker) read barrier.
2135 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2136 // Need a byte register for marking.
2137 locations->AddTemp(Location::RegisterLocation(ECX));
2138 }
2139 }
2140
VisitUnsafeCASInt(HInvoke * invoke)2141 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2142 VisitJdkUnsafeCASInt(invoke);
2143 }
2144
VisitUnsafeCASLong(HInvoke * invoke)2145 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2146 VisitJdkUnsafeCASLong(invoke);
2147 }
2148
VisitUnsafeCASObject(HInvoke * invoke)2149 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2150 VisitJdkUnsafeCASObject(invoke);
2151 }
2152
VisitJdkUnsafeCASInt(HInvoke * invoke)2153 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2154 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2155 VisitJdkUnsafeCompareAndSetInt(invoke);
2156 }
2157
VisitJdkUnsafeCASLong(HInvoke * invoke)2158 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2159 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2160 VisitJdkUnsafeCompareAndSetLong(invoke);
2161 }
2162
VisitJdkUnsafeCASObject(HInvoke * invoke)2163 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2164 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2165 VisitJdkUnsafeCompareAndSetObject(invoke);
2166 }
2167
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2168 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2169 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt32, invoke);
2170 }
2171
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2172 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2173 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kInt64, invoke);
2174 }
2175
VisitJdkUnsafeCompareAndSetObject(HInvoke * invoke)2176 void IntrinsicLocationsBuilderX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
2177 // The only supported read barrier implementation is the Baker-style read barriers.
2178 if (gUseReadBarrier && !kUseBakerReadBarrier) {
2179 return;
2180 }
2181
2182 CreateIntIntIntIntIntToInt(allocator_, DataType::Type::kReference, invoke);
2183 }
2184
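// Emits a LOCK CMPXCHG (or LOCK CMPXCHG8B): the expected value in EAX (EDX:EAX for 64-bit
// operands) is compared with the destination; on a match the new value is stored and ZF is
// set, otherwise the current memory value is loaded into EAX (EDX:EAX) and ZF is cleared.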
GenPrimitiveLockedCmpxchg(DataType::Type type,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Register temp=Register::kNoRegister)2185 static void GenPrimitiveLockedCmpxchg(DataType::Type type,
2186 CodeGeneratorX86* codegen,
2187 Location expected_value,
2188 Location new_value,
2189 Register base,
2190 Register offset,
2191 // Only necessary for floating point
2192 Register temp = Register::kNoRegister) {
2193 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2194
2195 if (DataType::Kind(type) == DataType::Type::kInt32) {
2196 DCHECK_EQ(expected_value.AsRegister<Register>(), EAX);
2197 }
2198
2199 // The address of the field within the holding object.
2200 Address field_addr(base, offset, TIMES_1, 0);
2201
2202 switch (type) {
2203 case DataType::Type::kBool:
2204 case DataType::Type::kInt8:
2205 __ LockCmpxchgb(field_addr, new_value.AsRegister<ByteRegister>());
2206 break;
2207 case DataType::Type::kInt16:
2208 case DataType::Type::kUint16:
2209 __ LockCmpxchgw(field_addr, new_value.AsRegister<Register>());
2210 break;
2211 case DataType::Type::kInt32:
2212 __ LockCmpxchgl(field_addr, new_value.AsRegister<Register>());
2213 break;
2214 case DataType::Type::kFloat32: {
2215 // cmpxchg requires the expected value to be in EAX so the new value must be elsewhere.
2216 DCHECK_NE(temp, EAX);
2217 // EAX is both an input and an output for cmpxchg
2218 codegen->Move32(Location::RegisterLocation(EAX), expected_value);
2219 codegen->Move32(Location::RegisterLocation(temp), new_value);
2220 __ LockCmpxchgl(field_addr, temp);
2221 break;
2222 }
2223 case DataType::Type::kInt64:
2224 // Ensure the expected value is in EAX:EDX and that the new
2225 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2226 DCHECK_EQ(expected_value.AsRegisterPairLow<Register>(), EAX);
2227 DCHECK_EQ(expected_value.AsRegisterPairHigh<Register>(), EDX);
2228 DCHECK_EQ(new_value.AsRegisterPairLow<Register>(), EBX);
2229 DCHECK_EQ(new_value.AsRegisterPairHigh<Register>(), ECX);
2230 __ LockCmpxchg8b(field_addr);
2231 break;
2232 default:
2233 LOG(FATAL) << "Unexpected CAS type " << type;
2234 }
2235 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2236 // don't need scheduling barriers at this time.
2237 }
2238
GenPrimitiveCAS(DataType::Type type,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Location out,Register temp=Register::kNoRegister,bool is_cmpxchg=false)2239 static void GenPrimitiveCAS(DataType::Type type,
2240 CodeGeneratorX86* codegen,
2241 Location expected_value,
2242 Location new_value,
2243 Register base,
2244 Register offset,
2245 Location out,
2246 // Only necessary for floating point
2247 Register temp = Register::kNoRegister,
2248 bool is_cmpxchg = false) {
2249 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2250
2251 if (!is_cmpxchg || DataType::Kind(type) == DataType::Type::kInt32) {
2252 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2253 }
2254
2255 GenPrimitiveLockedCmpxchg(type, codegen, expected_value, new_value, base, offset, temp);
2256
2257 if (is_cmpxchg) {
2258 // Sign-extend, zero-extend or move the result if necessary
2259 switch (type) {
2260 case DataType::Type::kBool:
2261 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2262 break;
2263 case DataType::Type::kInt8:
2264 __ movsxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2265 break;
2266 case DataType::Type::kInt16:
2267 __ movsxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2268 break;
2269 case DataType::Type::kUint16:
2270 __ movzxw(out.AsRegister<Register>(), out.AsRegister<Register>());
2271 break;
2272 case DataType::Type::kFloat32:
2273 __ movd(out.AsFpuRegister<XmmRegister>(), EAX);
2274 break;
2275 default:
2276 // Nothing to do
2277 break;
2278 }
2279 } else {
2280 // Convert ZF into the Boolean result.
2281 __ setb(kZero, out.AsRegister<Register>());
2282 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2283 }
2284 }
2285
GenReferenceCAS(HInvoke * invoke,CodeGeneratorX86 * codegen,Location expected_value,Location new_value,Register base,Register offset,Register temp,Register temp2,bool is_cmpxchg=false)2286 static void GenReferenceCAS(HInvoke* invoke,
2287 CodeGeneratorX86* codegen,
2288 Location expected_value,
2289 Location new_value,
2290 Register base,
2291 Register offset,
2292 Register temp,
2293 Register temp2,
2294 bool is_cmpxchg = false) {
2295 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2296 LocationSummary* locations = invoke->GetLocations();
2297 Location out = locations->Out();
2298
2299 // The address of the field within the holding object.
2300 Address field_addr(base, offset, TIMES_1, 0);
2301
2302 Register value = new_value.AsRegister<Register>();
2303 Register expected = expected_value.AsRegister<Register>();
2304 DCHECK_EQ(expected, EAX);
2305 DCHECK_NE(temp, temp2);
2306
2307 if (gUseReadBarrier && kUseBakerReadBarrier) {
2308 // Need to make sure the reference stored in the field is a to-space
2309 // one before attempting the CAS or the CAS could fail incorrectly.
2310 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2311 invoke,
2312 // Unused, used only as a "temporary" within the read barrier.
2313 Location::RegisterLocation(temp),
2314 base,
2315 field_addr,
2316 /* needs_null_check= */ false,
2317 /* always_update_field= */ true,
2318 &temp2);
2319 }
2320 bool base_equals_value = (base == value);
2321 if (kPoisonHeapReferences) {
2322 if (base_equals_value) {
2323 // If `base` and `value` are the same register location, move
2324 // `value` to a temporary register. This way, poisoning
2325 // `value` won't invalidate `base`.
2326 value = temp;
2327 __ movl(value, base);
2328 }
2329
2330 // Check that the register allocator did not assign the location
2331 // of `expected` (EAX) to `value` nor to `base`, so that heap
2332 // poisoning (when enabled) works as intended below.
2333 // - If `value` were equal to `expected`, both references would
2334 // be poisoned twice, meaning they would not be poisoned at
2335 // all, as heap poisoning uses address negation.
2336 // - If `base` were equal to `expected`, poisoning `expected`
2337 // would invalidate `base`.
2338 DCHECK_NE(value, expected);
2339 DCHECK_NE(base, expected);
2340 __ PoisonHeapReference(expected);
2341 __ PoisonHeapReference(value);
2342 }
2343 __ LockCmpxchgl(field_addr, value);
2344
2345 // LOCK CMPXCHG has full barrier semantics, and we don't need
2346 // scheduling barriers at this time.
2347
2348 if (is_cmpxchg) {
2349 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2350 __ MaybeUnpoisonHeapReference(out.AsRegister<Register>());
2351 } else {
2352 // Convert ZF into the Boolean result.
2353 __ setb(kZero, out.AsRegister<Register>());
2354 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2355 }
2356
2357 // Mark card for object if the new value is stored.
2358 bool value_can_be_null = true; // TODO: Worth finding out this information?
2359 NearLabel skip_mark_gc_card;
2360 __ j(kNotZero, &skip_mark_gc_card);
2361 codegen->MarkGCCard(temp, temp2, base, value, value_can_be_null);
2362 __ Bind(&skip_mark_gc_card);
2363
2364 // If heap poisoning is enabled, we need to unpoison the values
2365 // that were poisoned earlier.
2366 if (kPoisonHeapReferences) {
2367 if (base_equals_value) {
2368 // `value` has been moved to a temporary register, no need to
2369 // unpoison it.
2370 } else {
2371 // Ensure `value` is different from `out`, so that unpoisoning
2372 // the former does not invalidate the latter.
2373 DCHECK_NE(value, out.AsRegister<Register>());
2374 __ UnpoisonHeapReference(value);
2375 }
2376 }
2377 // Do not unpoison the reference contained in register
2378 // `expected`, as it is the same as register `out` (EAX).
2379 }
2380
GenCAS(DataType::Type type,HInvoke * invoke,CodeGeneratorX86 * codegen)2381 static void GenCAS(DataType::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2382 LocationSummary* locations = invoke->GetLocations();
2383
2384 Register base = locations->InAt(1).AsRegister<Register>();
2385 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2386 Location expected_value = locations->InAt(3);
2387 Location new_value = locations->InAt(4);
2388 Location out = locations->Out();
2389 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2390
2391 if (type == DataType::Type::kReference) {
2392 // The only read barrier implementation supporting the
2393 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2394 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2395
2396 Register temp = locations->GetTemp(0).AsRegister<Register>();
2397 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2398 GenReferenceCAS(invoke, codegen, expected_value, new_value, base, offset, temp, temp2);
2399 } else {
2400 DCHECK(!DataType::IsFloatingPointType(type));
2401 GenPrimitiveCAS(type, codegen, expected_value, new_value, base, offset, out);
2402 }
2403 }
2404
VisitUnsafeCASInt(HInvoke * invoke)2405 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2406 VisitJdkUnsafeCASInt(invoke);
2407 }
2408
VisitUnsafeCASLong(HInvoke * invoke)2409 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2410 VisitJdkUnsafeCASLong(invoke);
2411 }
2412
VisitUnsafeCASObject(HInvoke * invoke)2413 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2414 // The only read barrier implementation supporting the
2415 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2416 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2417
2418 GenCAS(DataType::Type::kReference, invoke, codegen_);
2419 }
2420
VisitJdkUnsafeCASInt(HInvoke * invoke)2421 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASInt(HInvoke* invoke) {
2422 // `jdk.internal.misc.Unsafe.compareAndSwapInt` has compare-and-set semantics (see javadoc).
2423 VisitJdkUnsafeCompareAndSetInt(invoke);
2424 }
2425
VisitJdkUnsafeCASLong(HInvoke * invoke)2426 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASLong(HInvoke* invoke) {
2427 // `jdk.internal.misc.Unsafe.compareAndSwapLong` has compare-and-set semantics (see javadoc).
2428 VisitJdkUnsafeCompareAndSetLong(invoke);
2429 }
2430
VisitJdkUnsafeCASObject(HInvoke * invoke)2431 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCASObject(HInvoke* invoke) {
2432 // `jdk.internal.misc.Unsafe.compareAndSwapObject` has compare-and-set semantics (see javadoc).
2433 VisitJdkUnsafeCompareAndSetObject(invoke);
2434 }
2435
VisitJdkUnsafeCompareAndSetInt(HInvoke * invoke)2436 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetInt(HInvoke* invoke) {
2437 GenCAS(DataType::Type::kInt32, invoke, codegen_);
2438 }
2439
VisitJdkUnsafeCompareAndSetLong(HInvoke * invoke)2440 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetLong(HInvoke* invoke) {
2441 GenCAS(DataType::Type::kInt64, invoke, codegen_);
2442 }
2443
VisitJdkUnsafeCompareAndSetObject(HInvoke * invoke)2444 void IntrinsicCodeGeneratorX86::VisitJdkUnsafeCompareAndSetObject(HInvoke* invoke) {
2445 // The only supported read barrier implementation is the Baker-style read barriers.
2446 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2447
2448 GenCAS(DataType::Type::kReference, invoke, codegen_);
2449 }
2450
VisitIntegerReverse(HInvoke * invoke)2451 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2452 LocationSummary* locations =
2453 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2454 locations->SetInAt(0, Location::RequiresRegister());
2455 locations->SetOut(Location::SameAsFirstInput());
2456 locations->AddTemp(Location::RequiresRegister());
2457 }
2458
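// Swaps each field of bits selected by `mask` with the field `shift` bits above it, i.e.
// reg = ((reg >> shift) & mask) | ((reg & mask) << shift), using `temp` as scratch.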
SwapBits(Register reg,Register temp,int32_t shift,int32_t mask,X86Assembler * assembler)2459 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2460 X86Assembler* assembler) {
2461 Immediate imm_shift(shift);
2462 Immediate imm_mask(mask);
2463 __ movl(temp, reg);
2464 __ shrl(reg, imm_shift);
2465 __ andl(temp, imm_mask);
2466 __ andl(reg, imm_mask);
2467 __ shll(temp, imm_shift);
2468 __ orl(reg, temp);
2469 }
2470
VisitIntegerReverse(HInvoke * invoke)2471 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2472 X86Assembler* assembler = GetAssembler();
2473 LocationSummary* locations = invoke->GetLocations();
2474
2475 Register reg = locations->InAt(0).AsRegister<Register>();
2476 Register temp = locations->GetTemp(0).AsRegister<Register>();
2477
2478 /*
2479 * Use one bswap instruction to reverse the byte order first, then use 3 rounds of
2480 * bit swapping to reverse the bits within each byte of x. Using bswap saves instructions
2481 * compared to the generic luni implementation, which needs 5 rounds of bit swapping.
2482 * x = bswap x
2483 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2484 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2485 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2486 */
2487 __ bswapl(reg);
2488 SwapBits(reg, temp, 1, 0x55555555, assembler);
2489 SwapBits(reg, temp, 2, 0x33333333, assembler);
2490 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2491 }
2492
VisitLongReverse(HInvoke * invoke)2493 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2494 LocationSummary* locations =
2495 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2496 locations->SetInAt(0, Location::RequiresRegister());
2497 locations->SetOut(Location::SameAsFirstInput());
2498 locations->AddTemp(Location::RequiresRegister());
2499 }
2500
VisitLongReverse(HInvoke * invoke)2501 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2502 X86Assembler* assembler = GetAssembler();
2503 LocationSummary* locations = invoke->GetLocations();
2504
2505 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2506 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2507 Register temp = locations->GetTemp(0).AsRegister<Register>();
2508
2509 // We want to swap high/low, then bswap each one, and then do the same
2510 // as a 32 bit reverse.
2511 // Exchange high and low.
2512 __ movl(temp, reg_low);
2513 __ movl(reg_low, reg_high);
2514 __ movl(reg_high, temp);
2515
2516 // bit-reverse low
2517 __ bswapl(reg_low);
2518 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2519 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2520 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2521
2522 // bit-reverse high
2523 __ bswapl(reg_high);
2524 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2525 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2526 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2527 }
2528
CreateBitCountLocations(ArenaAllocator * allocator,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2529 static void CreateBitCountLocations(
2530 ArenaAllocator* allocator, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2531 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2532 // Do nothing if there is no popcnt support. This results in generating
2533 // a call for the intrinsic rather than direct code.
2534 return;
2535 }
2536 LocationSummary* locations =
2537 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2538 if (is_long) {
2539 locations->AddTemp(Location::RequiresRegister());
2540 }
2541 locations->SetInAt(0, Location::Any());
2542 locations->SetOut(Location::RequiresRegister());
2543 }
2544
GenBitCount(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2545 static void GenBitCount(X86Assembler* assembler,
2546 CodeGeneratorX86* codegen,
2547 HInvoke* invoke, bool is_long) {
2548 LocationSummary* locations = invoke->GetLocations();
2549 Location src = locations->InAt(0);
2550 Register out = locations->Out().AsRegister<Register>();
2551
2552 if (invoke->InputAt(0)->IsConstant()) {
2553 // Evaluate this at compile time.
2554 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2555 int32_t result = is_long
2556 ? POPCOUNT(static_cast<uint64_t>(value))
2557 : POPCOUNT(static_cast<uint32_t>(value));
2558 codegen->Load32BitValue(out, result);
2559 return;
2560 }
2561
2562 // Handle the non-constant cases.
2563 if (!is_long) {
2564 if (src.IsRegister()) {
2565 __ popcntl(out, src.AsRegister<Register>());
2566 } else {
2567 DCHECK(src.IsStackSlot());
2568 __ popcntl(out, Address(ESP, src.GetStackIndex()));
2569 }
2570 } else {
2571 // The 64-bit case needs to worry about two parts.
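// In effect: Long.bitCount(x) == Integer.bitCount(x_hi) + Integer.bitCount(x_lo),
// so emit two 32-bit popcnts and add the results.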
2572 Register temp = locations->GetTemp(0).AsRegister<Register>();
2573 if (src.IsRegisterPair()) {
2574 __ popcntl(temp, src.AsRegisterPairLow<Register>());
2575 __ popcntl(out, src.AsRegisterPairHigh<Register>());
2576 } else {
2577 DCHECK(src.IsDoubleStackSlot());
2578 __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2579 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2580 }
2581 __ addl(out, temp);
2582 }
2583 }
2584
VisitIntegerBitCount(HInvoke * invoke)2585 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2586 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ false);
2587 }
2588
VisitIntegerBitCount(HInvoke * invoke)2589 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2590 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2591 }
2592
VisitLongBitCount(HInvoke * invoke)2593 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2594 CreateBitCountLocations(allocator_, codegen_, invoke, /* is_long= */ true);
2595 }
2596
VisitLongBitCount(HInvoke * invoke)2597 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2598 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2599 }
2600
CreateLeadingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2601 static void CreateLeadingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2602 LocationSummary* locations =
2603 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2604 if (is_long) {
2605 locations->SetInAt(0, Location::RequiresRegister());
2606 } else {
2607 locations->SetInAt(0, Location::Any());
2608 }
2609 locations->SetOut(Location::RequiresRegister());
2610 }
2611
GenLeadingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2612 static void GenLeadingZeros(X86Assembler* assembler,
2613 CodeGeneratorX86* codegen,
2614 HInvoke* invoke, bool is_long) {
2615 LocationSummary* locations = invoke->GetLocations();
2616 Location src = locations->InAt(0);
2617 Register out = locations->Out().AsRegister<Register>();
2618
2619 if (invoke->InputAt(0)->IsConstant()) {
2620 // Evaluate this at compile time.
2621 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2622 if (value == 0) {
2623 value = is_long ? 64 : 32;
2624 } else {
2625 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2626 }
2627 codegen->Load32BitValue(out, value);
2628 return;
2629 }
2630
2631 // Handle the non-constant cases.
2632 if (!is_long) {
2633 if (src.IsRegister()) {
2634 __ bsrl(out, src.AsRegister<Register>());
2635 } else {
2636 DCHECK(src.IsStackSlot());
2637 __ bsrl(out, Address(ESP, src.GetStackIndex()));
2638 }
2639
2640 // BSR sets ZF if the input was zero, and the output is undefined.
2641 NearLabel all_zeroes, done;
2642 __ j(kEqual, &all_zeroes);
2643
2644 // Correct the result from BSR to get the final CLZ result.
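// For a non-zero input, CLZ == 31 - BSR; since the BSR result lies in [0, 31],
// 31 - BSR equals 31 ^ BSR, so a single XOR suffices.
// Worked example: input 0x00F00000 -> BSR = 23, and 23 ^ 31 = 8 leading zeros.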
2645 __ xorl(out, Immediate(31));
2646 __ jmp(&done);
2647
2648 // Fix the zero case with the expected result.
2649 __ Bind(&all_zeroes);
2650 __ movl(out, Immediate(32));
2651
2652 __ Bind(&done);
2653 return;
2654 }
2655
2656 // 64 bit case needs to worry about both parts of the register.
2657 DCHECK(src.IsRegisterPair());
2658 Register src_lo = src.AsRegisterPairLow<Register>();
2659 Register src_hi = src.AsRegisterPairHigh<Register>();
2660 NearLabel handle_low, done, all_zeroes;
2661
2662 // Is the high word zero?
2663 __ testl(src_hi, src_hi);
2664 __ j(kEqual, &handle_low);
2665
2666 // High word is not zero. We know that the BSR result is defined in this case.
2667 __ bsrl(out, src_hi);
2668
2669 // Correct the result from BSR to get the final CLZ result.
2670 __ xorl(out, Immediate(31));
2671 __ jmp(&done);
2672
2673 // High word was zero. We have to compute the low word count and add 32.
2674 __ Bind(&handle_low);
2675 __ bsrl(out, src_lo);
2676 __ j(kEqual, &all_zeroes);
2677
2678 // We had a valid result. Use an XOR to both correct the result and add 32.
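// Here the result is 32 + (31 - BSR(lo)) = 63 - BSR(lo), and since BSR(lo) lies in
// [0, 31], 63 - BSR(lo) equals 63 ^ BSR(lo).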
2679 __ xorl(out, Immediate(63));
2680 __ jmp(&done);
2681
2682 // All zero case.
2683 __ Bind(&all_zeroes);
2684 __ movl(out, Immediate(64));
2685
2686 __ Bind(&done);
2687 }
2688
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2689 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2690 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ false);
2691 }
2692
VisitIntegerNumberOfLeadingZeros(HInvoke * invoke)2693 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2694 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2695 }
2696
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2697 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2698 CreateLeadingZeroLocations(allocator_, invoke, /* is_long= */ true);
2699 }
2700
VisitLongNumberOfLeadingZeros(HInvoke * invoke)2701 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2702 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2703 }
2704
CreateTrailingZeroLocations(ArenaAllocator * allocator,HInvoke * invoke,bool is_long)2705 static void CreateTrailingZeroLocations(ArenaAllocator* allocator, HInvoke* invoke, bool is_long) {
2706 LocationSummary* locations =
2707 new (allocator) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
2708 if (is_long) {
2709 locations->SetInAt(0, Location::RequiresRegister());
2710 } else {
2711 locations->SetInAt(0, Location::Any());
2712 }
2713 locations->SetOut(Location::RequiresRegister());
2714 }
2715
GenTrailingZeros(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2716 static void GenTrailingZeros(X86Assembler* assembler,
2717 CodeGeneratorX86* codegen,
2718 HInvoke* invoke, bool is_long) {
2719 LocationSummary* locations = invoke->GetLocations();
2720 Location src = locations->InAt(0);
2721 Register out = locations->Out().AsRegister<Register>();
2722
2723 if (invoke->InputAt(0)->IsConstant()) {
2724 // Evaluate this at compile time.
2725 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2726 if (value == 0) {
2727 value = is_long ? 64 : 32;
2728 } else {
2729 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2730 }
2731 codegen->Load32BitValue(out, value);
2732 return;
2733 }
2734
2735 // Handle the non-constant cases.
2736 if (!is_long) {
2737 if (src.IsRegister()) {
2738 __ bsfl(out, src.AsRegister<Register>());
2739 } else {
2740 DCHECK(src.IsStackSlot());
2741 __ bsfl(out, Address(ESP, src.GetStackIndex()));
2742 }
2743
2744 // BSF sets ZF if the input was zero, and the output is undefined.
2745 NearLabel done;
2746 __ j(kNotEqual, &done);
2747
2748 // Fix the zero case with the expected result.
2749 __ movl(out, Immediate(32));
2750
2751 __ Bind(&done);
2752 return;
2753 }
2754
2755 // 64 bit case needs to worry about both parts of the register.
2756 DCHECK(src.IsRegisterPair());
2757 Register src_lo = src.AsRegisterPairLow<Register>();
2758 Register src_hi = src.AsRegisterPairHigh<Register>();
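// In effect: the trailing-zero count of hi:lo is CTZ(lo) when lo != 0,
// 32 + CTZ(hi) when lo == 0 and hi != 0, and 64 when both words are zero.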
2759 NearLabel done, all_zeroes;
2760
2761 // If the low word is zero, then ZF will be set. If not, we have the answer.
2762 __ bsfl(out, src_lo);
2763 __ j(kNotEqual, &done);
2764
2765 // Low word was zero. We have to compute the high word count and add 32.
2766 __ bsfl(out, src_hi);
2767 __ j(kEqual, &all_zeroes);
2768
2769 // We had a valid result. Add 32 to account for the low word being zero.
2770 __ addl(out, Immediate(32));
2771 __ jmp(&done);
2772
2773 // All zero case.
2774 __ Bind(&all_zeroes);
2775 __ movl(out, Immediate(64));
2776
2777 __ Bind(&done);
2778 }
2779
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)2780 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2781 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ false);
2782 }
2783
VisitIntegerNumberOfTrailingZeros(HInvoke * invoke)2784 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2785 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ false);
2786 }
2787
VisitLongNumberOfTrailingZeros(HInvoke * invoke)2788 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2789 CreateTrailingZeroLocations(allocator_, invoke, /* is_long= */ true);
2790 }
2791
VisitLongNumberOfTrailingZeros(HInvoke * invoke)2792 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2793 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long= */ true);
2794 }
2795
IsSameInput(HInstruction * instruction,size_t input0,size_t input1)2796 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2797 return instruction->InputAt(input0) == instruction->InputAt(input1);
2798 }
2799
2800 // Compute base address for the System.arraycopy intrinsic in `base`.
GenSystemArrayCopyBaseAddress(X86Assembler * assembler,DataType::Type type,const Register & array,const Location & pos,const Register & base)2801 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2802 DataType::Type type,
2803 const Register& array,
2804 const Location& pos,
2805 const Register& base) {
2806 // This routine is currently used only by the SystemArrayCopy intrinsic, so `type` is
2807 // always DataType::Type::kReference. It could be extended to other element types to
2808 // also implement the SystemArrayCopyChar intrinsic.
2809 DCHECK_EQ(type, DataType::Type::kReference);
2810 const int32_t element_size = DataType::Size(type);
2811 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2812 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
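// In effect, a single LEA computes: base = array + data_offset + pos * element_size
// (using scaled-index addressing when `pos` is in a register).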
2813
2814 if (pos.IsConstant()) {
2815 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2816 __ leal(base, Address(array, element_size * constant + data_offset));
2817 } else {
2818 __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2819 }
2820 }
2821
2822 // Compute end source address for the System.arraycopy intrinsic in `end`.
GenSystemArrayCopyEndAddress(X86Assembler * assembler,DataType::Type type,const Location & copy_length,const Register & base,const Register & end)2823 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2824 DataType::Type type,
2825 const Location& copy_length,
2826 const Register& base,
2827 const Register& end) {
2828 // This routine is currently used only by the SystemArrayCopy intrinsic, so `type` is
2829 // always DataType::Type::kReference. It could be extended to other element types to
2830 // also implement the SystemArrayCopyChar intrinsic.
2831 DCHECK_EQ(type, DataType::Type::kReference);
2832 const int32_t element_size = DataType::Size(type);
2833 const ScaleFactor scale_factor = static_cast<ScaleFactor>(DataType::SizeShift(type));
2834
2835 if (copy_length.IsConstant()) {
2836 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2837 __ leal(end, Address(base, element_size * constant));
2838 } else {
2839 __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2840 }
2841 }
2842
VisitSystemArrayCopy(HInvoke * invoke)2843 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2844 // The only read barrier implementation supporting the
2845 // SystemArrayCopy intrinsic is the Baker-style read barrier.
2846 if (gUseReadBarrier && !kUseBakerReadBarrier) {
2847 return;
2848 }
2849
2850 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2851 if (invoke->GetLocations() != nullptr) {
2852 // Need a byte register for marking.
2853 invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2854
2855 static constexpr size_t kSrc = 0;
2856 static constexpr size_t kSrcPos = 1;
2857 static constexpr size_t kDest = 2;
2858 static constexpr size_t kDestPos = 3;
2859 static constexpr size_t kLength = 4;
2860
2861 if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2862 !invoke->InputAt(kDestPos)->IsIntConstant() &&
2863 !invoke->InputAt(kLength)->IsIntConstant()) {
2864 if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2865 !IsSameInput(invoke, kSrcPos, kLength) &&
2866 !IsSameInput(invoke, kDestPos, kLength) &&
2867 !IsSameInput(invoke, kSrc, kDest)) {
2868 // Not enough registers, make the length also take a stack slot.
2869 invoke->GetLocations()->SetInAt(kLength, Location::Any());
2870 }
2871 }
2872 }
2873 }
2874
VisitSystemArrayCopy(HInvoke * invoke)2875 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2876 // The only read barrier implementation supporting the
2877 // SystemArrayCopy intrinsic is the Baker-style read barrier.
2878 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
2879
2880 X86Assembler* assembler = GetAssembler();
2881 LocationSummary* locations = invoke->GetLocations();
2882
2883 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2884 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2885 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2886 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2887 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2888
2889 Register src = locations->InAt(0).AsRegister<Register>();
2890 Location src_pos = locations->InAt(1);
2891 Register dest = locations->InAt(2).AsRegister<Register>();
2892 Location dest_pos = locations->InAt(3);
2893 Location length_arg = locations->InAt(4);
2894 Location length = length_arg;
2895 Location temp1_loc = locations->GetTemp(0);
2896 Register temp1 = temp1_loc.AsRegister<Register>();
2897 Location temp2_loc = locations->GetTemp(1);
2898 Register temp2 = temp2_loc.AsRegister<Register>();
2899
2900 SlowPathCode* intrinsic_slow_path =
2901 new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
2902 codegen_->AddSlowPath(intrinsic_slow_path);
2903
2904 NearLabel conditions_on_positions_validated;
2905 SystemArrayCopyOptimizations optimizations(invoke);
2906
2907 // If source and destination are the same, we go to slow path if we need to do
2908 // forward copying.
2909 if (src_pos.IsConstant()) {
2910 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2911 if (dest_pos.IsConstant()) {
2912 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2913 if (optimizations.GetDestinationIsSource()) {
2914 // Checked when building locations.
2915 DCHECK_GE(src_pos_constant, dest_pos_constant);
2916 } else if (src_pos_constant < dest_pos_constant) {
2917 __ cmpl(src, dest);
2918 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2919 }
2920 } else {
2921 if (!optimizations.GetDestinationIsSource()) {
2922 __ cmpl(src, dest);
2923 __ j(kNotEqual, &conditions_on_positions_validated);
2924 }
2925 __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
2926 __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
2927 }
2928 } else {
2929 if (!optimizations.GetDestinationIsSource()) {
2930 __ cmpl(src, dest);
2931 __ j(kNotEqual, &conditions_on_positions_validated);
2932 }
2933 if (dest_pos.IsConstant()) {
2934 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2935 __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
2936 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2937 } else {
2938 __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
2939 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2940 }
2941 }
2942
2943 __ Bind(&conditions_on_positions_validated);
2944
2945 if (!optimizations.GetSourceIsNotNull()) {
2946 // Bail out if the source is null.
2947 __ testl(src, src);
2948 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2949 }
2950
2951 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
2952 // Bail out if the destination is null.
2953 __ testl(dest, dest);
2954 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
2955 }
2956
2957 Location temp3_loc = locations->GetTemp(2);
2958 Register temp3 = temp3_loc.AsRegister<Register>();
2959 if (length.IsStackSlot()) {
2960 __ movl(temp3, Address(ESP, length.GetStackIndex()));
2961 length = Location::RegisterLocation(temp3);
2962 }
2963
2964 // If the length is negative, bail out.
2965 // We have already checked in the LocationsBuilder for the constant case.
2966 if (!length.IsConstant() &&
2967 !optimizations.GetCountIsSourceLength() &&
2968 !optimizations.GetCountIsDestinationLength()) {
2969 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
2970 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
2971 }
2972
2973 // Validity checks: source.
2974 CheckPosition(assembler,
2975 src_pos,
2976 src,
2977 length,
2978 intrinsic_slow_path,
2979 temp1,
2980 optimizations.GetCountIsSourceLength());
2981
2982 // Validity checks: dest.
2983 CheckPosition(assembler,
2984 dest_pos,
2985 dest,
2986 length,
2987 intrinsic_slow_path,
2988 temp1,
2989 optimizations.GetCountIsDestinationLength());
2990
2991 if (!optimizations.GetDoesNotNeedTypeCheck()) {
2992 // Check whether all elements of the source array are assignable to the component
2993 // type of the destination array. We do two checks: the classes are the same,
2994 // or the destination is Object[]. If none of these checks succeed, we go to the
2995 // slow path.
2996
2997 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
2998 if (gUseReadBarrier && kUseBakerReadBarrier) {
2999 // /* HeapReference<Class> */ temp1 = src->klass_
3000 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3001 invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
3002 // Bail out if the source is not a non primitive array.
3003 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3004 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3005 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
3006 __ testl(temp1, temp1);
3007 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3008 // If heap poisoning is enabled, `temp1` has been unpoisoned
3009 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3010 } else {
3011 // /* HeapReference<Class> */ temp1 = src->klass_
3012 __ movl(temp1, Address(src, class_offset));
3013 __ MaybeUnpoisonHeapReference(temp1);
3014 // Bail out if the source is not a non primitive array.
3015 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3016 __ movl(temp1, Address(temp1, component_offset));
3017 __ testl(temp1, temp1);
3018 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3019 __ MaybeUnpoisonHeapReference(temp1);
3020 }
3021 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
3022 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3023 }
3024
3025 if (gUseReadBarrier && kUseBakerReadBarrier) {
3026 if (length.Equals(Location::RegisterLocation(temp3))) {
3027 // When Baker read barriers are enabled, register `temp3`,
3028 // which in the present case contains the `length` parameter,
3029 // will be overwritten below. Make the `length` location
3030 // reference the original stack location; it will be moved
3031 // back to `temp3` later if necessary.
3032 DCHECK(length_arg.IsStackSlot());
3033 length = length_arg;
3034 }
3035
3036 // /* HeapReference<Class> */ temp1 = dest->klass_
3037 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3038 invoke, temp1_loc, dest, class_offset, /* needs_null_check= */ false);
3039
3040 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3041 // Bail out if the destination is not a non primitive array.
3042 //
3043 // Register `temp1` is not trashed by the read barrier emitted
3044 // by GenerateFieldLoadWithBakerReadBarrier below, as that
3045 // method produces a call to a ReadBarrierMarkRegX entry point,
3046 // which saves all potentially live registers, including
3047 // temporaries such as `temp1`.
3048 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3049 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3050 invoke, temp2_loc, temp1, component_offset, /* needs_null_check= */ false);
3051 __ testl(temp2, temp2);
3052 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3053 // If heap poisoning is enabled, `temp2` has been unpoisoned
3054 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3055 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
3056 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3057 }
3058
3059 // For the same reason given earlier, `temp1` is not trashed by the
3060 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
3061 // /* HeapReference<Class> */ temp2 = src->klass_
3062 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3063 invoke, temp2_loc, src, class_offset, /* needs_null_check= */ false);
3064 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
3065 __ cmpl(temp1, temp2);
3066
3067 if (optimizations.GetDestinationIsTypedObjectArray()) {
3068 NearLabel do_copy;
3069 __ j(kEqual, &do_copy);
3070 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3071 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3072 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
3073 // We do not need to emit a read barrier for the following
3074 // heap reference load, as `temp1` is only used in a
3075 // comparison with null below, and this reference is not
3076 // kept afterwards.
3077 __ cmpl(Address(temp1, super_offset), Immediate(0));
3078 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3079 __ Bind(&do_copy);
3080 } else {
3081 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3082 }
3083 } else {
3084 // Non read barrier code.
3085
3086 // /* HeapReference<Class> */ temp1 = dest->klass_
3087 __ movl(temp1, Address(dest, class_offset));
3088 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3089 __ MaybeUnpoisonHeapReference(temp1);
3090 // Bail out if the destination is not a non primitive array.
3091 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3092 __ movl(temp2, Address(temp1, component_offset));
3093 __ testl(temp2, temp2);
3094 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3095 __ MaybeUnpoisonHeapReference(temp2);
3096 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
3097 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3098 // Re-poison the heap reference to make the compare instruction below
3099 // compare two poisoned references.
3100 __ PoisonHeapReference(temp1);
3101 }
3102
3103 // Note: if heap poisoning is on, we are comparing two poisoned references here.
3104 __ cmpl(temp1, Address(src, class_offset));
3105
3106 if (optimizations.GetDestinationIsTypedObjectArray()) {
3107 NearLabel do_copy;
3108 __ j(kEqual, &do_copy);
3109 __ MaybeUnpoisonHeapReference(temp1);
3110 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3111 __ movl(temp1, Address(temp1, component_offset));
3112 __ MaybeUnpoisonHeapReference(temp1);
3113 __ cmpl(Address(temp1, super_offset), Immediate(0));
3114 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3115 __ Bind(&do_copy);
3116 } else {
3117 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3118 }
3119 }
3120 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3121 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3122 // Bail out if the source is not a non primitive array.
3123 if (gUseReadBarrier && kUseBakerReadBarrier) {
3124 // /* HeapReference<Class> */ temp1 = src->klass_
3125 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3126 invoke, temp1_loc, src, class_offset, /* needs_null_check= */ false);
3127 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3128 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3129 invoke, temp1_loc, temp1, component_offset, /* needs_null_check= */ false);
3130 __ testl(temp1, temp1);
3131 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3132 // If heap poisoning is enabled, `temp1` has been unpoisoned
3133 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3134 } else {
3135 // /* HeapReference<Class> */ temp1 = src->klass_
3136 __ movl(temp1, Address(src, class_offset));
3137 __ MaybeUnpoisonHeapReference(temp1);
3138 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3139 __ movl(temp1, Address(temp1, component_offset));
3140 __ testl(temp1, temp1);
3141 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3142 __ MaybeUnpoisonHeapReference(temp1);
3143 }
3144 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
3145 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3146 }
3147
3148 const DataType::Type type = DataType::Type::kReference;
3149 const int32_t element_size = DataType::Size(type);
3150
3151 // Compute the base source address in `temp1`.
3152 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
3153
3154 if (gUseReadBarrier && kUseBakerReadBarrier) {
3155 // If it is needed (in the case of the fast-path loop), the base
3156 // destination address is computed later, as `temp2` is used for
3157 // intermediate computations.
3158
3159 // Compute the end source address in `temp3`.
3160 if (length.IsStackSlot()) {
3161 // Location `length` is again pointing at a stack slot, as
3162 // register `temp3` (which held the length parameter
3163 // earlier) has been overwritten; restore it now.
3164 DCHECK(length.Equals(length_arg));
3165 __ movl(temp3, Address(ESP, length.GetStackIndex()));
3166 length = Location::RegisterLocation(temp3);
3167 }
3168 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
3169
3170 // SystemArrayCopy implementation for Baker read barriers (see
3171 // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
3172 //
3173 // if (src_ptr != end_ptr) {
3174 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
3175 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
3176 // bool is_gray = (rb_state == ReadBarrier::GrayState());
3177 // if (is_gray) {
3178 // // Slow-path copy.
3179 // for (size_t i = 0; i != length; ++i) {
3180 // dest_array[dest_pos + i] =
3181 // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
3182 // }
3183 // } else {
3184 // // Fast-path copy.
3185 // do {
3186 // *dest_ptr++ = *src_ptr++;
3187 // } while (src_ptr != end_ptr)
3188 // }
3189 // }
3190
3191 NearLabel loop, done;
3192
3193 // Don't enter copy loop if `length == 0`.
3194 __ cmpl(temp1, temp3);
3195 __ j(kEqual, &done);
3196
3197 // Given the numeric representation, it's enough to check the low bit of the rb_state.
3198 static_assert(ReadBarrier::NonGrayState() == 0, "Expecting non-gray to have value 0");
3199 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3200 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
3201 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
3202 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
3203
3204 // if (rb_state == ReadBarrier::GrayState())
3205 // goto slow_path;
3206 // At this point, just do the "if" and make sure that flags are preserved until the branch.
3207 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
3208
3209 // Load fence to prevent load-load reordering.
3210 // Note that this is a no-op, thanks to the x86 memory model.
3211 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3212
3213 // Slow path used to copy array when `src` is gray.
3214 SlowPathCode* read_barrier_slow_path =
3215 new (codegen_->GetScopedAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
3216 codegen_->AddSlowPath(read_barrier_slow_path);
3217
3218 // We have done the "if" of the gray bit check above, now branch based on the flags.
3219 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
3220
3221 // Fast-path copy.
3222 // Compute the base destination address in `temp2`.
3223 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
3224 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3225 // poison/unpoison.
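// Each iteration moves one element memory-to-memory via push/pop, so no extra scratch
// register is needed; the CFI adjustments keep the frame description in sync with the
// temporary ESP change.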
3226 __ Bind(&loop);
3227 __ pushl(Address(temp1, 0));
3228 __ cfi().AdjustCFAOffset(4);
3229 __ popl(Address(temp2, 0));
3230 __ cfi().AdjustCFAOffset(-4);
3231 __ addl(temp1, Immediate(element_size));
3232 __ addl(temp2, Immediate(element_size));
3233 __ cmpl(temp1, temp3);
3234 __ j(kNotEqual, &loop);
3235
3236 __ Bind(read_barrier_slow_path->GetExitLabel());
3237 __ Bind(&done);
3238 } else {
3239 // Non read barrier code.
3240 // Compute the base destination address in `temp2`.
3241 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
3242 // Compute the end source address in `temp3`.
3243 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
3244 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3245 // poison/unpoison.
3246 NearLabel loop, done;
3247 __ cmpl(temp1, temp3);
3248 __ j(kEqual, &done);
3249 __ Bind(&loop);
3250 __ pushl(Address(temp1, 0));
3251 __ cfi().AdjustCFAOffset(4);
3252 __ popl(Address(temp2, 0));
3253 __ cfi().AdjustCFAOffset(-4);
3254 __ addl(temp1, Immediate(element_size));
3255 __ addl(temp2, Immediate(element_size));
3256 __ cmpl(temp1, temp3);
3257 __ j(kNotEqual, &loop);
3258 __ Bind(&done);
3259 }
3260
3261 // We only need one card marking on the destination array.
3262 codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* emit_null_check= */ false);
3263
3264 __ Bind(intrinsic_slow_path->GetExitLabel());
3265 }
3266
RequestBaseMethodAddressInRegister(HInvoke * invoke)3267 static void RequestBaseMethodAddressInRegister(HInvoke* invoke) {
3268 LocationSummary* locations = invoke->GetLocations();
3269 if (locations != nullptr) {
3270 HInvokeStaticOrDirect* invoke_static_or_direct = invoke->AsInvokeStaticOrDirect();
3271 // Note: The base method address is not present yet when this is called from the
3272 // PCRelativeHandlerVisitor via IsCallFreeIntrinsic() to determine whether to insert it.
3273 if (invoke_static_or_direct->HasSpecialInput()) {
3274 DCHECK(invoke_static_or_direct->InputAt(invoke_static_or_direct->GetSpecialInputIndex())
3275 ->IsX86ComputeBaseMethodAddress());
3276 locations->SetInAt(invoke_static_or_direct->GetSpecialInputIndex(),
3277 Location::RequiresRegister());
3278 }
3279 }
3280 }
3281
VisitIntegerValueOf(HInvoke * invoke)3282 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
3283 DCHECK(invoke->IsInvokeStaticOrDirect());
3284 InvokeRuntimeCallingConvention calling_convention;
3285 IntrinsicVisitor::ComputeIntegerValueOfLocations(
3286 invoke,
3287 codegen_,
3288 Location::RegisterLocation(EAX),
3289 Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3290 RequestBaseMethodAddressInRegister(invoke);
3291 }
3292
VisitIntegerValueOf(HInvoke * invoke)3293 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
3294 DCHECK(invoke->IsInvokeStaticOrDirect());
3295 IntrinsicVisitor::IntegerValueOfInfo info =
3296 IntrinsicVisitor::ComputeIntegerValueOfInfo(invoke, codegen_->GetCompilerOptions());
3297 LocationSummary* locations = invoke->GetLocations();
3298 X86Assembler* assembler = GetAssembler();
3299
3300 Register out = locations->Out().AsRegister<Register>();
3301 auto allocate_instance = [&]() {
3302 DCHECK_EQ(out, InvokeRuntimeCallingConvention().GetRegisterAt(0));
3303 codegen_->LoadIntrinsicDeclaringClass(out, invoke->AsInvokeStaticOrDirect());
3304 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3305 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3306 };
3307 if (invoke->InputAt(0)->IsConstant()) {
3308 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3309 if (static_cast<uint32_t>(value - info.low) < info.length) {
3310 // Just embed the j.l.Integer in the code.
3311 DCHECK_NE(info.value_boot_image_reference, IntegerValueOfInfo::kInvalidReference);
3312 codegen_->LoadBootImageAddress(
3313 out, info.value_boot_image_reference, invoke->AsInvokeStaticOrDirect());
3314 } else {
3315 DCHECK(locations->CanCall());
3316 // Allocate and initialize a new j.l.Integer.
3317 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3318 // JIT object table.
3319 allocate_instance();
3320 __ movl(Address(out, info.value_offset), Immediate(value));
3321 }
3322 } else {
3323 DCHECK(locations->CanCall());
3324 Register in = locations->InAt(0).AsRegister<Register>();
3325 // Check bounds of our cache.
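// In effect: if (static_cast<uint32_t>(in - info.low) >= info.length) goto allocate;
// one subtraction plus an unsigned compare covers both "below low" and "above high".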
3326 __ leal(out, Address(in, -info.low));
3327 __ cmpl(out, Immediate(info.length));
3328 NearLabel allocate, done;
3329 __ j(kAboveEqual, &allocate);
3330 // If the value is within the bounds, load the j.l.Integer directly from the array.
3331 constexpr size_t kElementSize = sizeof(mirror::HeapReference<mirror::Object>);
3332 static_assert((1u << TIMES_4) == sizeof(mirror::HeapReference<mirror::Object>),
3333 "Check heap reference size.");
3334 if (codegen_->GetCompilerOptions().IsBootImage()) {
3335 DCHECK_EQ(invoke->InputCount(), invoke->GetNumberOfArguments() + 1u);
3336 size_t method_address_index = invoke->AsInvokeStaticOrDirect()->GetSpecialInputIndex();
3337 HX86ComputeBaseMethodAddress* method_address =
3338 invoke->InputAt(method_address_index)->AsX86ComputeBaseMethodAddress();
3339 DCHECK(method_address != nullptr);
3340 Register method_address_reg =
3341 invoke->GetLocations()->InAt(method_address_index).AsRegister<Register>();
3342 __ movl(out,
3343 Address(method_address_reg, out, TIMES_4, CodeGeneratorX86::kPlaceholder32BitOffset));
3344 codegen_->RecordBootImageIntrinsicPatch(method_address, info.array_data_boot_image_reference);
3345 } else {
3346 // Note: We're about to clobber the index in `out`, so we need to use `in` and
3347 // adjust the offset accordingly.
3348 uint32_t mid_array_boot_image_offset =
3349 info.array_data_boot_image_reference - info.low * kElementSize;
3350 codegen_->LoadBootImageAddress(
3351 out, mid_array_boot_image_offset, invoke->AsInvokeStaticOrDirect());
3352 DCHECK_NE(out, in);
3353 __ movl(out, Address(out, in, TIMES_4, 0));
3354 }
3355 __ MaybeUnpoisonHeapReference(out);
3356 __ jmp(&done);
3357 __ Bind(&allocate);
3358 // Otherwise allocate and initialize a new j.l.Integer.
3359 allocate_instance();
3360 __ movl(Address(out, info.value_offset), in);
3361 __ Bind(&done);
3362 }
3363 }
3364
VisitReferenceGetReferent(HInvoke * invoke)3365 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
3366 IntrinsicVisitor::CreateReferenceGetReferentLocations(invoke, codegen_);
3367 RequestBaseMethodAddressInRegister(invoke);
3368 }
3369
VisitReferenceGetReferent(HInvoke * invoke)3370 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
3371 X86Assembler* assembler = GetAssembler();
3372 LocationSummary* locations = invoke->GetLocations();
3373
3374 Location obj = locations->InAt(0);
3375 Location out = locations->Out();
3376
3377 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
3378 codegen_->AddSlowPath(slow_path);
3379
3380 if (gUseReadBarrier) {
3381 // Check self->GetWeakRefAccessEnabled().
3382 ThreadOffset32 offset = Thread::WeakRefAccessEnabledOffset<kX86PointerSize>();
3383 __ fs()->cmpl(Address::Absolute(offset),
3384 Immediate(enum_cast<int32_t>(WeakRefAccessState::kVisiblyEnabled)));
3385 __ j(kNotEqual, slow_path->GetEntryLabel());
3386 }
3387
3388 // Load the java.lang.ref.Reference class, use the output register as a temporary.
3389 codegen_->LoadIntrinsicDeclaringClass(out.AsRegister<Register>(),
3390 invoke->AsInvokeStaticOrDirect());
3391
3392 // Check static fields java.lang.ref.Reference.{disableIntrinsic,slowPathEnabled} together.
3393 MemberOffset disable_intrinsic_offset = IntrinsicVisitor::GetReferenceDisableIntrinsicOffset();
3394 DCHECK_ALIGNED(disable_intrinsic_offset.Uint32Value(), 2u);
3395 DCHECK_EQ(disable_intrinsic_offset.Uint32Value() + 1u,
3396 IntrinsicVisitor::GetReferenceSlowPathEnabledOffset().Uint32Value());
3397 __ cmpw(Address(out.AsRegister<Register>(), disable_intrinsic_offset.Uint32Value()),
3398 Immediate(0));
3399 __ j(kNotEqual, slow_path->GetEntryLabel());
3400
3401 // Load the value from the field.
3402 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3403 if (gUseReadBarrier && kUseBakerReadBarrier) {
3404 codegen_->GenerateFieldLoadWithBakerReadBarrier(invoke,
3405 out,
3406 obj.AsRegister<Register>(),
3407 referent_offset,
3408 /*needs_null_check=*/ true);
3409 // Note that the fence is a no-op, thanks to the x86 memory model.
3410 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3411 } else {
3412 __ movl(out.AsRegister<Register>(), Address(obj.AsRegister<Register>(), referent_offset));
3413 codegen_->MaybeRecordImplicitNullCheck(invoke);
3414 // Note that the fence is a no-op, thanks to the x86 memory model.
3415 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3416 codegen_->MaybeGenerateReadBarrierSlow(invoke, out, out, obj, referent_offset);
3417 }
3418 __ Bind(slow_path->GetExitLabel());
3419 }
3420
VisitReferenceRefersTo(HInvoke * invoke)3421 void IntrinsicLocationsBuilderX86::VisitReferenceRefersTo(HInvoke* invoke) {
3422 IntrinsicVisitor::CreateReferenceRefersToLocations(invoke);
3423 }
3424
VisitReferenceRefersTo(HInvoke * invoke)3425 void IntrinsicCodeGeneratorX86::VisitReferenceRefersTo(HInvoke* invoke) {
3426 X86Assembler* assembler = GetAssembler();
3427 LocationSummary* locations = invoke->GetLocations();
3428
3429 Register obj = locations->InAt(0).AsRegister<Register>();
3430 Register other = locations->InAt(1).AsRegister<Register>();
3431 Register out = locations->Out().AsRegister<Register>();
3432
3433 uint32_t referent_offset = mirror::Reference::ReferentOffset().Uint32Value();
3434 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
3435
3436 __ movl(out, Address(obj, referent_offset));
3437 codegen_->MaybeRecordImplicitNullCheck(invoke);
3438 __ MaybeUnpoisonHeapReference(out);
3439 // Note that the fence is a no-op, thanks to the x86 memory model.
3440 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny); // `referent` is volatile.
3441
3442 NearLabel end, return_true, return_false;
3443 __ cmpl(out, other);
3444
3445 if (gUseReadBarrier) {
3446 DCHECK(kUseBakerReadBarrier);
3447
3448 __ j(kEqual, &return_true);
3449
3450 // Check if the loaded reference is null.
3451 __ testl(out, out);
3452 __ j(kZero, &return_false);
3453
3454 // For correct memory visibility, we need a barrier before loading the lock word
3455 // but we already have the barrier emitted for volatile load above which is sufficient.
3456
3457 // Load the lockword and check if it is a forwarding address.
3458 static_assert(LockWord::kStateShift == 30u);
3459 static_assert(LockWord::kStateForwardingAddress == 3u);
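// With the forwarding state (3) in the top two bits, a forwarded lock word viewed as an
// unsigned value is >= 0xc0000000, hence the single unsigned compare below.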
3460 __ movl(out, Address(out, monitor_offset));
3461 __ cmpl(out, Immediate(static_cast<int32_t>(0xc0000000)));
3462 __ j(kBelow, &return_false);
3463
3464 // Extract the forwarding address and compare with `other`.
3465 __ shll(out, Immediate(LockWord::kForwardingAddressShift));
3466 __ cmpl(out, other);
3467 }
3468
3469 __ j(kNotEqual, &return_false);
3470
3471 // Return true and exit the function.
3472 __ Bind(&return_true);
3473 __ movl(out, Immediate(1));
3474 __ jmp(&end);
3475
3476 // Return false and exit the function.
3477 __ Bind(&return_false);
3478 __ xorl(out, out);
3479 __ Bind(&end);
3480 }
3481
VisitThreadInterrupted(HInvoke * invoke)3482 void IntrinsicLocationsBuilderX86::VisitThreadInterrupted(HInvoke* invoke) {
3483 LocationSummary* locations =
3484 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3485 locations->SetOut(Location::RequiresRegister());
3486 }
3487
VisitThreadInterrupted(HInvoke * invoke)3488 void IntrinsicCodeGeneratorX86::VisitThreadInterrupted(HInvoke* invoke) {
3489 X86Assembler* assembler = GetAssembler();
3490 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
3491 Address address = Address::Absolute(Thread::InterruptedOffset<kX86PointerSize>().Int32Value());
3492 NearLabel done;
3493 __ fs()->movl(out, address);
3494 __ testl(out, out);
3495 __ j(kEqual, &done);
3496 __ fs()->movl(address, Immediate(0));
3497 codegen_->MemoryFence();
3498 __ Bind(&done);
3499 }
3500
VisitReachabilityFence(HInvoke * invoke)3501 void IntrinsicLocationsBuilderX86::VisitReachabilityFence(HInvoke* invoke) {
3502 LocationSummary* locations =
3503 new (allocator_) LocationSummary(invoke, LocationSummary::kNoCall, kIntrinsified);
3504 locations->SetInAt(0, Location::Any());
3505 }
3506
VisitReachabilityFence(HInvoke * invoke ATTRIBUTE_UNUSED)3507 void IntrinsicCodeGeneratorX86::VisitReachabilityFence(HInvoke* invoke ATTRIBUTE_UNUSED) { }
3508
VisitIntegerDivideUnsigned(HInvoke * invoke)3509 void IntrinsicLocationsBuilderX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3510 LocationSummary* locations = new (allocator_) LocationSummary(invoke,
3511 LocationSummary::kCallOnSlowPath,
3512 kIntrinsified);
3513 locations->SetInAt(0, Location::RegisterLocation(EAX));
3514 locations->SetInAt(1, Location::RequiresRegister());
3515 locations->SetOut(Location::SameAsFirstInput());
3516 // Intel uses edx:eax as the dividend.
3517 locations->AddTemp(Location::RegisterLocation(EDX));
3518 }
3519
VisitIntegerDivideUnsigned(HInvoke * invoke)3520 void IntrinsicCodeGeneratorX86::VisitIntegerDivideUnsigned(HInvoke* invoke) {
3521 X86Assembler* assembler = GetAssembler();
3522 LocationSummary* locations = invoke->GetLocations();
3523 Location out = locations->Out();
3524 Location first = locations->InAt(0);
3525 Location second = locations->InAt(1);
3526 Register edx = locations->GetTemp(0).AsRegister<Register>();
3527 Register second_reg = second.AsRegister<Register>();
3528
3529 DCHECK_EQ(EAX, first.AsRegister<Register>());
3530 DCHECK_EQ(EAX, out.AsRegister<Register>());
3531 DCHECK_EQ(EDX, edx);
3532
3533 // Check if the divisor is zero; if so, bail out to the managed implementation.
3534 __ testl(second_reg, second_reg);
3535 SlowPathCode* slow_path = new (codegen_->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3536 codegen_->AddSlowPath(slow_path);
3537 __ j(kEqual, slow_path->GetEntryLabel());
3538
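// Unsigned 32-bit division: zero-extend the dividend into edx:eax (edx = 0); divl then
// leaves the quotient in EAX (the output register) and the remainder in EDX.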
3539 __ xorl(edx, edx);
3540 __ divl(second_reg);
3541
3542 __ Bind(slow_path->GetExitLabel());
3543 }
3544
HasVarHandleIntrinsicImplementation(HInvoke * invoke)3545 static bool HasVarHandleIntrinsicImplementation(HInvoke* invoke) {
3546 VarHandleOptimizations optimizations(invoke);
3547 if (optimizations.GetDoNotIntrinsify()) {
3548 return false;
3549 }
3550
3551 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3552 DCHECK_LE(expected_coordinates_count, 2u); // Filtered by the `DoNotIntrinsify` flag above.
3553 if (expected_coordinates_count > 1u) {
3554 // Only VarHandles for static and instance fields are supported for now.
3555 // TODO: add support for arrays and views.
3556 return false;
3557 }
3558
3559 return true;
3560 }
3561
GenerateVarHandleAccessModeCheck(Register varhandle_object,mirror::VarHandle::AccessMode access_mode,SlowPathCode * slow_path,X86Assembler * assembler)3562 static void GenerateVarHandleAccessModeCheck(Register varhandle_object,
3563 mirror::VarHandle::AccessMode access_mode,
3564 SlowPathCode* slow_path,
3565 X86Assembler* assembler) {
3566 const uint32_t access_modes_bitmask_offset =
3567 mirror::VarHandle::AccessModesBitMaskOffset().Uint32Value();
3568 const uint32_t access_mode_bit = 1u << static_cast<uint32_t>(access_mode);
3569
3570 // If the access mode is not supported, bail out to the runtime implementation.
3571 __ testl(Address(varhandle_object, access_modes_bitmask_offset), Immediate(access_mode_bit));
3572 __ j(kZero, slow_path->GetEntryLabel());
3573 }
3574
GenerateVarHandleStaticFieldCheck(Register varhandle_object,SlowPathCode * slow_path,X86Assembler * assembler)3575 static void GenerateVarHandleStaticFieldCheck(Register varhandle_object,
3576 SlowPathCode* slow_path,
3577 X86Assembler* assembler) {
3578 const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3579
3580 // Check that the VarHandle references a static field by checking that coordinateType0 == null.
3581 // Do not emit read barrier (or unpoison the reference) for comparing to null.
3582 __ cmpl(Address(varhandle_object, coordtype0_offset), Immediate(0));
3583 __ j(kNotEqual, slow_path->GetEntryLabel());
3584 }
3585
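// Checks that `object` is assignable to the type stored at `type_address` by walking the
// superclass chain. Roughly (illustrative pseudocode, heap reference poisoning elided):
//   if (object == null) return;  // null trivially passes (when object_can_be_null)
//   klass = object->klass_;
//   while (klass != type) {
//     klass = klass->super_class_;
//     if (klass == null) goto slow_path;  // no match (or e.g. an interface type);
//   }                                     // the slow path handles such cases.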
GenerateSubTypeObjectCheck(Register object,Register temp,Address type_address,SlowPathCode * slow_path,X86Assembler * assembler,bool object_can_be_null=true)3586 static void GenerateSubTypeObjectCheck(Register object,
3587 Register temp,
3588 Address type_address,
3589 SlowPathCode* slow_path,
3590 X86Assembler* assembler,
3591 bool object_can_be_null = true) {
3592 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
3593 const uint32_t super_class_offset = mirror::Class::SuperClassOffset().Uint32Value();
3594 NearLabel check_type_compatibility, type_matched;
3595
3596 // If the object is null, there is no need to check the type
3597 if (object_can_be_null) {
3598 __ testl(object, object);
3599 __ j(kZero, &type_matched);
3600 }
3601
3602 // Do not unpoison for in-memory comparison.
3603 // We deliberately avoid the read barrier, letting the slow path handle the false negatives.
3604 __ movl(temp, Address(object, class_offset));
3605 __ Bind(&check_type_compatibility);
3606 __ cmpl(temp, type_address);
3607 __ j(kEqual, &type_matched);
3608 // Load the super class.
3609 __ MaybeUnpoisonHeapReference(temp);
3610 __ movl(temp, Address(temp, super_class_offset));
3611 // If the super class is null, we reached the root of the hierarchy without a match.
3612 // We let the slow path handle uncovered cases (e.g. interfaces).
3613 __ testl(temp, temp);
3614 __ j(kEqual, slow_path->GetEntryLabel());
3615 __ jmp(&check_type_compatibility);
3616 __ Bind(&type_matched);
3617 }
3618
GenerateVarHandleInstanceFieldChecks(HInvoke * invoke,Register temp,SlowPathCode * slow_path,X86Assembler * assembler)3619 static void GenerateVarHandleInstanceFieldChecks(HInvoke* invoke,
3620 Register temp,
3621 SlowPathCode* slow_path,
3622 X86Assembler* assembler) {
3623 VarHandleOptimizations optimizations(invoke);
3624 LocationSummary* locations = invoke->GetLocations();
3625 Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3626 Register object = locations->InAt(1).AsRegister<Register>();
3627
3628 const uint32_t coordtype0_offset = mirror::VarHandle::CoordinateType0Offset().Uint32Value();
3629 const uint32_t coordtype1_offset = mirror::VarHandle::CoordinateType1Offset().Uint32Value();
3630
3631 // Check that the VarHandle references an instance field by checking that
3632 // coordinateType1 == null. coordinateType0 should not be null, but this is handled by the
3633 // type compatibility check with the source object's type, which will fail for null.
3634 __ cmpl(Address(varhandle_object, coordtype1_offset), Immediate(0));
3635 __ j(kNotEqual, slow_path->GetEntryLabel());
3636
3637 // Check if the object is null
3638 if (!optimizations.GetSkipObjectNullCheck()) {
3639 __ testl(object, object);
3640 __ j(kZero, slow_path->GetEntryLabel());
3641 }
3642
3643 // Check the object's class against coordinateType0.
3644 GenerateSubTypeObjectCheck(object,
3645 temp,
3646 Address(varhandle_object, coordtype0_offset),
3647 slow_path,
3648 assembler,
3649 /* object_can_be_null= */ false);
3650 }
3651
GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,Register temp,DataType::Type type,SlowPathCode * slow_path,X86Assembler * assembler)3652 static void GenerateVarTypePrimitiveTypeCheck(Register varhandle_object,
3653 Register temp,
3654 DataType::Type type,
3655 SlowPathCode* slow_path,
3656 X86Assembler* assembler) {
3657 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3658 const uint32_t primitive_type_offset = mirror::Class::PrimitiveTypeOffset().Uint32Value();
3659 const uint32_t primitive_type = static_cast<uint32_t>(DataTypeToPrimitive(type));
3660
3661 // We do not need a read barrier when loading this reference, as it is used only to
3662 // read a constant field through it.
3663 __ movl(temp, Address(varhandle_object, var_type_offset));
3664 __ MaybeUnpoisonHeapReference(temp);
3665 __ cmpw(Address(temp, primitive_type_offset), Immediate(primitive_type));
3666 __ j(kNotEqual, slow_path->GetEntryLabel());
3667 }
3668
GenerateVarHandleCommonChecks(HInvoke * invoke,Register temp,SlowPathCode * slow_path,X86Assembler * assembler)3669 static void GenerateVarHandleCommonChecks(HInvoke *invoke,
3670 Register temp,
3671 SlowPathCode* slow_path,
3672 X86Assembler* assembler) {
3673 LocationSummary* locations = invoke->GetLocations();
3674 Register vh_object = locations->InAt(0).AsRegister<Register>();
3675 mirror::VarHandle::AccessMode access_mode =
3676 mirror::VarHandle::GetAccessModeByIntrinsic(invoke->GetIntrinsic());
3677
3678 GenerateVarHandleAccessModeCheck(vh_object,
3679 access_mode,
3680 slow_path,
3681 assembler);
3682
3683 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3684 switch (expected_coordinates_count) {
3685 case 0u:
3686 GenerateVarHandleStaticFieldCheck(vh_object, slow_path, assembler);
3687 break;
3688 case 1u: {
3689 GenerateVarHandleInstanceFieldChecks(invoke, temp, slow_path, assembler);
3690 break;
3691 }
3692 default:
3693 // Unimplemented
3694 UNREACHABLE();
3695 }
3696
3697 // Check the return type and varType parameters.
3698 mirror::VarHandle::AccessModeTemplate access_mode_template =
3699 mirror::VarHandle::GetAccessModeTemplate(access_mode);
3700 DataType::Type type = invoke->GetType();
3701
3702 switch (access_mode_template) {
3703 case mirror::VarHandle::AccessModeTemplate::kGet:
3704 // Check the varType.primitiveType against the type we're trying to retrieve. Reference types
3705 // are also checked later by a HCheckCast node as an additional check.
3706 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, type, slow_path, assembler);
3707 break;
3708 case mirror::VarHandle::AccessModeTemplate::kSet:
3709 case mirror::VarHandle::AccessModeTemplate::kGetAndUpdate: {
3710 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3711 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3712
3713 // Check the varType.primitiveType against the type of the value we're trying to set.
3714 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3715 if (value_type == DataType::Type::kReference) {
3716 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3717
3718 // If the value type is a reference, check it against the varType.
3719 GenerateSubTypeObjectCheck(locations->InAt(value_index).AsRegister<Register>(),
3720 temp,
3721 Address(vh_object, var_type_offset),
3722 slow_path,
3723 assembler);
3724 }
3725 break;
3726 }
3727 case mirror::VarHandle::AccessModeTemplate::kCompareAndSet:
3728 case mirror::VarHandle::AccessModeTemplate::kCompareAndExchange: {
3729 uint32_t new_value_index = invoke->GetNumberOfArguments() - 1;
3730 uint32_t expected_value_index = invoke->GetNumberOfArguments() - 2;
3731 DataType::Type value_type = GetDataTypeFromShorty(invoke, new_value_index);
3732 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, expected_value_index));
3733
3734 // Check the varType.primitiveType against the type of the expected value.
3735 GenerateVarTypePrimitiveTypeCheck(vh_object, temp, value_type, slow_path, assembler);
3736 if (value_type == DataType::Type::kReference) {
3737 const uint32_t var_type_offset = mirror::VarHandle::VarTypeOffset().Uint32Value();
3738
3739 // If the value type is a reference, check both the expected and the new value against
3740 // the varType.
3741 GenerateSubTypeObjectCheck(locations->InAt(new_value_index).AsRegister<Register>(),
3742 temp,
3743 Address(vh_object, var_type_offset),
3744 slow_path,
3745 assembler);
3746 GenerateSubTypeObjectCheck(locations->InAt(expected_value_index).AsRegister<Register>(),
3747 temp,
3748 Address(vh_object, var_type_offset),
3749 slow_path,
3750 assembler);
3751 }
3752 break;
3753 }
3754 }
3755 }
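// Note: the checks above roughly mirror what a VarHandle access verifies before touching memory.
// Illustrative Java (class and field names hypothetical):
//   VarHandle vh = MethodHandles.lookup().findVarHandle(Foo.class, "bar", int.class);
//   int v = (int) vh.getVolatile(foo);  // access mode, coordinate and varType checks come first.
// Any failed check branches to `slow_path`, i.e. falls back to the non-intrinsified call.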
3756
3757 // This method computes the address of the field referenced by a field VarHandle (base + offset).
3758 // The return value is the register containing the object's reference (for an instance field)
3759 // or the declaring class (for a static field). The declaring class is stored in the `temp`
3760 // register. The field's offset is loaded into the `offset` register.
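// Illustrative sketch for an instance field (not emitted verbatim):
//   movl temp, [varhandle + ArtFieldOffset()]   // load the ArtField*
//   movl offset, [temp + OffsetOffset()]        // load the field offset within the holder
// after which callers access the field at `holder + offset`.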
3761 static Register GenerateVarHandleFieldReference(HInvoke* invoke,
3762 CodeGeneratorX86* codegen,
3763 Register temp,
3764 /*out*/ Register offset) {
3765 X86Assembler* assembler = codegen->GetAssembler();
3766 LocationSummary* locations = invoke->GetLocations();
3767 const uint32_t artfield_offset = mirror::FieldVarHandle::ArtFieldOffset().Uint32Value();
3768 const uint32_t offset_offset = ArtField::OffsetOffset().Uint32Value();
3769 const uint32_t declaring_class_offset = ArtField::DeclaringClassOffset().Uint32Value();
3770 Register varhandle_object = locations->InAt(0).AsRegister<Register>();
3771
3772 // Load the ArtField and the offset
3773 __ movl(temp, Address(varhandle_object, artfield_offset));
3774 __ movl(offset, Address(temp, offset_offset));
3775 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3776 if (expected_coordinates_count == 0) {
3777 // For static fields, load the declaring class
3778 InstructionCodeGeneratorX86* instr_codegen =
3779 down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
3780 instr_codegen->GenerateGcRootFieldLoad(invoke,
3781 Location::RegisterLocation(temp),
3782 Address(temp, declaring_class_offset),
3783 /* fixup_label= */ nullptr,
3784 gCompilerReadBarrierOption);
3785 return temp;
3786 }
3787
3788 // For instance fields, return the register containing the object.
3789 DCHECK_EQ(expected_coordinates_count, 1u);
3790
3791 return locations->InAt(1).AsRegister<Register>();
3792 }
3793
3794 static void CreateVarHandleGetLocations(HInvoke* invoke) {
3795   // The only read barrier implementation supporting the
3796   // VarHandle intrinsics is the Baker-style read barrier.
3797 if (gUseReadBarrier && !kUseBakerReadBarrier) {
3798 return;
3799 }
3800
3801 if (!HasVarHandleIntrinsicImplementation(invoke)) {
3802 return;
3803 }
3804
3805 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3806 LocationSummary* locations = new (allocator) LocationSummary(
3807 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3808 locations->SetInAt(0, Location::RequiresRegister());
3809 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3810 if (expected_coordinates_count == 1u) {
3811 // For instance fields, this is the source object.
3812 locations->SetInAt(1, Location::RequiresRegister());
3813 }
3814 locations->AddTemp(Location::RequiresRegister());
3815
3816 DataType::Type type = invoke->GetType();
3817 switch (DataType::Kind(type)) {
3818 case DataType::Type::kInt64:
3819 locations->AddTemp(Location::RequiresRegister());
3820 if (invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3821 // We need an XmmRegister for Int64 to ensure an atomic load
3822 locations->AddTemp(Location::RequiresFpuRegister());
3823 }
3824 FALLTHROUGH_INTENDED;
3825 case DataType::Type::kInt32:
3826 case DataType::Type::kReference:
3827 locations->SetOut(Location::RequiresRegister());
3828 break;
3829 default:
3830 DCHECK(DataType::IsFloatingPointType(type));
3831 locations->AddTemp(Location::RequiresRegister());
3832 locations->SetOut(Location::RequiresFpuRegister());
3833 }
3834 }
3835
3836 static void GenerateVarHandleGet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3837   // The only read barrier implementation supporting the
3838   // VarHandle intrinsics is the Baker-style read barrier.
3839 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
3840
3841 X86Assembler* assembler = codegen->GetAssembler();
3842 LocationSummary* locations = invoke->GetLocations();
3843 DataType::Type type = invoke->GetType();
3844 DCHECK_NE(type, DataType::Type::kVoid);
3845 Register temp = locations->GetTemp(0).AsRegister<Register>();
3846 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
3847 codegen->AddSlowPath(slow_path);
3848
3849 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
3850
3851 Location out = locations->Out();
3852 // Use 'out' as a temporary register if it's a core register
3853 Register offset =
3854 out.IsRegister() ? out.AsRegister<Register>() : locations->GetTemp(1).AsRegister<Register>();
3855
3856 // Get the field referred by the VarHandle. The returned register contains the object reference
3857 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
3858 // declaring class will be placed in 'temp' register.
3859 Register ref = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
3860 Address field_addr(ref, offset, TIMES_1, 0);
3861
3862 // Load the value from the field
3863 if (type == DataType::Type::kReference && gCompilerReadBarrierOption == kWithReadBarrier) {
3864 codegen->GenerateReferenceLoadWithBakerReadBarrier(
3865 invoke, out, ref, field_addr, /* needs_null_check= */ false);
3866 } else if (type == DataType::Type::kInt64 &&
3867 invoke->GetIntrinsic() != Intrinsics::kVarHandleGet) {
3868 XmmRegister xmm_temp = locations->GetTemp(2).AsFpuRegister<XmmRegister>();
3869 codegen->LoadFromMemoryNoBarrier(
3870 type, out, field_addr, /* instr= */ nullptr, xmm_temp, /* is_atomic_load= */ true);
3871 } else {
3872 codegen->LoadFromMemoryNoBarrier(type, out, field_addr);
3873 }
3874
3875 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetVolatile ||
3876 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAcquire) {
3877 // Load fence to prevent load-load reordering.
3878 // Note that this is a no-op, thanks to the x86 memory model.
3879 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3880 }
3881
3882 __ Bind(slow_path->GetExitLabel());
3883 }
3884
3885 void IntrinsicLocationsBuilderX86::VisitVarHandleGet(HInvoke* invoke) {
3886 CreateVarHandleGetLocations(invoke);
3887 }
3888
3889 void IntrinsicCodeGeneratorX86::VisitVarHandleGet(HInvoke* invoke) {
3890 GenerateVarHandleGet(invoke, codegen_);
3891 }
3892
3893 void IntrinsicLocationsBuilderX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
3894 CreateVarHandleGetLocations(invoke);
3895 }
3896
3897 void IntrinsicCodeGeneratorX86::VisitVarHandleGetVolatile(HInvoke* invoke) {
3898 GenerateVarHandleGet(invoke, codegen_);
3899 }
3900
3901 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
3902 CreateVarHandleGetLocations(invoke);
3903 }
3904
3905 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAcquire(HInvoke* invoke) {
3906 GenerateVarHandleGet(invoke, codegen_);
3907 }
3908
3909 void IntrinsicLocationsBuilderX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
3910 CreateVarHandleGetLocations(invoke);
3911 }
3912
3913 void IntrinsicCodeGeneratorX86::VisitVarHandleGetOpaque(HInvoke* invoke) {
3914 GenerateVarHandleGet(invoke, codegen_);
3915 }
3916
3917 static void CreateVarHandleSetLocations(HInvoke* invoke) {
3918   // The only read barrier implementation supporting the
3919   // VarHandle intrinsics is the Baker-style read barrier.
3920 if (gUseReadBarrier && !kUseBakerReadBarrier) {
3921 return;
3922 }
3923
3924 if (!HasVarHandleIntrinsicImplementation(invoke)) {
3925 return;
3926 }
3927
3928 // The last argument should be the value we intend to set.
3929 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3930 HInstruction* value = invoke->InputAt(value_index);
3931 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
3932 bool needs_atomicity = invoke->GetIntrinsic() != Intrinsics::kVarHandleSet;
3933 if (value_type == DataType::Type::kInt64 && (!value->IsConstant() || needs_atomicity)) {
3934 // We avoid the case of a non-constant (or volatile) Int64 value because we would need to
3935 // place it in a register pair. If the slow path is taken, the ParallelMove might fail to move
3936 // the pair according to the X86DexCallingConvention in case of an overlap (e.g., move the
3937 // int64 value from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
3938 return;
3939 }
3940
3941 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
3942 LocationSummary* locations = new (allocator) LocationSummary(
3943 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
3944 locations->SetInAt(0, Location::RequiresRegister());
3945 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
3946 if (expected_coordinates_count == 1u) {
3947 // For instance fields, this is the source object
3948 locations->SetInAt(1, Location::RequiresRegister());
3949 }
3950
3951 switch (value_type) {
3952 case DataType::Type::kBool:
3953 case DataType::Type::kInt8:
3954 case DataType::Type::kUint8:
3955 // Ensure the value is in a byte register
3956 locations->SetInAt(value_index, Location::ByteRegisterOrConstant(EBX, value));
3957 break;
3958 case DataType::Type::kInt16:
3959 case DataType::Type::kUint16:
3960 case DataType::Type::kInt32:
3961 locations->SetInAt(value_index, Location::RegisterOrConstant(value));
3962 break;
3963 case DataType::Type::kInt64:
3964 // We only handle constant non-atomic int64 values.
3965 DCHECK(value->IsConstant());
3966 locations->SetInAt(value_index, Location::ConstantLocation(value));
3967 break;
3968 case DataType::Type::kReference:
3969 locations->SetInAt(value_index, Location::RequiresRegister());
3970 break;
3971 default:
3972 DCHECK(DataType::IsFloatingPointType(value_type));
3973 if (needs_atomicity && value_type == DataType::Type::kFloat64) {
3974 locations->SetInAt(value_index, Location::RequiresFpuRegister());
3975 } else {
3976 locations->SetInAt(value_index, Location::FpuRegisterOrConstant(value));
3977 }
3978 }
3979
3980 locations->AddTemp(Location::RequiresRegister());
3981   // This temporary is also used for the card in MarkGCCard, so it must be a byte register.
3982 locations->AddTemp(Location::RegisterLocation(EAX));
3983 if (expected_coordinates_count == 0 && value_type == DataType::Type::kReference) {
3984 // For static reference fields, we need another temporary for the declaring class. We set it
3985 // last because we want to make sure that the first 2 temps are reserved for HandleFieldSet.
3986 locations->AddTemp(Location::RequiresRegister());
3987 }
3988 }
3989
3990 static void GenerateVarHandleSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
3991   // The only read barrier implementation supporting the
3992   // VarHandle intrinsics is the Baker-style read barrier.
3993 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
3994
3995 X86Assembler* assembler = codegen->GetAssembler();
3996 LocationSummary* locations = invoke->GetLocations();
3997 // The value we want to set is the last argument
3998 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
3999 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4000 Register temp = locations->GetTemp(0).AsRegister<Register>();
4001 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
4002 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4003 codegen->AddSlowPath(slow_path);
4004
4005 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4006
4007 // For static reference fields, we need another temporary for the declaring class. But since
4008 // for instance fields the object is in a separate register, it is safe to use the first
4009 // temporary register for GenerateVarHandleFieldReference.
4010 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4011 if (value_type == DataType::Type::kReference && expected_coordinates_count == 0) {
4012 temp = locations->GetTemp(2).AsRegister<Register>();
4013 }
4014
4015 Register offset = temp2;
4016 // Get the field referred by the VarHandle. The returned register contains the object reference
4017 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4018 // declaring class will be placed in 'temp' register.
4019 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4020
4021 bool is_volatile = false;
4022 switch (invoke->GetIntrinsic()) {
4023 case Intrinsics::kVarHandleSet:
4024 case Intrinsics::kVarHandleSetOpaque:
4025       // The only constraint for setOpaque is bitwise atomicity (atomically setting 64-bit
4026       // values), but we don't handle Int64 values here because we would need to place them in
4027       // a register pair. If the slow path is taken, the ParallelMove might fail to move the
4028       // pair in case of an overlap (e.g., move from <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4029 break;
4030 case Intrinsics::kVarHandleSetRelease:
4031 // setRelease needs to ensure atomicity too. See the above comment.
4032 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4033 break;
4034 case Intrinsics::kVarHandleSetVolatile:
4035 is_volatile = true;
4036 break;
4037 default:
4038 LOG(FATAL) << "GenerateVarHandleSet received non-set intrinsic " << invoke->GetIntrinsic();
4039 }
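  // To summarize the mapping above: set/setOpaque need no barrier, setRelease is covered by the
  // kAnyStore barrier already emitted (which should not produce an actual fence instruction on
  // x86), and setVolatile is delegated to HandleFieldSet below via `is_volatile`.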
4040
4041 InstructionCodeGeneratorX86* instr_codegen =
4042 down_cast<InstructionCodeGeneratorX86*>(codegen->GetInstructionVisitor());
4043 // Store the value to the field
4044 instr_codegen->HandleFieldSet(
4045 invoke,
4046 value_index,
4047 value_type,
4048 Address(reference, offset, TIMES_1, 0),
4049 reference,
4050 is_volatile,
4051 /* value_can_be_null */ true,
4052 // Value can be null, and this write barrier is not being relied on for other sets.
4053 WriteBarrierKind::kEmitWithNullCheck);
4054
4055 __ Bind(slow_path->GetExitLabel());
4056 }
4057
4058 void IntrinsicLocationsBuilderX86::VisitVarHandleSet(HInvoke* invoke) {
4059 CreateVarHandleSetLocations(invoke);
4060 }
4061
4062 void IntrinsicCodeGeneratorX86::VisitVarHandleSet(HInvoke* invoke) {
4063 GenerateVarHandleSet(invoke, codegen_);
4064 }
4065
4066 void IntrinsicLocationsBuilderX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4067 CreateVarHandleSetLocations(invoke);
4068 }
4069
4070 void IntrinsicCodeGeneratorX86::VisitVarHandleSetVolatile(HInvoke* invoke) {
4071 GenerateVarHandleSet(invoke, codegen_);
4072 }
4073
4074 void IntrinsicLocationsBuilderX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4075 CreateVarHandleSetLocations(invoke);
4076 }
4077
4078 void IntrinsicCodeGeneratorX86::VisitVarHandleSetRelease(HInvoke* invoke) {
4079 GenerateVarHandleSet(invoke, codegen_);
4080 }
4081
4082 void IntrinsicLocationsBuilderX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4083 CreateVarHandleSetLocations(invoke);
4084 }
4085
4086 void IntrinsicCodeGeneratorX86::VisitVarHandleSetOpaque(HInvoke* invoke) {
4087 GenerateVarHandleSet(invoke, codegen_);
4088 }
4089
4090 static void CreateVarHandleGetAndSetLocations(HInvoke* invoke) {
4091   // The only read barrier implementation supporting the
4092   // VarHandle intrinsics is the Baker-style read barrier.
4093 if (gUseReadBarrier && !kUseBakerReadBarrier) {
4094 return;
4095 }
4096
4097 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4098 return;
4099 }
4100
4101 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4102 uint32_t value_index = number_of_arguments - 1;
4103 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4104
4105 if (DataType::Is64BitType(value_type)) {
4106 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4107 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4108 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4109 // <EAX, EBX> to <EBX, ECX>).
4110 return;
4111 }
4112
4113 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4114 LocationSummary* locations = new (allocator) LocationSummary(
4115 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4116 locations->AddTemp(Location::RequiresRegister());
4117 locations->AddTemp(Location::RequiresRegister());
4118 // We use this temporary for the card, so we need a byte register
4119 locations->AddTemp(Location::RegisterLocation(EBX));
4120 locations->SetInAt(0, Location::RequiresRegister());
4121 if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4122 // For instance fields, this is the source object
4123 locations->SetInAt(1, Location::RequiresRegister());
4124 } else {
4125 // For static fields, we need another temp because one will be busy with the declaring class.
4126 locations->AddTemp(Location::RequiresRegister());
4127 }
4128 if (value_type == DataType::Type::kFloat32) {
4129 locations->AddTemp(Location::RegisterLocation(EAX));
4130 locations->SetInAt(value_index, Location::FpuRegisterOrConstant(invoke->InputAt(value_index)));
4131 locations->SetOut(Location::RequiresFpuRegister());
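    // (The EAX temp is needed because `xchg` only operates on general-purpose registers; the
    // float value is staged through EAX with movd in the code generator below.)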
4132 } else {
4133 locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4134 locations->SetOut(Location::RegisterLocation(EAX));
4135 }
4136 }
4137
4138 static void GenerateVarHandleGetAndSet(HInvoke* invoke, CodeGeneratorX86* codegen) {
4139   // The only read barrier implementation supporting the
4140   // VarHandle intrinsics is the Baker-style read barrier.
4141 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4142
4143 X86Assembler* assembler = codegen->GetAssembler();
4144 LocationSummary* locations = invoke->GetLocations();
4145 // The value we want to set is the last argument
4146 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4147 Location value = locations->InAt(value_index);
4148 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4149 Register temp = locations->GetTemp(1).AsRegister<Register>();
4150 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4151 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4152 codegen->AddSlowPath(slow_path);
4153
4154 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4155
4156 Register offset = locations->GetTemp(0).AsRegister<Register>();
4157 // Get the field referred by the VarHandle. The returned register contains the object reference
4158 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4159 // declaring class will be placed in 'temp' register.
4160 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4161 Address field_addr(reference, offset, TIMES_1, 0);
4162
4163 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetRelease) {
4164 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4165 }
4166
4167 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4168 // For static fields, we need another temporary for the declaring class. But since for instance
4169 // fields the object is in a separate register, it is safe to use the first temporary register.
4170 temp = expected_coordinates_count == 1u ? temp : locations->GetTemp(3).AsRegister<Register>();
4171 // No need for a lock prefix. `xchg` has an implicit lock when it is used with an address.
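  // For example, the Int32 case below is a single `xchgl value, [holder + offset]` that both
  // stores the new value and returns the previous one with full-barrier semantics.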
4172 switch (value_type) {
4173 case DataType::Type::kBool:
4174 __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4175 __ movzxb(locations->Out().AsRegister<Register>(),
4176 locations->Out().AsRegister<ByteRegister>());
4177 break;
4178 case DataType::Type::kInt8:
4179 __ xchgb(value.AsRegister<ByteRegister>(), field_addr);
4180 __ movsxb(locations->Out().AsRegister<Register>(),
4181 locations->Out().AsRegister<ByteRegister>());
4182 break;
4183 case DataType::Type::kUint16:
4184 __ xchgw(value.AsRegister<Register>(), field_addr);
4185 __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4186 break;
4187 case DataType::Type::kInt16:
4188 __ xchgw(value.AsRegister<Register>(), field_addr);
4189 __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4190 break;
4191 case DataType::Type::kInt32:
4192 __ xchgl(value.AsRegister<Register>(), field_addr);
4193 break;
4194 case DataType::Type::kFloat32:
4195 codegen->Move32(Location::RegisterLocation(EAX), value);
4196 __ xchgl(EAX, field_addr);
4197 __ movd(locations->Out().AsFpuRegister<XmmRegister>(), EAX);
4198 break;
4199 case DataType::Type::kReference: {
4200 if (gUseReadBarrier && kUseBakerReadBarrier) {
4201 // Need to make sure the reference stored in the field is a to-space
4202 // one before attempting the CAS or the CAS could fail incorrectly.
4203 codegen->GenerateReferenceLoadWithBakerReadBarrier(
4204 invoke,
4205 // Unused, used only as a "temporary" within the read barrier.
4206 Location::RegisterLocation(temp),
4207 reference,
4208 field_addr,
4209 /* needs_null_check= */ false,
4210 /* always_update_field= */ true,
4211 &temp2);
4212 }
4213 codegen->MarkGCCard(
4214 temp, temp2, reference, value.AsRegister<Register>(), /* emit_null_check= */ false);
4215 if (kPoisonHeapReferences) {
4216 __ movl(temp, value.AsRegister<Register>());
4217 __ PoisonHeapReference(temp);
4218 __ xchgl(temp, field_addr);
4219 __ UnpoisonHeapReference(temp);
4220 __ movl(locations->Out().AsRegister<Register>(), temp);
4221 } else {
4222 __ xchgl(locations->Out().AsRegister<Register>(), field_addr);
4223 }
4224 break;
4225 }
4226 default:
4227 UNREACHABLE();
4228 }
4229
4230 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndSetAcquire) {
4231 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4232 }
4233
4234 __ Bind(slow_path->GetExitLabel());
4235 }
4236
4237 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4238 CreateVarHandleGetAndSetLocations(invoke);
4239 }
4240
4241 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSet(HInvoke* invoke) {
4242 GenerateVarHandleGetAndSet(invoke, codegen_);
4243 }
4244
4245 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4246 CreateVarHandleGetAndSetLocations(invoke);
4247 }
4248
4249 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetAcquire(HInvoke* invoke) {
4250 GenerateVarHandleGetAndSet(invoke, codegen_);
4251 }
4252
4253 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4254 CreateVarHandleGetAndSetLocations(invoke);
4255 }
4256
4257 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndSetRelease(HInvoke* invoke) {
4258 GenerateVarHandleGetAndSet(invoke, codegen_);
4259 }
4260
4261 static void CreateVarHandleCompareAndSetOrExchangeLocations(HInvoke* invoke) {
4262   // The only read barrier implementation supporting the
4263   // VarHandle intrinsics is the Baker-style read barrier.
4264 if (gUseReadBarrier && !kUseBakerReadBarrier) {
4265 return;
4266 }
4267
4268 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4269 return;
4270 }
4271
4272 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4273 uint32_t expected_value_index = number_of_arguments - 2;
4274 uint32_t new_value_index = number_of_arguments - 1;
4275 DataType::Type value_type = GetDataTypeFromShorty(invoke, expected_value_index);
4276 DCHECK_EQ(value_type, GetDataTypeFromShorty(invoke, new_value_index));
4277
4278 if (DataType::Is64BitType(value_type)) {
4279 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4280 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4281 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4282 // <EAX, EBX> to <EBX, ECX>).
4283 return;
4284 }
4285
4286 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4287 LocationSummary* locations = new (allocator) LocationSummary(
4288 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4289 locations->AddTemp(Location::RequiresRegister());
4290 locations->AddTemp(Location::RequiresRegister());
4291 // We use this temporary for the card, so we need a byte register
4292 locations->AddTemp(Location::RegisterLocation(EBX));
4293 locations->SetInAt(0, Location::RequiresRegister());
4294 if (GetExpectedVarHandleCoordinatesCount(invoke) == 1u) {
4295 // For instance fields, this is the source object
4296 locations->SetInAt(1, Location::RequiresRegister());
4297 } else {
4298 // For static fields, we need another temp because one will be busy with the declaring class.
4299 locations->AddTemp(Location::RequiresRegister());
4300 }
4301 if (DataType::IsFloatingPointType(value_type)) {
4302 // We need EAX for placing the expected value
4303 locations->AddTemp(Location::RegisterLocation(EAX));
4304 locations->SetInAt(new_value_index,
4305 Location::FpuRegisterOrConstant(invoke->InputAt(new_value_index)));
4306 locations->SetInAt(expected_value_index,
4307 Location::FpuRegisterOrConstant(invoke->InputAt(expected_value_index)));
4308 } else {
4309 // Ensure it's in a byte register
4310 locations->SetInAt(new_value_index, Location::RegisterLocation(ECX));
4311 locations->SetInAt(expected_value_index, Location::RegisterLocation(EAX));
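    // (`lock cmpxchg` implicitly compares against and updates EAX, which is why the expected value
    // is pinned there; ECX is used for the new value since cmpxchgb needs a byte register.)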
4312 }
4313
4314 mirror::VarHandle::AccessModeTemplate access_mode_template =
4315 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4316
4317 if (access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange &&
4318 value_type == DataType::Type::kFloat32) {
4319 locations->SetOut(Location::RequiresFpuRegister());
4320 } else {
4321 locations->SetOut(Location::RegisterLocation(EAX));
4322 }
4323 }
4324
4325 static void GenerateVarHandleCompareAndSetOrExchange(HInvoke* invoke, CodeGeneratorX86* codegen) {
4326   // The only read barrier implementation supporting the
4327   // VarHandle intrinsics is the Baker-style read barrier.
4328 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4329
4330 X86Assembler* assembler = codegen->GetAssembler();
4331 LocationSummary* locations = invoke->GetLocations();
4332 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4333 uint32_t expected_value_index = number_of_arguments - 2;
4334 uint32_t new_value_index = number_of_arguments - 1;
4335 DataType::Type type = GetDataTypeFromShorty(invoke, expected_value_index);
4336 DCHECK_EQ(type, GetDataTypeFromShorty(invoke, new_value_index));
4337 Location expected_value = locations->InAt(expected_value_index);
4338 Location new_value = locations->InAt(new_value_index);
4339 Register offset = locations->GetTemp(0).AsRegister<Register>();
4340 Register temp = locations->GetTemp(1).AsRegister<Register>();
4341 Register temp2 = locations->GetTemp(2).AsRegister<Register>();
4342 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4343 codegen->AddSlowPath(slow_path);
4344
4345 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4346
4347 // Get the field referred by the VarHandle. The returned register contains the object reference
4348 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4349 // declaring class will be placed in 'temp' register.
4350 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4351
4352 uint32_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4353 // For generating the compare and exchange, we need 2 temporaries. In case of a static field, the
4354 // first temporary contains the declaring class so we need another temporary. In case of an
4355 // instance field, the object comes in a separate register so it's safe to use the first temp.
4356 temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(3).AsRegister<Register>();
4357 DCHECK_NE(temp, reference);
4358
4359 // We are using `lock cmpxchg` in all cases because there is no CAS equivalent that has weak
4360 // failure semantics. `lock cmpxchg` has full barrier semantics, and we don't need scheduling
4361 // barriers at this time.
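  // For the primitive path this boils down to roughly one `lock cmpxchgl` on [holder + offset]
  // with the expected value pinned in EAX: the previous field value ends up in EAX and ZF is set
  // on success (compareAndSet presumably materializes its boolean result from ZF).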
4362
4363 mirror::VarHandle::AccessModeTemplate access_mode_template =
4364 mirror::VarHandle::GetAccessModeTemplateByIntrinsic(invoke->GetIntrinsic());
4365 bool is_cmpxchg =
4366 access_mode_template == mirror::VarHandle::AccessModeTemplate::kCompareAndExchange;
4367
4368 if (type == DataType::Type::kReference) {
4369 GenReferenceCAS(
4370 invoke, codegen, expected_value, new_value, reference, offset, temp, temp2, is_cmpxchg);
4371 } else {
4372 Location out = locations->Out();
4373 GenPrimitiveCAS(
4374 type, codegen, expected_value, new_value, reference, offset, out, temp, is_cmpxchg);
4375 }
4376
4377 __ Bind(slow_path->GetExitLabel());
4378 }
4379
4380 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4381 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4382 }
4383
4384 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndSet(HInvoke* invoke) {
4385 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4386 }
4387
4388 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4389 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4390 }
4391
4392 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSet(HInvoke* invoke) {
4393 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4394 }
4395
4396 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4397 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4398 }
4399
4400 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetPlain(HInvoke* invoke) {
4401 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4402 }
4403
4404 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4405 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4406 }
4407
4408 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetAcquire(HInvoke* invoke) {
4409 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4410 }
4411
4412 void IntrinsicLocationsBuilderX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4413 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4414 }
4415
4416 void IntrinsicCodeGeneratorX86::VisitVarHandleWeakCompareAndSetRelease(HInvoke* invoke) {
4417 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4418 }
4419
4420 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4421 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4422 }
4423
4424 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchange(HInvoke* invoke) {
4425 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4426 }
4427
4428 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4429 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4430 }
4431
4432 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeAcquire(HInvoke* invoke) {
4433 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4434 }
4435
4436 void IntrinsicLocationsBuilderX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4437 CreateVarHandleCompareAndSetOrExchangeLocations(invoke);
4438 }
4439
4440 void IntrinsicCodeGeneratorX86::VisitVarHandleCompareAndExchangeRelease(HInvoke* invoke) {
4441 GenerateVarHandleCompareAndSetOrExchange(invoke, codegen_);
4442 }
4443
4444 static void CreateVarHandleGetAndAddLocations(HInvoke* invoke) {
4445   // The only read barrier implementation supporting the
4446   // VarHandle intrinsics is the Baker-style read barrier.
4447 if (gUseReadBarrier && !kUseBakerReadBarrier) {
4448 return;
4449 }
4450
4451 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4452 return;
4453 }
4454
4455 // The last argument should be the value we intend to set.
4456 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4457 DataType::Type value_type = GetDataTypeFromShorty(invoke, value_index);
4458 if (DataType::Is64BitType(value_type)) {
4459 // We avoid the case of an Int64/Float64 value because we would need to place it in a register
4460 // pair. If the slow path is taken, the ParallelMove might fail to move the pair according to
4461 // the X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4462 // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4463 return;
4464 }
4465
4466 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4467 LocationSummary* locations = new (allocator) LocationSummary(
4468 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4469 locations->AddTemp(Location::RequiresRegister());
4470 locations->AddTemp(Location::RequiresRegister());
4471 locations->SetInAt(0, Location::RequiresRegister());
4472 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4473 if (expected_coordinates_count == 1u) {
4474 // For instance fields, this is the source object
4475 locations->SetInAt(1, Location::RequiresRegister());
4476 } else {
4477 // For static fields, we need another temp because one will be busy with the declaring class.
4478 locations->AddTemp(Location::RequiresRegister());
4479 }
4480
4481 if (DataType::IsFloatingPointType(value_type)) {
4482 locations->AddTemp(Location::RequiresFpuRegister());
4483 locations->AddTemp(Location::RegisterLocation(EAX));
4484 locations->SetInAt(value_index, Location::RequiresFpuRegister());
4485 locations->SetOut(Location::RequiresFpuRegister());
4486 } else {
4487 // xadd updates the register argument with the old value. ByteRegister required for xaddb.
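    // Illustratively: a single locked `xadd` adds EAX into the field and leaves the field's
    // previous value in EAX, which is exactly the getAndAdd result.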
4488 locations->SetInAt(value_index, Location::RegisterLocation(EAX));
4489 locations->SetOut(Location::RegisterLocation(EAX));
4490 }
4491 }
4492
4493 static void GenerateVarHandleGetAndAdd(HInvoke* invoke, CodeGeneratorX86* codegen) {
4494   // The only read barrier implementation supporting the
4495   // VarHandle intrinsics is the Baker-style read barrier.
4496 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4497
4498 X86Assembler* assembler = codegen->GetAssembler();
4499 LocationSummary* locations = invoke->GetLocations();
4500 uint32_t number_of_arguments = invoke->GetNumberOfArguments();
4501 uint32_t value_index = number_of_arguments - 1;
4502 DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4503 DCHECK_EQ(type, invoke->GetType());
4504 Location value_loc = locations->InAt(value_index);
4505 Register temp = locations->GetTemp(0).AsRegister<Register>();
4506 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4507 codegen->AddSlowPath(slow_path);
4508
4509 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4510
4511 Register offset = locations->GetTemp(1).AsRegister<Register>();
4512 // Get the field referred by the VarHandle. The returned register contains the object reference
4513 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4514 // declaring class will be placed in 'temp' register.
4515 Register reference = GenerateVarHandleFieldReference(invoke, codegen, temp, offset);
4516
4517 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4518 temp = (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4519 DCHECK_NE(temp, reference);
4520 Address field_addr(reference, offset, TIMES_1, 0);
4521
4522 switch (type) {
4523 case DataType::Type::kInt8:
4524 __ LockXaddb(field_addr, value_loc.AsRegister<ByteRegister>());
4525 __ movsxb(locations->Out().AsRegister<Register>(),
4526 locations->Out().AsRegister<ByteRegister>());
4527 break;
4528 case DataType::Type::kInt16:
4529 __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4530 __ movsxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4531 break;
4532 case DataType::Type::kUint16:
4533 __ LockXaddw(field_addr, value_loc.AsRegister<Register>());
4534 __ movzxw(locations->Out().AsRegister<Register>(), locations->Out().AsRegister<Register>());
4535 break;
4536 case DataType::Type::kInt32:
4537 __ LockXaddl(field_addr, value_loc.AsRegister<Register>());
4538 break;
4539 case DataType::Type::kFloat32: {
4540 Location temp_float =
4541 (expected_coordinates_count == 1u) ? locations->GetTemp(2) : locations->GetTemp(3);
4542 DCHECK(temp_float.IsFpuRegister());
4543 Location eax = Location::RegisterLocation(EAX);
4544 NearLabel try_again;
4545 __ Bind(&try_again);
4546 __ movss(temp_float.AsFpuRegister<XmmRegister>(), field_addr);
4547 __ movd(EAX, temp_float.AsFpuRegister<XmmRegister>());
4548 __ addss(temp_float.AsFpuRegister<XmmRegister>(),
4549 value_loc.AsFpuRegister<XmmRegister>());
4550 GenPrimitiveLockedCmpxchg(type,
4551 codegen,
4552 /* expected_value= */ eax,
4553 /* new_value= */ temp_float,
4554 reference,
4555 offset,
4556 temp);
4557 __ j(kNotZero, &try_again);
4558
4559 // The old value is present in EAX.
4560 codegen->Move32(locations->Out(), eax);
4561 break;
4562 }
4563 default:
4564 UNREACHABLE();
4565 }
4566
4567 __ Bind(slow_path->GetExitLabel());
4568 }
4569
4570 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4571 CreateVarHandleGetAndAddLocations(invoke);
4572 }
4573
4574 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAdd(HInvoke* invoke) {
4575 GenerateVarHandleGetAndAdd(invoke, codegen_);
4576 }
4577
4578 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4579 CreateVarHandleGetAndAddLocations(invoke);
4580 }
4581
4582 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddAcquire(HInvoke* invoke) {
4583 GenerateVarHandleGetAndAdd(invoke, codegen_);
4584 }
4585
4586 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4587 CreateVarHandleGetAndAddLocations(invoke);
4588 }
4589
4590 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndAddRelease(HInvoke* invoke) {
4591 GenerateVarHandleGetAndAdd(invoke, codegen_);
4592 }
4593
4594 static void CreateVarHandleGetAndBitwiseOpLocations(HInvoke* invoke) {
4595   // The only read barrier implementation supporting the
4596   // VarHandle intrinsics is the Baker-style read barrier.
4597 if (gUseReadBarrier && !kUseBakerReadBarrier) {
4598 return;
4599 }
4600
4601 if (!HasVarHandleIntrinsicImplementation(invoke)) {
4602 return;
4603 }
4604
4605 // The last argument should be the value we intend to set.
4606 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4607 if (DataType::Is64BitType(GetDataTypeFromShorty(invoke, value_index))) {
4608 // We avoid the case of an Int64 value because we would need to place it in a register pair.
4609 // If the slow path is taken, the ParallelMove might fail to move the pair according to the
4610 // X86DexCallingConvention in case of an overlap (e.g., move the 64 bit value from
4611 // <EAX, EBX> to <EBX, ECX>). (Bug: b/168687887)
4612 return;
4613 }
4614
4615 ArenaAllocator* allocator = invoke->GetBlock()->GetGraph()->GetAllocator();
4616 LocationSummary* locations = new (allocator) LocationSummary(
4617 invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
4618 // We need a byte register temp to store the result of the bitwise operation
4619 locations->AddTemp(Location::RegisterLocation(EBX));
4620 locations->AddTemp(Location::RequiresRegister());
4621 locations->SetInAt(0, Location::RequiresRegister());
4622 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4623 if (expected_coordinates_count == 1u) {
4624 // For instance fields, this is the source object
4625 locations->SetInAt(1, Location::RequiresRegister());
4626 } else {
4627 // For static fields, we need another temp because one will be busy with the declaring class.
4628 locations->AddTemp(Location::RequiresRegister());
4629 }
4630
4631 locations->SetInAt(value_index, Location::RegisterOrConstant(invoke->InputAt(value_index)));
4632 locations->SetOut(Location::RegisterLocation(EAX));
4633 }
4634
4635 static void GenerateBitwiseOp(HInvoke* invoke,
4636 CodeGeneratorX86* codegen,
4637 Register left,
4638 Register right) {
4639 X86Assembler* assembler = codegen->GetAssembler();
4640
4641 switch (invoke->GetIntrinsic()) {
4642 case Intrinsics::kVarHandleGetAndBitwiseOr:
4643 case Intrinsics::kVarHandleGetAndBitwiseOrAcquire:
4644 case Intrinsics::kVarHandleGetAndBitwiseOrRelease:
4645 __ orl(left, right);
4646 break;
4647 case Intrinsics::kVarHandleGetAndBitwiseXor:
4648 case Intrinsics::kVarHandleGetAndBitwiseXorAcquire:
4649 case Intrinsics::kVarHandleGetAndBitwiseXorRelease:
4650 __ xorl(left, right);
4651 break;
4652 case Intrinsics::kVarHandleGetAndBitwiseAnd:
4653 case Intrinsics::kVarHandleGetAndBitwiseAndAcquire:
4654 case Intrinsics::kVarHandleGetAndBitwiseAndRelease:
4655 __ andl(left, right);
4656 break;
4657 default:
4658 UNREACHABLE();
4659 }
4660 }
4661
4662 static void GenerateVarHandleGetAndBitwiseOp(HInvoke* invoke, CodeGeneratorX86* codegen) {
4663   // The only read barrier implementation supporting the
4664   // VarHandle intrinsics is the Baker-style read barrier.
4665 DCHECK_IMPLIES(gUseReadBarrier, kUseBakerReadBarrier);
4666
4667 X86Assembler* assembler = codegen->GetAssembler();
4668 LocationSummary* locations = invoke->GetLocations();
4669 uint32_t value_index = invoke->GetNumberOfArguments() - 1;
4670 DataType::Type type = GetDataTypeFromShorty(invoke, value_index);
4671 DCHECK_EQ(type, invoke->GetType());
4672 Register temp = locations->GetTemp(0).AsRegister<Register>();
4673 SlowPathCode* slow_path = new (codegen->GetScopedAllocator()) IntrinsicSlowPathX86(invoke);
4674 codegen->AddSlowPath(slow_path);
4675
4676 GenerateVarHandleCommonChecks(invoke, temp, slow_path, assembler);
4677
4678 Register offset = locations->GetTemp(1).AsRegister<Register>();
4679 size_t expected_coordinates_count = GetExpectedVarHandleCoordinatesCount(invoke);
4680 // For static field, we need another temporary because the first one contains the declaring class
4681 Register reference =
4682 (expected_coordinates_count == 1u) ? temp : locations->GetTemp(2).AsRegister<Register>();
4683 // Get the field referred by the VarHandle. The returned register contains the object reference
4684 // or the declaring class. The field offset will be placed in 'offset'. For static fields, the
4685 // declaring class will be placed in 'reference' register.
4686 reference = GenerateVarHandleFieldReference(invoke, codegen, reference, offset);
4687 DCHECK_NE(temp, reference);
4688 Address field_addr(reference, offset, TIMES_1, 0);
4689
4690 Register out = locations->Out().AsRegister<Register>();
4691 DCHECK_EQ(out, EAX);
4692
4693 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrRelease ||
4694 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorRelease ||
4695 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndRelease) {
4696 codegen->GenerateMemoryBarrier(MemBarrierKind::kAnyStore);
4697 }
4698
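  // The update is a standard cmpxchg retry loop; a rough sketch of what gets emitted:
  //   retry:
  //     load the field into EAX            (the expected value)
  //     temp = operand; temp OP= EAX       (compute the new value)
  //     lock cmpxchg temp into the field   (publish only if the field still equals EAX)
  //     jnz retry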
4699 NearLabel try_again;
4700 __ Bind(&try_again);
4701 // Place the expected value in EAX for cmpxchg
4702 codegen->LoadFromMemoryNoBarrier(type, locations->Out(), field_addr);
4703 codegen->Move32(locations->GetTemp(0), locations->InAt(value_index));
4704 GenerateBitwiseOp(invoke, codegen, temp, out);
4705 GenPrimitiveLockedCmpxchg(type,
4706 codegen,
4707 /* expected_value= */ locations->Out(),
4708 /* new_value= */ locations->GetTemp(0),
4709 reference,
4710 offset);
4711 // If the cmpxchg failed, another thread changed the value so try again.
4712 __ j(kNotZero, &try_again);
4713
4714 // The old value is present in EAX.
4715
4716 if (invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseOrAcquire ||
4717 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseXorAcquire ||
4718 invoke->GetIntrinsic() == Intrinsics::kVarHandleGetAndBitwiseAndAcquire) {
4719 codegen->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
4720 }
4721
4722 __ Bind(slow_path->GetExitLabel());
4723 }
4724
4725 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4726 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4727 }
4728
4729 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOr(HInvoke* invoke) {
4730 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4731 }
4732
4733 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4734 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4735 }
4736
4737 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrAcquire(HInvoke* invoke) {
4738 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4739 }
4740
4741 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4742 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4743 }
4744
4745 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseOrRelease(HInvoke* invoke) {
4746 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4747 }
4748
4749 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4750 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4751 }
4752
4753 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXor(HInvoke* invoke) {
4754 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4755 }
4756
4757 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4758 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4759 }
4760
4761 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorAcquire(HInvoke* invoke) {
4762 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4763 }
4764
4765 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4766 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4767 }
4768
4769 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseXorRelease(HInvoke* invoke) {
4770 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4771 }
4772
4773 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4774 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4775 }
4776
4777 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAnd(HInvoke* invoke) {
4778 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4779 }
4780
4781 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4782 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4783 }
4784
4785 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndAcquire(HInvoke* invoke) {
4786 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4787 }
4788
4789 void IntrinsicLocationsBuilderX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4790 CreateVarHandleGetAndBitwiseOpLocations(invoke);
4791 }
4792
4793 void IntrinsicCodeGeneratorX86::VisitVarHandleGetAndBitwiseAndRelease(HInvoke* invoke) {
4794 GenerateVarHandleGetAndBitwiseOp(invoke, codegen_);
4795 }
4796
4797 static void GenerateMathFma(HInvoke* invoke, CodeGeneratorX86* codegen) {
4798 DCHECK(DataType::IsFloatingPointType(invoke->GetType()));
4799 LocationSummary* locations = invoke->GetLocations();
4800 DCHECK(locations->InAt(0).Equals(locations->Out()));
4801 X86Assembler* assembler = codegen->GetAssembler();
4802 XmmRegister left = locations->InAt(0).AsFpuRegister<XmmRegister>();
4803 XmmRegister right = locations->InAt(1).AsFpuRegister<XmmRegister>();
4804 XmmRegister accumulator = locations->InAt(2).AsFpuRegister<XmmRegister>();
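  // vfmadd213{ss,sd} overwrites its first operand: left = right * left + accumulator, i.e.
  // fma(a, b, c) = a * b + c for the operand order above, which is why InAt(0) must share a
  // register with the output (see the DCHECK above).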
4805 if (invoke->GetType() == DataType::Type::kFloat32) {
4806 __ vfmadd213ss(left, right, accumulator);
4807 } else {
4808 DCHECK_EQ(invoke->GetType(), DataType::Type::kFloat64);
4809 __ vfmadd213sd(left, right, accumulator);
4810 }
4811 }
4812
4813 void IntrinsicCodeGeneratorX86::VisitMathFmaDouble(HInvoke* invoke) {
4814 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
4815 GenerateMathFma(invoke, codegen_);
4816 }
4817
4818 void IntrinsicLocationsBuilderX86::VisitMathFmaDouble(HInvoke* invoke) {
4819 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
4820 CreateFPFPFPToFPCallLocations(allocator_, invoke);
4821 }
4822 }
4823
4824 void IntrinsicCodeGeneratorX86::VisitMathFmaFloat(HInvoke* invoke) {
4825 DCHECK(codegen_->GetInstructionSetFeatures().HasAVX2());
4826 GenerateMathFma(invoke, codegen_);
4827 }
4828
4829 void IntrinsicLocationsBuilderX86::VisitMathFmaFloat(HInvoke* invoke) {
4830 if (codegen_->GetInstructionSetFeatures().HasAVX2()) {
4831 CreateFPFPFPToFPCallLocations(allocator_, invoke);
4832 }
4833 }
4834
4835 #define MARK_UNIMPLEMENTED(Name) UNIMPLEMENTED_INTRINSIC(X86, Name)
4836 UNIMPLEMENTED_INTRINSIC_LIST_X86(MARK_UNIMPLEMENTED);
4837 #undef MARK_UNIMPLEMENTED
4838
4839 UNREACHABLE_INTRINSICS(X86)
4840
4841 #undef __
4842
4843 } // namespace x86
4844 } // namespace art
4845