/*
 * Copyright (C) 2015 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "intrinsics_x86.h"

#include <limits>

#include "arch/x86/instruction_set_features_x86.h"
#include "art_method.h"
#include "base/bit_utils.h"
#include "code_generator_x86.h"
#include "entrypoints/quick/quick_entrypoints.h"
#include "intrinsics.h"
#include "intrinsics_utils.h"
#include "lock_word.h"
#include "mirror/array-inl.h"
#include "mirror/object_array-inl.h"
#include "mirror/reference.h"
#include "mirror/string.h"
#include "scoped_thread_state_change-inl.h"
#include "thread-inl.h"
#include "utils/x86/assembler_x86.h"
#include "utils/x86/constants_x86.h"

namespace art {

namespace x86 {

static constexpr int kDoubleNaNHigh = 0x7FF80000;
static constexpr int kDoubleNaNLow = 0x00000000;
static constexpr int64_t kDoubleNaN = INT64_C(0x7FF8000000000000);
static constexpr int32_t kFloatNaN = INT32_C(0x7FC00000);

IntrinsicLocationsBuilderX86::IntrinsicLocationsBuilderX86(CodeGeneratorX86* codegen)
    : arena_(codegen->GetGraph()->GetArena()),
      codegen_(codegen) {
}


X86Assembler* IntrinsicCodeGeneratorX86::GetAssembler() {
  return down_cast<X86Assembler*>(codegen_->GetAssembler());
}

ArenaAllocator* IntrinsicCodeGeneratorX86::GetAllocator() {
  return codegen_->GetGraph()->GetArena();
}

bool IntrinsicLocationsBuilderX86::TryDispatch(HInvoke* invoke) {
  Dispatch(invoke);
  LocationSummary* res = invoke->GetLocations();
  if (res == nullptr) {
    return false;
  }
  return res->Intrinsified();
}

static void MoveArguments(HInvoke* invoke, CodeGeneratorX86* codegen) {
  InvokeDexCallingConventionVisitorX86 calling_convention_visitor;
  IntrinsicVisitor::MoveArguments(invoke, codegen, &calling_convention_visitor);
}

using IntrinsicSlowPathX86 = IntrinsicSlowPath<InvokeDexCallingConventionVisitorX86>;

// NOLINT on __ macro to suppress wrong warning/fix (misc-macro-parentheses) from clang-tidy.
#define __ down_cast<X86Assembler*>(codegen->GetAssembler())->  // NOLINT

// Slow path implementing the SystemArrayCopy intrinsic copy loop with read barriers.
class ReadBarrierSystemArrayCopySlowPathX86 : public SlowPathCode {
 public:
  explicit ReadBarrierSystemArrayCopySlowPathX86(HInstruction* instruction)
      : SlowPathCode(instruction) {
    DCHECK(kEmitCompilerReadBarrier);
    DCHECK(kUseBakerReadBarrier);
  }

  void EmitNativeCode(CodeGenerator* codegen) OVERRIDE {
    CodeGeneratorX86* x86_codegen = down_cast<CodeGeneratorX86*>(codegen);
    LocationSummary* locations = instruction_->GetLocations();
    DCHECK(locations->CanCall());
    DCHECK(instruction_->IsInvokeStaticOrDirect())
        << "Unexpected instruction in read barrier arraycopy slow path: "
        << instruction_->DebugName();
    DCHECK(instruction_->GetLocations()->Intrinsified());
    DCHECK_EQ(instruction_->AsInvoke()->GetIntrinsic(), Intrinsics::kSystemArrayCopy);

    int32_t element_size = Primitive::ComponentSize(Primitive::kPrimNot);
    uint32_t offset = mirror::Array::DataOffset(element_size).Uint32Value();

    Register src = locations->InAt(0).AsRegister<Register>();
    Location src_pos = locations->InAt(1);
    Register dest = locations->InAt(2).AsRegister<Register>();
    Location dest_pos = locations->InAt(3);
    Location length = locations->InAt(4);
    Location temp1_loc = locations->GetTemp(0);
    Register temp1 = temp1_loc.AsRegister<Register>();
    Register temp2 = locations->GetTemp(1).AsRegister<Register>();
    Register temp3 = locations->GetTemp(2).AsRegister<Register>();

    __ Bind(GetEntryLabel());
    // In this code path, registers `temp1`, `temp2`, and `temp3`
    // (resp.) are not used for the base source address, the base
    // destination address, and the end source address (resp.), as in
    // other SystemArrayCopy intrinsic code paths. Instead they are
    // (resp.) used for:
    // - the loop index (`i`);
    // - the source index (`src_index`) and the loaded (source)
    //   reference (`value`); and
    // - the destination index (`dest_index`).

    // i = 0
    __ xorl(temp1, temp1);
    NearLabel loop;
    __ Bind(&loop);
    // value = src_array[i + src_pos]
    if (src_pos.IsConstant()) {
      int32_t constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(temp2, Address(src, temp1, ScaleFactor::TIMES_4, adjusted_offset));
    } else {
      __ leal(temp2, Address(src_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(temp2, Address(src, temp2, ScaleFactor::TIMES_4, offset));
    }
    __ MaybeUnpoisonHeapReference(temp2);
    // TODO: Inline the mark bit check before calling the runtime?
    // value = ReadBarrier::Mark(value)
    // No need to save live registers; it's taken care of by the
    // entrypoint. Also, there is no need to update the stack mask,
    // as this runtime call will not trigger a garbage collection.
    // (See ReadBarrierMarkSlowPathX86::EmitNativeCode for more
    // explanations.)
    DCHECK_NE(temp2, ESP);
    DCHECK(0 <= temp2 && temp2 < kNumberOfCpuRegisters) << temp2;
    int32_t entry_point_offset =
        CodeGenerator::GetReadBarrierMarkEntryPointsOffset<kX86PointerSize>(temp2);
    // This runtime call does not require a stack map.
    x86_codegen->InvokeRuntimeWithoutRecordingPcInfo(entry_point_offset, instruction_, this);
    __ MaybePoisonHeapReference(temp2);
    // dest_array[i + dest_pos] = value
    if (dest_pos.IsConstant()) {
      int32_t constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
      int32_t adjusted_offset = offset + constant * element_size;
      __ movl(Address(dest, temp1, ScaleFactor::TIMES_4, adjusted_offset), temp2);
    } else {
      __ leal(temp3, Address(dest_pos.AsRegister<Register>(), temp1, ScaleFactor::TIMES_1, 0));
      __ movl(Address(dest, temp3, ScaleFactor::TIMES_4, offset), temp2);
    }
    // ++i
    __ addl(temp1, Immediate(1));
    // if (i != length) goto loop
    x86_codegen->GenerateIntCompare(temp1_loc, length);
    __ j(kNotEqual, &loop);
    __ jmp(GetExitLabel());
  }

  const char* GetDescription() const OVERRIDE { return "ReadBarrierSystemArrayCopySlowPathX86"; }

 private:
  DISALLOW_COPY_AND_ASSIGN(ReadBarrierSystemArrayCopySlowPathX86);
};

#undef __

#define __ assembler->

static void CreateFPToIntLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void CreateIntToFPLocations(ArenaAllocator* arena, HInvoke* invoke, bool is64bit) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresFpuRegister());
  if (is64bit) {
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MoveFPToInt(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    __ movsd(temp, input.AsFpuRegister<XmmRegister>());
    __ movd(output.AsRegisterPairLow<Register>(), temp);
    __ psrlq(temp, Immediate(32));
    __ movd(output.AsRegisterPairHigh<Register>(), temp);
  } else {
    __ movd(output.AsRegister<Register>(), input.AsFpuRegister<XmmRegister>());
  }
}

static void MoveIntToFP(LocationSummary* locations, bool is64bit, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Location output = locations->Out();
  if (is64bit) {
    // Need to use the temporary.
    XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
    __ movd(temp1, input.AsRegisterPairLow<Register>());
    __ movd(temp2, input.AsRegisterPairHigh<Register>());
    __ punpckldq(temp1, temp2);
    __ movsd(output.AsFpuRegister<XmmRegister>(), temp1);
  } else {
    __ movd(output.AsFpuRegister<XmmRegister>(), input.AsRegister<Register>());
  }
}

void IntrinsicLocationsBuilderX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ true);
}
void IntrinsicLocationsBuilderX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ true);
}

void IntrinsicCodeGeneratorX86::VisitDoubleDoubleToRawLongBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitDoubleLongBitsToDouble(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  CreateFPToIntLocations(arena_, invoke, /* is64bit */ false);
}
void IntrinsicLocationsBuilderX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  CreateIntToFPLocations(arena_, invoke, /* is64bit */ false);
}

void IntrinsicCodeGeneratorX86::VisitFloatFloatToRawIntBits(HInvoke* invoke) {
  MoveFPToInt(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}
void IntrinsicCodeGeneratorX86::VisitFloatIntBitsToFloat(HInvoke* invoke) {
  MoveIntToFP(invoke->GetLocations(), /* is64bit */ false, GetAssembler());
}

static void CreateIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister());
}

static void CreateLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
}

static void GenReverseBytes(LocationSummary* locations,
    Primitive::Type size,
    X86Assembler* assembler) {
  Register out = locations->Out().AsRegister<Register>();

  switch (size) {
    case Primitive::kPrimShort:
      // TODO: Can be done with an xchg of 8b registers. This is straight from Quick.
      __ bswapl(out);
      __ sarl(out, Immediate(16));
      break;
    case Primitive::kPrimInt:
      __ bswapl(out);
      break;
    default:
      LOG(FATAL) << "Unexpected size for reverse-bytes: " << size;
      UNREACHABLE();
  }
}

void IntrinsicLocationsBuilderX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitIntegerReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitLongReverseBytes(HInvoke* invoke) {
  CreateLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitLongReverseBytes(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();

  X86Assembler* assembler = GetAssembler();
  // Assign the inputs to the outputs, mixing low/high.
  __ movl(output_lo, input_hi);
  __ movl(output_hi, input_lo);
  __ bswapl(output_lo);
  __ bswapl(output_hi);
}

void IntrinsicLocationsBuilderX86::VisitShortReverseBytes(HInvoke* invoke) {
  CreateIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitShortReverseBytes(HInvoke* invoke) {
  GenReverseBytes(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
}


// TODO: Consider Quick's way of doing Double abs through integer operations, as the immediate we
// need is 64b.

static void CreateFloatToFloat(ArenaAllocator* arena, HInvoke* invoke) {
  // TODO: Enable memory operations when the assembler supports them.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    // We need addressability for the constant area.
    locations->SetInAt(1, Location::RequiresRegister());
    // We need a temporary to hold the constant.
    locations->AddTemp(Location::RequiresFpuRegister());
  }
}

static void MathAbsFP(HInvoke* invoke,
    bool is64bit,
    X86Assembler* assembler,
    CodeGeneratorX86* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  Location output = locations->Out();

  DCHECK(output.IsFpuRegister());
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    DCHECK(locations->InAt(1).IsRegister());
    // We also have a constant area pointer.
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
    if (is64bit) {
      __ movsd(temp, codegen->LiteralInt64Address(
          INT64_C(0x7FFFFFFFFFFFFFFF), method_address, constant_area));
      __ andpd(output.AsFpuRegister<XmmRegister>(), temp);
    } else {
      __ movss(temp, codegen->LiteralInt32Address(
          INT32_C(0x7FFFFFFF), method_address, constant_area));
      __ andps(output.AsFpuRegister<XmmRegister>(), temp);
    }
  } else {
    // Create the right constant on an aligned stack.
    if (is64bit) {
      __ subl(ESP, Immediate(8));
      __ pushl(Immediate(0x7FFFFFFF));
      __ pushl(Immediate(0xFFFFFFFF));
      __ andpd(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    } else {
      __ subl(ESP, Immediate(12));
      __ pushl(Immediate(0x7FFFFFFF));
      __ andps(output.AsFpuRegister<XmmRegister>(), Address(ESP, 0));
    }
    __ addl(ESP, Immediate(16));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathAbsDouble(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsDouble(HInvoke* invoke) {
  MathAbsFP(invoke, /* is64bit */ true, GetAssembler(), codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsFloat(HInvoke* invoke) {
  CreateFloatToFloat(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsFloat(HInvoke* invoke) {
  MathAbsFP(invoke, /* is64bit */ false, GetAssembler(), codegen_);
}

static void CreateAbsIntLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RegisterLocation(EAX));
  locations->SetOut(Location::SameAsFirstInput());
  locations->AddTemp(Location::RegisterLocation(EDX));
}

static void GenAbsInteger(LocationSummary* locations, X86Assembler* assembler) {
  Location output = locations->Out();
  Register out = output.AsRegister<Register>();
  DCHECK_EQ(out, EAX);
  Register temp = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(temp, EDX);

  // Sign extend EAX into EDX.
  __ cdq();

  // XOR EAX with sign.
  __ xorl(EAX, EDX);

  // Subtract out sign to correct.
  __ subl(EAX, EDX);

  // The result is in EAX.
}

static void CreateAbsLongLocation(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
  locations->AddTemp(Location::RequiresRegister());
}

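// Branch-free abs: with sign = input >> 63 (arithmetic shift), abs(x) = (x ^ sign) - sign.
// The 64-bit value lives in a register pair, so the subtract is done with subl/sbbl to
// propagate the borrow from the low word into the high word.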
static void GenAbsLong(LocationSummary* locations, X86Assembler* assembler) {
  Location input = locations->InAt(0);
  Register input_lo = input.AsRegisterPairLow<Register>();
  Register input_hi = input.AsRegisterPairHigh<Register>();
  Location output = locations->Out();
  Register output_lo = output.AsRegisterPairLow<Register>();
  Register output_hi = output.AsRegisterPairHigh<Register>();
  Register temp = locations->GetTemp(0).AsRegister<Register>();

  // Compute the sign into the temporary.
  __ movl(temp, input_hi);
  __ sarl(temp, Immediate(31));

  // Store the sign into the output.
  __ movl(output_lo, temp);
  __ movl(output_hi, temp);

  // XOR the input to the output.
  __ xorl(output_lo, input_lo);
  __ xorl(output_hi, input_hi);

  // Subtract the sign.
  __ subl(output_lo, temp);
  __ sbbl(output_hi, temp);
}

void IntrinsicLocationsBuilderX86::VisitMathAbsInt(HInvoke* invoke) {
  CreateAbsIntLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsInt(HInvoke* invoke) {
  GenAbsInteger(invoke->GetLocations(), GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathAbsLong(HInvoke* invoke) {
  CreateAbsLongLocation(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAbsLong(HInvoke* invoke) {
  GenAbsLong(invoke->GetLocations(), GetAssembler());
}

static void GenMinMaxFP(HInvoke* invoke,
    bool is_min,
    bool is_double,
    X86Assembler* assembler,
    CodeGeneratorX86* codegen) {
  LocationSummary* locations = invoke->GetLocations();
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);
  Location out_loc = locations->Out();
  XmmRegister out = out_loc.AsFpuRegister<XmmRegister>();

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    DCHECK(out_loc.Equals(op1_loc));
    return;
  }

  //  (out := op1)
  //  out <=? op2
  //  if Nan jmp Nan_label
  //  if out is min jmp done
  //  if op2 is min jmp op2_label
  //  handle -0/+0
  //  jmp done
  // Nan_label:
  //  out := NaN
  // op2_label:
  //  out := op2
  // done:
  //
  // This removes one jmp, but needs to copy one input (op1) to out.
  //
  // TODO: This is straight from Quick (except literal pool). Make NaN an out-of-line slowpath?

  XmmRegister op2 = op2_loc.AsFpuRegister<XmmRegister>();

  NearLabel nan, done, op2_label;
  if (is_double) {
    __ ucomisd(out, op2);
  } else {
    __ ucomiss(out, op2);
  }

  __ j(Condition::kParityEven, &nan);

  __ j(is_min ? Condition::kAbove : Condition::kBelow, &op2_label);
  __ j(is_min ? Condition::kBelow : Condition::kAbove, &done);

  // Handle 0.0/-0.0.
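  // At this point the operands compared equal, so they can only differ in the sign of zero.
  // OR-ing keeps a negative sign bit (min(+0.0, -0.0) == -0.0), while AND-ing clears it
  // (max(+0.0, -0.0) == +0.0); for identical bit patterns both operations are no-ops.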
  if (is_min) {
    if (is_double) {
      __ orpd(out, op2);
    } else {
      __ orps(out, op2);
    }
  } else {
    if (is_double) {
      __ andpd(out, op2);
    } else {
      __ andps(out, op2);
    }
  }
  __ jmp(&done);

  // NaN handling.
  __ Bind(&nan);
  // Do we have a constant area pointer?
  if (locations->GetInputCount() == 3 && locations->InAt(2).IsValid()) {
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(2)->AsX86ComputeBaseMethodAddress();
    DCHECK(locations->InAt(2).IsRegister());
    Register constant_area = locations->InAt(2).AsRegister<Register>();
    if (is_double) {
      __ movsd(out, codegen->LiteralInt64Address(kDoubleNaN, method_address, constant_area));
    } else {
      __ movss(out, codegen->LiteralInt32Address(kFloatNaN, method_address, constant_area));
    }
  } else {
    if (is_double) {
      __ pushl(Immediate(kDoubleNaNHigh));
      __ pushl(Immediate(kDoubleNaNLow));
      __ movsd(out, Address(ESP, 0));
      __ addl(ESP, Immediate(8));
    } else {
      __ pushl(Immediate(kFloatNaN));
      __ movss(out, Address(ESP, 0));
      __ addl(ESP, Immediate(4));
    }
  }
  __ jmp(&done);

  // out := op2;
  __ Bind(&op2_label);
  if (is_double) {
    __ movsd(out, op2);
  } else {
    __ movss(out, op2);
  }

  // Done.
  __ Bind(&done);
}

static void CreateFPFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  // The following is sub-optimal, but all we can do for now. It would be fine to also accept
  // the second input to be the output (we can simply swap inputs).
  locations->SetOut(Location::SameAsFirstInput());
  HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
  DCHECK(static_or_direct != nullptr);
  if (static_or_direct->HasSpecialInput() &&
      invoke->InputAt(static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
    locations->SetInAt(2, Location::RequiresRegister());
  }
}

void IntrinsicLocationsBuilderX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke,
      /* is_min */ true,
      /* is_double */ true,
      GetAssembler(),
      codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke,
      /* is_min */ true,
      /* is_double */ false,
      GetAssembler(),
      codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxDoubleDouble(HInvoke* invoke) {
  GenMinMaxFP(invoke,
      /* is_min */ false,
      /* is_double */ true,
      GetAssembler(),
      codegen_);
}

void IntrinsicLocationsBuilderX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  CreateFPFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxFloatFloat(HInvoke* invoke) {
  GenMinMaxFP(invoke,
      /* is_min */ false,
      /* is_double */ false,
      GetAssembler(),
      codegen_);
}

static void GenMinMax(LocationSummary* locations, bool is_min, bool is_long,
    X86Assembler* assembler) {
  Location op1_loc = locations->InAt(0);
  Location op2_loc = locations->InAt(1);

  // Shortcut for same input locations.
  if (op1_loc.Equals(op2_loc)) {
    // Can return immediately, as op1_loc == out_loc.
    // Note: if we ever support separate registers, e.g., output into memory, we need to check for
    // a copy here.
    DCHECK(locations->Out().Equals(op1_loc));
    return;
  }

  if (is_long) {
    // Need to perform a subtract to get the sign right.
    // op1 is already in the same location as the output.
    Location output = locations->Out();
    Register output_lo = output.AsRegisterPairLow<Register>();
    Register output_hi = output.AsRegisterPairHigh<Register>();

    Register op2_lo = op2_loc.AsRegisterPairLow<Register>();
    Register op2_hi = op2_loc.AsRegisterPairHigh<Register>();

    // Spare register to compute the subtraction to set condition code.
    Register temp = locations->GetTemp(0).AsRegister<Register>();

    // Subtract off op2_low.
    __ movl(temp, output_lo);
    __ subl(temp, op2_lo);

    // Now use the same temporary and the borrow to finish the subtraction of op2_hi.
    __ movl(temp, output_hi);
    __ sbbl(temp, op2_hi);

    // Now the condition code is correct.
    Condition cond = is_min ? Condition::kGreaterEqual : Condition::kLess;
    __ cmovl(cond, output_lo, op2_lo);
    __ cmovl(cond, output_hi, op2_hi);
  } else {
    Register out = locations->Out().AsRegister<Register>();
    Register op2 = op2_loc.AsRegister<Register>();

    //  (out := op1)
    //  out <=? op2
    //  if out is min jmp done
    //  out := op2
    // done:

    __ cmpl(out, op2);
    Condition cond = is_min ? Condition::kGreater : Condition::kLess;
    __ cmovl(cond, out, op2);
  }
}

static void CreateIntIntToIntLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
}

static void CreateLongLongToLongLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  locations->SetOut(Location::SameAsFirstInput());
  // Register to use to perform a long subtract to set cc.
  locations->AddTemp(Location::RequiresRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathMinIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMinLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMinLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ true, /* is_long */ true, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxIntInt(HInvoke* invoke) {
  CreateIntIntToIntLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxIntInt(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ false, GetAssembler());
}

void IntrinsicLocationsBuilderX86::VisitMathMaxLongLong(HInvoke* invoke) {
  CreateLongLongToLongLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathMaxLongLong(HInvoke* invoke) {
  GenMinMax(invoke->GetLocations(), /* is_min */ false, /* is_long */ true, GetAssembler());
}

static void CreateFPToFPLocations(ArenaAllocator* arena, HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kNoCall,
      kIntrinsified);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetOut(Location::RequiresFpuRegister());
}

void IntrinsicLocationsBuilderX86::VisitMathSqrt(HInvoke* invoke) {
  CreateFPToFPLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSqrt(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();

  GetAssembler()->sqrtsd(out, in);
}

static void InvokeOutOfLineIntrinsic(CodeGeneratorX86* codegen, HInvoke* invoke) {
  MoveArguments(invoke, codegen);

  DCHECK(invoke->IsInvokeStaticOrDirect());
  codegen->GenerateStaticOrDirectCall(invoke->AsInvokeStaticOrDirect(),
      Location::RegisterLocation(EAX));
  codegen->RecordPcInfo(invoke, invoke->GetDexPc());

  // Copy the result back to the expected output.
  Location out = invoke->GetLocations()->Out();
  if (out.IsValid()) {
    DCHECK(out.IsRegister());
    codegen->MoveFromReturnRegister(out, invoke->GetType());
  }
}

static void CreateSSE41FPToFPLocations(ArenaAllocator* arena,
    HInvoke* invoke,
    CodeGeneratorX86* codegen) {
  // Do we have instruction support?
  if (codegen->GetInstructionSetFeatures().HasSSE4_1()) {
    CreateFPToFPLocations(arena, invoke);
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

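// round_mode is the ROUNDSD immediate: 0 rounds to nearest (even), 1 rounds toward negative
// infinity (floor) and 2 rounds toward positive infinity (ceil), matching Math.rint,
// Math.floor and Math.ceil below.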
static void GenSSE41FPToFPIntrinsic(CodeGeneratorX86* codegen,
    HInvoke* invoke,
    X86Assembler* assembler,
    int round_mode) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {
    InvokeOutOfLineIntrinsic(codegen, invoke);
  } else {
    XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
    XmmRegister out = locations->Out().AsFpuRegister<XmmRegister>();
    __ roundsd(out, in, Immediate(round_mode));
  }
}

void IntrinsicLocationsBuilderX86::VisitMathCeil(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathCeil(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 2);
}

void IntrinsicLocationsBuilderX86::VisitMathFloor(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathFloor(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 1);
}

void IntrinsicLocationsBuilderX86::VisitMathRint(HInvoke* invoke) {
  CreateSSE41FPToFPLocations(arena_, invoke, codegen_);
}

void IntrinsicCodeGeneratorX86::VisitMathRint(HInvoke* invoke) {
  GenSSE41FPToFPIntrinsic(codegen_, invoke, GetAssembler(), 0);
}

void IntrinsicLocationsBuilderX86::VisitMathRoundFloat(HInvoke* invoke) {
  // Do we have instruction support?
  if (codegen_->GetInstructionSetFeatures().HasSSE4_1()) {
    HInvokeStaticOrDirect* static_or_direct = invoke->AsInvokeStaticOrDirect();
    DCHECK(static_or_direct != nullptr);
    LocationSummary* locations = new (arena_) LocationSummary(invoke,
        LocationSummary::kNoCall,
        kIntrinsified);
    locations->SetInAt(0, Location::RequiresFpuRegister());
    if (static_or_direct->HasSpecialInput() &&
        invoke->InputAt(
            static_or_direct->GetSpecialInputIndex())->IsX86ComputeBaseMethodAddress()) {
      locations->SetInAt(1, Location::RequiresRegister());
    }
    locations->SetOut(Location::RequiresRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    locations->AddTemp(Location::RequiresFpuRegister());
    return;
  }

  // We have to fall back to a call to the intrinsic.
  LocationSummary* locations = new (arena_) LocationSummary(invoke,
      LocationSummary::kCallOnMainOnly);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::RegisterLocation(EAX));
  // Needs to be EAX for the invoke.
  locations->AddTemp(Location::RegisterLocation(EAX));
}

void IntrinsicCodeGeneratorX86::VisitMathRoundFloat(HInvoke* invoke) {
  LocationSummary* locations = invoke->GetLocations();
  if (locations->WillCall()) {  // TODO: can we reach this?
    InvokeOutOfLineIntrinsic(codegen_, invoke);
    return;
  }

  XmmRegister in = locations->InAt(0).AsFpuRegister<XmmRegister>();
  XmmRegister t1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
  XmmRegister t2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
  Register out = locations->Out().AsRegister<Register>();
  NearLabel skip_incr, done;
  X86Assembler* assembler = GetAssembler();

  // Since no direct x86 rounding instruction matches the required semantics,
  // this intrinsic is implemented as follows:
  //  result = floor(in);
  //  if (in - result >= 0.5f)
  //    result = result + 1.0f;
  __ movss(t2, in);
  __ roundss(t1, in, Immediate(1));
  __ subss(t2, t1);
  if (locations->GetInputCount() == 2 && locations->InAt(1).IsValid()) {
    // Direct constant area available.
    HX86ComputeBaseMethodAddress* method_address =
        invoke->InputAt(1)->AsX86ComputeBaseMethodAddress();
    Register constant_area = locations->InAt(1).AsRegister<Register>();
    __ comiss(t2, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(0.5f),
        method_address,
        constant_area));
    __ j(kBelow, &skip_incr);
    __ addss(t1, codegen_->LiteralInt32Address(bit_cast<int32_t, float>(1.0f),
        method_address,
        constant_area));
    __ Bind(&skip_incr);
  } else {
    // No constant area: go through stack.
    __ pushl(Immediate(bit_cast<int32_t, float>(0.5f)));
    __ pushl(Immediate(bit_cast<int32_t, float>(1.0f)));
    __ comiss(t2, Address(ESP, 4));
    __ j(kBelow, &skip_incr);
    __ addss(t1, Address(ESP, 0));
    __ Bind(&skip_incr);
    __ addl(ESP, Immediate(8));
  }

  // Final conversion to an integer. Unfortunately this also does not have a
  // direct x86 instruction, since NaN should map to 0 and large positive
  // values need to be clipped to the extreme value.
  __ movl(out, Immediate(kPrimIntMax));
  __ cvtsi2ss(t2, out);
  __ comiss(t1, t2);
  __ j(kAboveEqual, &done);  // clipped to max (already in out), does not jump on unordered
  __ movl(out, Immediate(0));  // does not change flags
  __ j(kUnordered, &done);  // NaN mapped to 0 (just moved in out)
  __ cvttss2si(out, t1);
  __ Bind(&done);
}

static void CreateFPToFPCallLocations(ArenaAllocator* arena,
    HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kCallOnMainOnly,
      kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

static void GenFPToFPCall(HInvoke* invoke, CodeGeneratorX86* codegen, QuickEntrypointEnum entry) {
  LocationSummary* locations = invoke->GetLocations();
  DCHECK(locations->WillCall());
  DCHECK(invoke->IsInvokeStaticOrDirect());
  X86Assembler* assembler = codegen->GetAssembler();

  // We need some place to pass the parameters.
  __ subl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(16);

  // Pass the parameters at the bottom of the stack.
  __ movsd(Address(ESP, 0), XMM0);

  // If we have a second parameter, pass it next.
  if (invoke->GetNumberOfArguments() == 2) {
    __ movsd(Address(ESP, 8), XMM1);
  }

  // Now do the actual call.
  codegen->InvokeRuntime(entry, invoke, invoke->GetDexPc());

  // Extract the return value from the FP stack.
  __ fstpl(Address(ESP, 0));
  __ movsd(XMM0, Address(ESP, 0));

  // And clean up the stack.
  __ addl(ESP, Immediate(16));
  __ cfi().AdjustCFAOffset(-16);
}

void IntrinsicLocationsBuilderX86::VisitMathCos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCos);
}

void IntrinsicLocationsBuilderX86::VisitMathSin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSin);
}

void IntrinsicLocationsBuilderX86::VisitMathAcos(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAcos(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAcos);
}

void IntrinsicLocationsBuilderX86::VisitMathAsin(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAsin(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAsin);
}

void IntrinsicLocationsBuilderX86::VisitMathAtan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan);
}

void IntrinsicLocationsBuilderX86::VisitMathCbrt(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCbrt(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCbrt);
}

void IntrinsicLocationsBuilderX86::VisitMathCosh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathCosh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickCosh);
}

void IntrinsicLocationsBuilderX86::VisitMathExp(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExp(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExp);
}

void IntrinsicLocationsBuilderX86::VisitMathExpm1(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathExpm1(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickExpm1);
}

void IntrinsicLocationsBuilderX86::VisitMathLog(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog);
}

void IntrinsicLocationsBuilderX86::VisitMathLog10(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathLog10(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickLog10);
}

void IntrinsicLocationsBuilderX86::VisitMathSinh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathSinh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickSinh);
}

void IntrinsicLocationsBuilderX86::VisitMathTan(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTan(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTan);
}

void IntrinsicLocationsBuilderX86::VisitMathTanh(HInvoke* invoke) {
  CreateFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathTanh(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickTanh);
}

static void CreateFPFPToFPCallLocations(ArenaAllocator* arena,
    HInvoke* invoke) {
  LocationSummary* locations = new (arena) LocationSummary(invoke,
      LocationSummary::kCallOnMainOnly,
      kIntrinsified);
  InvokeRuntimeCallingConvention calling_convention;
  locations->SetInAt(0, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(0)));
  locations->SetInAt(1, Location::FpuRegisterLocation(calling_convention.GetFpuRegisterAt(1)));
  locations->SetOut(Location::FpuRegisterLocation(XMM0));
}

void IntrinsicLocationsBuilderX86::VisitMathAtan2(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathAtan2(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickAtan2);
}

void IntrinsicLocationsBuilderX86::VisitMathHypot(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathHypot(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickHypot);
}

void IntrinsicLocationsBuilderX86::VisitMathNextAfter(HInvoke* invoke) {
  CreateFPFPToFPCallLocations(arena_, invoke);
}

void IntrinsicCodeGeneratorX86::VisitMathNextAfter(HInvoke* invoke) {
  GenFPToFPCall(invoke, codegen_, kQuickNextAfter);
}

void IntrinsicLocationsBuilderX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  // We need at least two of the positions or length to be an integer constant,
  // or else we won't have enough free registers.
  HIntConstant* src_pos = invoke->InputAt(1)->AsIntConstant();
  HIntConstant* dest_pos = invoke->InputAt(3)->AsIntConstant();
  HIntConstant* length = invoke->InputAt(4)->AsIntConstant();

  int num_constants =
      ((src_pos != nullptr) ? 1 : 0)
      + ((dest_pos != nullptr) ? 1 : 0)
      + ((length != nullptr) ? 1 : 0);

  if (num_constants < 2) {
    // Not enough free registers.
    return;
  }

  // As long as we are checking, we might as well check to see if the src and dest
  // positions are >= 0.
  if ((src_pos != nullptr && src_pos->GetValue() < 0) ||
      (dest_pos != nullptr && dest_pos->GetValue() < 0)) {
    // We will have to fail anyways.
    return;
  }

  // And since we are already checking, check the length too.
  if (length != nullptr) {
    int32_t len = length->GetValue();
    if (len < 0) {
      // Just call as normal.
      return;
    }
  }

  // Okay, it is safe to generate inline code.
  LocationSummary* locations =
      new (arena_) LocationSummary(invoke, LocationSummary::kCallOnSlowPath, kIntrinsified);
  // arraycopy(Object src, int srcPos, Object dest, int destPos, int length).
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
  locations->SetInAt(2, Location::RequiresRegister());
  locations->SetInAt(3, Location::RegisterOrConstant(invoke->InputAt(3)));
  locations->SetInAt(4, Location::RegisterOrConstant(invoke->InputAt(4)));

  // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
  locations->AddTemp(Location::RegisterLocation(ESI));
  locations->AddTemp(Location::RegisterLocation(EDI));
  locations->AddTemp(Location::RegisterLocation(ECX));
}

static void CheckPosition(X86Assembler* assembler,
    Location pos,
    Register input,
    Location length,
    SlowPathCode* slow_path,
    Register temp,
    bool length_is_input_length = false) {
  // Where is the length in the Array?
  const uint32_t length_offset = mirror::Array::LengthOffset().Uint32Value();

  if (pos.IsConstant()) {
    int32_t pos_const = pos.GetConstant()->AsIntConstant()->GetValue();
    if (pos_const == 0) {
      if (!length_is_input_length) {
        // Check that length(input) >= length.
        if (length.IsConstant()) {
          __ cmpl(Address(input, length_offset),
              Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
        } else {
          __ cmpl(Address(input, length_offset), length.AsRegister<Register>());
        }
        __ j(kLess, slow_path->GetEntryLabel());
      }
    } else {
      // Check that length(input) >= pos.
      __ movl(temp, Address(input, length_offset));
      __ subl(temp, Immediate(pos_const));
      __ j(kLess, slow_path->GetEntryLabel());

      // Check that (length(input) - pos) >= length.
      if (length.IsConstant()) {
        __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
      } else {
        __ cmpl(temp, length.AsRegister<Register>());
      }
      __ j(kLess, slow_path->GetEntryLabel());
    }
  } else if (length_is_input_length) {
    // The only way the copy can succeed is if pos is zero.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kNotEqual, slow_path->GetEntryLabel());
  } else {
    // Check that pos >= 0.
    Register pos_reg = pos.AsRegister<Register>();
    __ testl(pos_reg, pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that pos <= length(input).
    __ cmpl(Address(input, length_offset), pos_reg);
    __ j(kLess, slow_path->GetEntryLabel());

    // Check that (length(input) - pos) >= length.
    __ movl(temp, Address(input, length_offset));
    __ subl(temp, pos_reg);
    if (length.IsConstant()) {
      __ cmpl(temp, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
    } else {
      __ cmpl(temp, length.AsRegister<Register>());
    }
    __ j(kLess, slow_path->GetEntryLabel());
  }
}

void IntrinsicCodeGeneratorX86::VisitSystemArrayCopyChar(HInvoke* invoke) {
  X86Assembler* assembler = GetAssembler();
  LocationSummary* locations = invoke->GetLocations();

  Register src = locations->InAt(0).AsRegister<Register>();
  Location srcPos = locations->InAt(1);
  Register dest = locations->InAt(2).AsRegister<Register>();
  Location destPos = locations->InAt(3);
  Location length = locations->InAt(4);

  // Temporaries that we need for MOVSW.
  Register src_base = locations->GetTemp(0).AsRegister<Register>();
  DCHECK_EQ(src_base, ESI);
  Register dest_base = locations->GetTemp(1).AsRegister<Register>();
  DCHECK_EQ(dest_base, EDI);
  Register count = locations->GetTemp(2).AsRegister<Register>();
  DCHECK_EQ(count, ECX);

  SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
  codegen_->AddSlowPath(slow_path);

  // Bail out if the source and destination are the same (to handle overlap).
  __ cmpl(src, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the source is null.
  __ testl(src, src);
  __ j(kEqual, slow_path->GetEntryLabel());

  // Bail out if the destination is null.
  __ testl(dest, dest);
  __ j(kEqual, slow_path->GetEntryLabel());

  // If the length is negative, bail out.
  // We have already checked in the LocationsBuilder for the constant case.
  if (!length.IsConstant()) {
    // testl sets the sign flag from the value itself; cmpl of a register with itself would
    // never take the kLess branch.
    __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
    __ j(kLess, slow_path->GetEntryLabel());
  }

  // We need the count in ECX.
  if (length.IsConstant()) {
    __ movl(count, Immediate(length.GetConstant()->AsIntConstant()->GetValue()));
  } else {
    __ movl(count, length.AsRegister<Register>());
  }

  // Validity checks: source. Use src_base as a temporary register.
  CheckPosition(assembler, srcPos, src, Location::RegisterLocation(count), slow_path, src_base);

  // Validity checks: dest. Use src_base as a temporary register.
  CheckPosition(assembler, destPos, dest, Location::RegisterLocation(count), slow_path, src_base);

  // Okay, everything checks out. Finally time to do the copy.
  // Check assumption that sizeof(Char) is 2 (used in scaling below).
  const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
  DCHECK_EQ(char_size, 2u);

  const uint32_t data_offset = mirror::Array::DataOffset(char_size).Uint32Value();

  if (srcPos.IsConstant()) {
    int32_t srcPos_const = srcPos.GetConstant()->AsIntConstant()->GetValue();
    __ leal(src_base, Address(src, char_size * srcPos_const + data_offset));
  } else {
    __ leal(src_base, Address(src, srcPos.AsRegister<Register>(),
        ScaleFactor::TIMES_2, data_offset));
  }
  if (destPos.IsConstant()) {
    int32_t destPos_const = destPos.GetConstant()->AsIntConstant()->GetValue();

    __ leal(dest_base, Address(dest, char_size * destPos_const + data_offset));
  } else {
    __ leal(dest_base, Address(dest, destPos.AsRegister<Register>(),
        ScaleFactor::TIMES_2, data_offset));
  }

  // Do the move.
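  // ECX holds the number of 16-bit code units; REP MOVSW copies ECX words from [ESI] to [EDI].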
1333 __ rep_movsw();
1334
1335 __ Bind(slow_path->GetExitLabel());
1336 }
1337
VisitStringCompareTo(HInvoke * invoke)1338 void IntrinsicLocationsBuilderX86::VisitStringCompareTo(HInvoke* invoke) {
1339 // The inputs plus one temp.
1340 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1341 LocationSummary::kCallOnMainAndSlowPath,
1342 kIntrinsified);
1343 InvokeRuntimeCallingConvention calling_convention;
1344 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1345 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1346 locations->SetOut(Location::RegisterLocation(EAX));
1347 }
1348
VisitStringCompareTo(HInvoke * invoke)1349 void IntrinsicCodeGeneratorX86::VisitStringCompareTo(HInvoke* invoke) {
1350 X86Assembler* assembler = GetAssembler();
1351 LocationSummary* locations = invoke->GetLocations();
1352
1353 // Note that the null check must have been done earlier.
1354 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1355
1356 Register argument = locations->InAt(1).AsRegister<Register>();
1357 __ testl(argument, argument);
1358 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1359 codegen_->AddSlowPath(slow_path);
1360 __ j(kEqual, slow_path->GetEntryLabel());
1361
1362 codegen_->InvokeRuntime(kQuickStringCompareTo, invoke, invoke->GetDexPc(), slow_path);
1363 __ Bind(slow_path->GetExitLabel());
1364 }
1365
VisitStringEquals(HInvoke * invoke)1366 void IntrinsicLocationsBuilderX86::VisitStringEquals(HInvoke* invoke) {
1367 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1368 LocationSummary::kNoCall,
1369 kIntrinsified);
1370 locations->SetInAt(0, Location::RequiresRegister());
1371 locations->SetInAt(1, Location::RequiresRegister());
1372
1373 // Request temporary registers, ECX and EDI needed for repe_cmpsl instruction.
1374 locations->AddTemp(Location::RegisterLocation(ECX));
1375 locations->AddTemp(Location::RegisterLocation(EDI));
1376
1377 // Set output, ESI needed for repe_cmpsl instruction anyways.
1378 locations->SetOut(Location::RegisterLocation(ESI), Location::kOutputOverlap);
1379 }
1380
VisitStringEquals(HInvoke * invoke)1381 void IntrinsicCodeGeneratorX86::VisitStringEquals(HInvoke* invoke) {
1382 X86Assembler* assembler = GetAssembler();
1383 LocationSummary* locations = invoke->GetLocations();
1384
1385 Register str = locations->InAt(0).AsRegister<Register>();
1386 Register arg = locations->InAt(1).AsRegister<Register>();
1387 Register ecx = locations->GetTemp(0).AsRegister<Register>();
1388 Register edi = locations->GetTemp(1).AsRegister<Register>();
1389 Register esi = locations->Out().AsRegister<Register>();
1390
1391 NearLabel end, return_true, return_false;
1392
1393 // Get offsets of count, value, and class fields within a string object.
1394 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1395 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1396 const uint32_t class_offset = mirror::Object::ClassOffset().Uint32Value();
1397
1398 // Note that the null check must have been done earlier.
1399 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1400
1401 StringEqualsOptimizations optimizations(invoke);
1402 if (!optimizations.GetArgumentNotNull()) {
1403 // Check if input is null, return false if it is.
1404 __ testl(arg, arg);
1405 __ j(kEqual, &return_false);
1406 }
1407
1408 if (!optimizations.GetArgumentIsString()) {
1409 // Instanceof check for the argument by comparing class fields.
1410 // All string objects must have the same type since String cannot be subclassed.
1411 // Receiver must be a string object, so its class field is equal to all strings' class fields.
1412 // If the argument is a string object, its class field must be equal to receiver's class field.
1413 __ movl(ecx, Address(str, class_offset));
1414 __ cmpl(ecx, Address(arg, class_offset));
1415 __ j(kNotEqual, &return_false);
1416 }
1417
1418 // Reference equality check, return true if same reference.
1419 __ cmpl(str, arg);
1420 __ j(kEqual, &return_true);
1421
1422 // Load length and compression flag of receiver string.
1423 __ movl(ecx, Address(str, count_offset));
1424 // Check if lengths and compression flags are equal, return false if they're not.
1425 // Two identical strings will always have the same compression style since
1426 // compression style is decided on alloc.
1427 __ cmpl(ecx, Address(arg, count_offset));
1428 __ j(kNotEqual, &return_false);
1429 // Return true if strings are empty. Even with string compression `count == 0` means empty.
1430 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1431 "Expecting 0=compressed, 1=uncompressed");
1432 __ jecxz(&return_true);
1433
1434 if (mirror::kUseStringCompression) {
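// With compression enabled, the count field holds (length << 1) | flag, where the low bit
// is the compression flag (0 = compressed, 1 = uncompressed) per the static_assert above.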
1435 NearLabel string_uncompressed;
1436 // Extract the length and branch on whether both strings are compressed or both uncompressed.
1437 // A difference in compression style was already rejected above.
1438 __ shrl(ecx, Immediate(1));
1439 __ j(kCarrySet, &string_uncompressed);
1440 // Divide string length by 2, rounding up, and continue as if uncompressed.
1441 __ addl(ecx, Immediate(1));
1442 __ shrl(ecx, Immediate(1));
1443 __ Bind(&string_uncompressed);
1444 }
1445 // Load starting addresses of string values into ESI/EDI as required for repe_cmpsl instruction.
1446 __ leal(esi, Address(str, value_offset));
1447 __ leal(edi, Address(arg, value_offset));
1448
1449 // Divide string length by 2 to compare characters 2 at a time and adjust for lengths not
1450 // divisible by 2.
1451 __ addl(ecx, Immediate(1));
1452 __ shrl(ecx, Immediate(1));
1453
1454 // Assertions that must hold in order to compare strings 2 characters (uncompressed)
1455 // or 4 characters (compressed) at a time.
1456 DCHECK_ALIGNED(value_offset, 4);
1457 static_assert(IsAligned<4>(kObjectAlignment), "String of odd length is not zero padded");
1458
1459 // Loop to compare strings two characters at a time starting at the beginning of the string.
1460 __ repe_cmpsl();
1461 // If strings are not equal, zero flag will be cleared.
1462 __ j(kNotEqual, &return_false);
1463
1464 // Return true and exit the function.
1465 // If loop does not result in returning false, we return true.
1466 __ Bind(&return_true);
1467 __ movl(esi, Immediate(1));
1468 __ jmp(&end);
1469
1470 // Return false and exit the function.
1471 __ Bind(&return_false);
1472 __ xorl(esi, esi);
1473 __ Bind(&end);
1474 }
1475
CreateStringIndexOfLocations(HInvoke * invoke,ArenaAllocator * allocator,bool start_at_zero)1476 static void CreateStringIndexOfLocations(HInvoke* invoke,
1477 ArenaAllocator* allocator,
1478 bool start_at_zero) {
1479 LocationSummary* locations = new (allocator) LocationSummary(invoke,
1480 LocationSummary::kCallOnSlowPath,
1481 kIntrinsified);
1482 // The data needs to be in EDI for scasw. So request that the string is there, anyways.
1483 locations->SetInAt(0, Location::RegisterLocation(EDI));
1484 // If we look for a constant char, we'll still have to copy it into EAX. So just request the
1485 // allocator to do that, anyways. We can still do the constant check by checking the parameter
1486 // of the instruction explicitly.
1487 // Note: This works as we don't clobber EAX anywhere.
1488 locations->SetInAt(1, Location::RegisterLocation(EAX));
1489 if (!start_at_zero) {
1490 locations->SetInAt(2, Location::RequiresRegister()); // The starting index.
1491 }
1492 // As we clobber EDI during execution anyways, also use it as the output.
1493 locations->SetOut(Location::SameAsFirstInput());
1494
1495 // repne scasw uses ECX as the counter.
1496 locations->AddTemp(Location::RegisterLocation(ECX));
1497 // Need another temporary to be able to compute the result.
1498 locations->AddTemp(Location::RequiresRegister());
1499 if (mirror::kUseStringCompression) {
1500 // Need another temporary to be able to save unflagged string length.
1501 locations->AddTemp(Location::RequiresRegister());
1502 }
1503 }
1504
GenerateStringIndexOf(HInvoke * invoke,X86Assembler * assembler,CodeGeneratorX86 * codegen,ArenaAllocator * allocator,bool start_at_zero)1505 static void GenerateStringIndexOf(HInvoke* invoke,
1506 X86Assembler* assembler,
1507 CodeGeneratorX86* codegen,
1508 ArenaAllocator* allocator,
1509 bool start_at_zero) {
1510 LocationSummary* locations = invoke->GetLocations();
1511
1512 // Note that the null check must have been done earlier.
1513 DCHECK(!invoke->CanDoImplicitNullCheckOn(invoke->InputAt(0)));
1514
1515 Register string_obj = locations->InAt(0).AsRegister<Register>();
1516 Register search_value = locations->InAt(1).AsRegister<Register>();
1517 Register counter = locations->GetTemp(0).AsRegister<Register>();
1518 Register string_length = locations->GetTemp(1).AsRegister<Register>();
1519 Register out = locations->Out().AsRegister<Register>();
1520 // Only used when string compression feature is on.
1521 Register string_length_flagged;
1522
1523 // Check our assumptions for registers.
1524 DCHECK_EQ(string_obj, EDI);
1525 DCHECK_EQ(search_value, EAX);
1526 DCHECK_EQ(counter, ECX);
1527 DCHECK_EQ(out, EDI);
1528
1529 // Check for code points > 0xFFFF. Either a slow-path check when we don't know statically,
1530 // or directly dispatch for a large constant, or omit slow-path for a small constant or a char.
1531 SlowPathCode* slow_path = nullptr;
1532 HInstruction* code_point = invoke->InputAt(1);
1533 if (code_point->IsIntConstant()) {
1534 if (static_cast<uint32_t>(code_point->AsIntConstant()->GetValue()) >
1535 std::numeric_limits<uint16_t>::max()) {
1536 // Always needs the slow-path. We could directly dispatch to it, but this case should be
1537 // rare, so for simplicity just put the full slow-path down and branch unconditionally.
1538 slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1539 codegen->AddSlowPath(slow_path);
1540 __ jmp(slow_path->GetEntryLabel());
1541 __ Bind(slow_path->GetExitLabel());
1542 return;
1543 }
1544 } else if (code_point->GetType() != Primitive::kPrimChar) {
1545 __ cmpl(search_value, Immediate(std::numeric_limits<uint16_t>::max()));
1546 slow_path = new (allocator) IntrinsicSlowPathX86(invoke);
1547 codegen->AddSlowPath(slow_path);
1548 __ j(kAbove, slow_path->GetEntryLabel());
1549 }
1550
1551 // From here down, we know that we are looking for a char that fits in 16 bits.
1552 // Location of reference to data array within the String object.
1553 int32_t value_offset = mirror::String::ValueOffset().Int32Value();
1554 // Location of count within the String object.
1555 int32_t count_offset = mirror::String::CountOffset().Int32Value();
1556
1557 // Load the count field of the string containing the length and compression flag.
1558 __ movl(string_length, Address(string_obj, count_offset));
1559
1560 // Do a zero-length check. Even with string compression `count == 0` means empty.
1561 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1562 "Expecting 0=compressed, 1=uncompressed");
1563 // TODO: Support jecxz.
1564 NearLabel not_found_label;
1565 __ testl(string_length, string_length);
1566 __ j(kEqual, &not_found_label);
1567
1568 if (mirror::kUseStringCompression) {
1569 string_length_flagged = locations->GetTemp(2).AsRegister<Register>();
1570 __ movl(string_length_flagged, string_length);
1571 // Extract the length and shift out the least significant bit used as compression flag.
1572 __ shrl(string_length, Immediate(1));
1573 }
1574
1575 if (start_at_zero) {
1576 // Number of chars to scan is the same as the string length.
1577 __ movl(counter, string_length);
1578
1579 // Move to the start of the string.
1580 __ addl(string_obj, Immediate(value_offset));
1581 } else {
1582 Register start_index = locations->InAt(2).AsRegister<Register>();
1583
1584 // Do a start_index check.
1585 __ cmpl(start_index, string_length);
1586 __ j(kGreaterEqual, &not_found_label);
1587
1588 // Ensure we have a start index >= 0.
1589 __ xorl(counter, counter);
1590 __ cmpl(start_index, Immediate(0));
1591 __ cmovl(kGreater, counter, start_index);
1592
1593 if (mirror::kUseStringCompression) {
1594 NearLabel modify_counter, offset_uncompressed_label;
1595 __ testl(string_length_flagged, Immediate(1));
1596 __ j(kNotZero, &offset_uncompressed_label);
1597 // Move to the start of the string: string_obj + value_offset + start_index.
1598 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_1, value_offset));
1599 __ jmp(&modify_counter);
1600
1601 // Move to the start of the string: string_obj + value_offset + 2 * start_index.
1602 __ Bind(&offset_uncompressed_label);
1603 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1604
1605 // Now update ecx (the repne scasw work counter). We have string.length - start_index left to
1606 // compare.
1607 __ Bind(&modify_counter);
1608 } else {
1609 __ leal(string_obj, Address(string_obj, counter, ScaleFactor::TIMES_2, value_offset));
1610 }
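// counter currently holds the clamped start index; compute the number of chars left to
// scan as counter = string_length - counter, done here as counter = string_length + (-counter).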
1611 __ negl(counter);
1612 __ leal(counter, Address(string_length, counter, ScaleFactor::TIMES_1, 0));
1613 }
1614
1615 if (mirror::kUseStringCompression) {
1616 NearLabel uncompressed_string_comparison;
1617 NearLabel comparison_done;
1618 __ testl(string_length_flagged, Immediate(1));
1619 __ j(kNotZero, &uncompressed_string_comparison);
1620
1621 // Check if EAX (search_value) is ASCII.
1622 __ cmpl(search_value, Immediate(127));
1623 __ j(kGreater, &not_found_label);
1624 // Comparing byte-per-byte.
1625 __ repne_scasb();
1626 __ jmp(&comparison_done);
1627
1628 // Everything is set up for repne scasw:
1629 // * Comparison address in EDI.
1630 // * Counter in ECX.
1631 __ Bind(&uncompressed_string_comparison);
1632 __ repne_scasw();
1633 __ Bind(&comparison_done);
1634 } else {
1635 __ repne_scasw();
1636 }
1637 // Did we find a match?
1638 __ j(kNotEqual, &not_found_label);
1639
1640 // Yes, we matched. Compute the index of the result.
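// REPNE SCASW also decremented ECX for the matching char, so the zero-based index of the
// match is string_length - counter - 1.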
1641 __ subl(string_length, counter);
1642 __ leal(out, Address(string_length, -1));
1643
1644 NearLabel done;
1645 __ jmp(&done);
1646
1647 // Failed to match; return -1.
1648 __ Bind(&not_found_label);
1649 __ movl(out, Immediate(-1));
1650
1651 // And join up at the end.
1652 __ Bind(&done);
1653 if (slow_path != nullptr) {
1654 __ Bind(slow_path->GetExitLabel());
1655 }
1656 }
1657
VisitStringIndexOf(HInvoke * invoke)1658 void IntrinsicLocationsBuilderX86::VisitStringIndexOf(HInvoke* invoke) {
1659 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ true);
1660 }
1661
VisitStringIndexOf(HInvoke * invoke)1662 void IntrinsicCodeGeneratorX86::VisitStringIndexOf(HInvoke* invoke) {
1663 GenerateStringIndexOf(invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ true);
1664 }
1665
VisitStringIndexOfAfter(HInvoke * invoke)1666 void IntrinsicLocationsBuilderX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1667 CreateStringIndexOfLocations(invoke, arena_, /* start_at_zero */ false);
1668 }
1669
VisitStringIndexOfAfter(HInvoke * invoke)1670 void IntrinsicCodeGeneratorX86::VisitStringIndexOfAfter(HInvoke* invoke) {
1671 GenerateStringIndexOf(
1672 invoke, GetAssembler(), codegen_, GetAllocator(), /* start_at_zero */ false);
1673 }
1674
VisitStringNewStringFromBytes(HInvoke * invoke)1675 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1676 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1677 LocationSummary::kCallOnMainAndSlowPath,
1678 kIntrinsified);
1679 InvokeRuntimeCallingConvention calling_convention;
1680 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1681 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1682 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1683 locations->SetInAt(3, Location::RegisterLocation(calling_convention.GetRegisterAt(3)));
1684 locations->SetOut(Location::RegisterLocation(EAX));
1685 }
1686
VisitStringNewStringFromBytes(HInvoke * invoke)1687 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromBytes(HInvoke* invoke) {
1688 X86Assembler* assembler = GetAssembler();
1689 LocationSummary* locations = invoke->GetLocations();
1690
1691 Register byte_array = locations->InAt(0).AsRegister<Register>();
1692 __ testl(byte_array, byte_array);
1693 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1694 codegen_->AddSlowPath(slow_path);
1695 __ j(kEqual, slow_path->GetEntryLabel());
1696
1697 codegen_->InvokeRuntime(kQuickAllocStringFromBytes, invoke, invoke->GetDexPc());
1698 CheckEntrypointTypes<kQuickAllocStringFromBytes, void*, void*, int32_t, int32_t, int32_t>();
1699 __ Bind(slow_path->GetExitLabel());
1700 }
1701
VisitStringNewStringFromChars(HInvoke * invoke)1702 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1703 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1704 LocationSummary::kCallOnMainOnly,
1705 kIntrinsified);
1706 InvokeRuntimeCallingConvention calling_convention;
1707 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1708 locations->SetInAt(1, Location::RegisterLocation(calling_convention.GetRegisterAt(1)));
1709 locations->SetInAt(2, Location::RegisterLocation(calling_convention.GetRegisterAt(2)));
1710 locations->SetOut(Location::RegisterLocation(EAX));
1711 }
1712
VisitStringNewStringFromChars(HInvoke * invoke)1713 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromChars(HInvoke* invoke) {
1714 // No need to emit code checking whether `locations->InAt(2)` is a null
1715 // pointer, as callers of the native method
1716 //
1717 // java.lang.StringFactory.newStringFromChars(int offset, int charCount, char[] data)
1718 //
1719 // all include a null check on `data` before calling that method.
1720 codegen_->InvokeRuntime(kQuickAllocStringFromChars, invoke, invoke->GetDexPc());
1721 CheckEntrypointTypes<kQuickAllocStringFromChars, void*, int32_t, int32_t, void*>();
1722 }
1723
VisitStringNewStringFromString(HInvoke * invoke)1724 void IntrinsicLocationsBuilderX86::VisitStringNewStringFromString(HInvoke* invoke) {
1725 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1726 LocationSummary::kCallOnMainAndSlowPath,
1727 kIntrinsified);
1728 InvokeRuntimeCallingConvention calling_convention;
1729 locations->SetInAt(0, Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
1730 locations->SetOut(Location::RegisterLocation(EAX));
1731 }
1732
VisitStringNewStringFromString(HInvoke * invoke)1733 void IntrinsicCodeGeneratorX86::VisitStringNewStringFromString(HInvoke* invoke) {
1734 X86Assembler* assembler = GetAssembler();
1735 LocationSummary* locations = invoke->GetLocations();
1736
1737 Register string_to_copy = locations->InAt(0).AsRegister<Register>();
1738 __ testl(string_to_copy, string_to_copy);
1739 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
1740 codegen_->AddSlowPath(slow_path);
1741 __ j(kEqual, slow_path->GetEntryLabel());
1742
1743 codegen_->InvokeRuntime(kQuickAllocStringFromString, invoke, invoke->GetDexPc());
1744 CheckEntrypointTypes<kQuickAllocStringFromString, void*, void*>();
1745 __ Bind(slow_path->GetExitLabel());
1746 }
1747
VisitStringGetCharsNoCheck(HInvoke * invoke)1748 void IntrinsicLocationsBuilderX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1749 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1750 LocationSummary* locations = new (arena_) LocationSummary(invoke,
1751 LocationSummary::kNoCall,
1752 kIntrinsified);
1753 locations->SetInAt(0, Location::RequiresRegister());
1754 locations->SetInAt(1, Location::RegisterOrConstant(invoke->InputAt(1)));
1755 // Place srcEnd in ECX to save a move below.
1756 locations->SetInAt(2, Location::RegisterLocation(ECX));
1757 locations->SetInAt(3, Location::RequiresRegister());
1758 locations->SetInAt(4, Location::RequiresRegister());
1759
1760 // And we need some temporaries. We will use REP MOVSW, so we need fixed registers.
1761 // We don't have enough registers to also grab ECX, so handle below.
1762 locations->AddTemp(Location::RegisterLocation(ESI));
1763 locations->AddTemp(Location::RegisterLocation(EDI));
1764 }
1765
VisitStringGetCharsNoCheck(HInvoke * invoke)1766 void IntrinsicCodeGeneratorX86::VisitStringGetCharsNoCheck(HInvoke* invoke) {
1767 X86Assembler* assembler = GetAssembler();
1768 LocationSummary* locations = invoke->GetLocations();
1769
1770 size_t char_component_size = Primitive::ComponentSize(Primitive::kPrimChar);
1771 // Location of data in char array buffer.
1772 const uint32_t data_offset = mirror::Array::DataOffset(char_component_size).Uint32Value();
1773 // Location of char array data in string.
1774 const uint32_t value_offset = mirror::String::ValueOffset().Uint32Value();
1775
1776 // public void getChars(int srcBegin, int srcEnd, char[] dst, int dstBegin);
1777 Register obj = locations->InAt(0).AsRegister<Register>();
1778 Location srcBegin = locations->InAt(1);
1779 int srcBegin_value =
1780 srcBegin.IsConstant() ? srcBegin.GetConstant()->AsIntConstant()->GetValue() : 0;
1781 Register srcEnd = locations->InAt(2).AsRegister<Register>();
1782 Register dst = locations->InAt(3).AsRegister<Register>();
1783 Register dstBegin = locations->InAt(4).AsRegister<Register>();
1784
1785 // Check assumption that sizeof(Char) is 2 (used in scaling below).
1786 const size_t char_size = Primitive::ComponentSize(Primitive::kPrimChar);
1787 DCHECK_EQ(char_size, 2u);
1788
1789 // Compute the number of chars (words) to move.
1790 // Save ECX, since we don't know if it will be used later.
1791 __ pushl(ECX);
1792 int stack_adjust = kX86WordSize;
1793 __ cfi().AdjustCFAOffset(stack_adjust);
1794 DCHECK_EQ(srcEnd, ECX);
1795 if (srcBegin.IsConstant()) {
1796 __ subl(ECX, Immediate(srcBegin_value));
1797 } else {
1798 DCHECK(srcBegin.IsRegister());
1799 __ subl(ECX, srcBegin.AsRegister<Register>());
1800 }
1801
1802 NearLabel done;
1803 if (mirror::kUseStringCompression) {
1804 // Location of count in string
1805 const uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
1806 const size_t c_char_size = Primitive::ComponentSize(Primitive::kPrimByte);
1807 DCHECK_EQ(c_char_size, 1u);
1808 __ pushl(EAX);
1809 __ cfi().AdjustCFAOffset(stack_adjust);
1810
1811 NearLabel copy_loop, copy_uncompressed;
1812 __ testl(Address(obj, count_offset), Immediate(1));
1813 static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
1814 "Expecting 0=compressed, 1=uncompressed");
1815 __ j(kNotZero, &copy_uncompressed);
1816 // Compute the address of the source string by adding the number of chars from
1817 // the source beginning to the value offset of a string.
1818 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_1, value_offset));
1819
1820 // Start the loop to copy String's value to Array of Char.
1821 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1822 __ Bind(&copy_loop);
1823 __ jecxz(&done);
1824 // Use EAX temporary (convert byte from ESI to word).
1825 // TODO: Use LODSB/STOSW (not supported by X86Assembler) with AH initialized to 0.
1826 __ movzxb(EAX, Address(ESI, 0));
1827 __ movw(Address(EDI, 0), EAX);
1828 __ leal(EDI, Address(EDI, char_size));
1829 __ leal(ESI, Address(ESI, c_char_size));
1830 // TODO: Add support for LOOP to X86Assembler.
1831 __ subl(ECX, Immediate(1));
1832 __ jmp(&copy_loop);
1833 __ Bind(&copy_uncompressed);
1834 }
1835
1836 // Do the copy for uncompressed string.
1837 // Compute the address of the destination buffer.
1838 __ leal(EDI, Address(dst, dstBegin, ScaleFactor::TIMES_2, data_offset));
1839 __ leal(ESI, CodeGeneratorX86::ArrayAddress(obj, srcBegin, TIMES_2, value_offset));
1840 __ rep_movsw();
1841
1842 __ Bind(&done);
1843 if (mirror::kUseStringCompression) {
1844 // Restore EAX.
1845 __ popl(EAX);
1846 __ cfi().AdjustCFAOffset(-stack_adjust);
1847 }
1848 // Restore ECX.
1849 __ popl(ECX);
1850 __ cfi().AdjustCFAOffset(-stack_adjust);
1851 }
1852
GenPeek(LocationSummary * locations,Primitive::Type size,X86Assembler * assembler)1853 static void GenPeek(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1854 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1855 Location out_loc = locations->Out();
1856 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1857 // to avoid a SIGBUS.
1858 switch (size) {
1859 case Primitive::kPrimByte:
1860 __ movsxb(out_loc.AsRegister<Register>(), Address(address, 0));
1861 break;
1862 case Primitive::kPrimShort:
1863 __ movsxw(out_loc.AsRegister<Register>(), Address(address, 0));
1864 break;
1865 case Primitive::kPrimInt:
1866 __ movl(out_loc.AsRegister<Register>(), Address(address, 0));
1867 break;
1868 case Primitive::kPrimLong:
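// The 64-bit peek is performed as two 32-bit loads (low word, then high word); the
// combined read is not atomic.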
1869 __ movl(out_loc.AsRegisterPairLow<Register>(), Address(address, 0));
1870 __ movl(out_loc.AsRegisterPairHigh<Register>(), Address(address, 4));
1871 break;
1872 default:
1873 LOG(FATAL) << "Type not recognized for peek: " << size;
1874 UNREACHABLE();
1875 }
1876 }
1877
VisitMemoryPeekByte(HInvoke * invoke)1878 void IntrinsicLocationsBuilderX86::VisitMemoryPeekByte(HInvoke* invoke) {
1879 CreateLongToIntLocations(arena_, invoke);
1880 }
1881
VisitMemoryPeekByte(HInvoke * invoke)1882 void IntrinsicCodeGeneratorX86::VisitMemoryPeekByte(HInvoke* invoke) {
1883 GenPeek(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1884 }
1885
VisitMemoryPeekIntNative(HInvoke * invoke)1886 void IntrinsicLocationsBuilderX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1887 CreateLongToIntLocations(arena_, invoke);
1888 }
1889
VisitMemoryPeekIntNative(HInvoke * invoke)1890 void IntrinsicCodeGeneratorX86::VisitMemoryPeekIntNative(HInvoke* invoke) {
1891 GenPeek(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1892 }
1893
VisitMemoryPeekLongNative(HInvoke * invoke)1894 void IntrinsicLocationsBuilderX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1895 CreateLongToLongLocations(arena_, invoke);
1896 }
1897
VisitMemoryPeekLongNative(HInvoke * invoke)1898 void IntrinsicCodeGeneratorX86::VisitMemoryPeekLongNative(HInvoke* invoke) {
1899 GenPeek(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1900 }
1901
VisitMemoryPeekShortNative(HInvoke * invoke)1902 void IntrinsicLocationsBuilderX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1903 CreateLongToIntLocations(arena_, invoke);
1904 }
1905
VisitMemoryPeekShortNative(HInvoke * invoke)1906 void IntrinsicCodeGeneratorX86::VisitMemoryPeekShortNative(HInvoke* invoke) {
1907 GenPeek(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
1908 }
1909
CreateLongIntToVoidLocations(ArenaAllocator * arena,Primitive::Type size,HInvoke * invoke)1910 static void CreateLongIntToVoidLocations(ArenaAllocator* arena, Primitive::Type size,
1911 HInvoke* invoke) {
1912 LocationSummary* locations = new (arena) LocationSummary(invoke,
1913 LocationSummary::kNoCall,
1914 kIntrinsified);
1915 locations->SetInAt(0, Location::RequiresRegister());
1916 HInstruction* value = invoke->InputAt(1);
1917 if (size == Primitive::kPrimByte) {
1918 locations->SetInAt(1, Location::ByteRegisterOrConstant(EDX, value));
1919 } else {
1920 locations->SetInAt(1, Location::RegisterOrConstant(value));
1921 }
1922 }
1923
GenPoke(LocationSummary * locations,Primitive::Type size,X86Assembler * assembler)1924 static void GenPoke(LocationSummary* locations, Primitive::Type size, X86Assembler* assembler) {
1925 Register address = locations->InAt(0).AsRegisterPairLow<Register>();
1926 Location value_loc = locations->InAt(1);
1927 // x86 allows unaligned access. We do not have to check the input or use specific instructions
1928 // to avoid a SIGBUS.
1929 switch (size) {
1930 case Primitive::kPrimByte:
1931 if (value_loc.IsConstant()) {
1932 __ movb(Address(address, 0),
1933 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1934 } else {
1935 __ movb(Address(address, 0), value_loc.AsRegister<ByteRegister>());
1936 }
1937 break;
1938 case Primitive::kPrimShort:
1939 if (value_loc.IsConstant()) {
1940 __ movw(Address(address, 0),
1941 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1942 } else {
1943 __ movw(Address(address, 0), value_loc.AsRegister<Register>());
1944 }
1945 break;
1946 case Primitive::kPrimInt:
1947 if (value_loc.IsConstant()) {
1948 __ movl(Address(address, 0),
1949 Immediate(value_loc.GetConstant()->AsIntConstant()->GetValue()));
1950 } else {
1951 __ movl(Address(address, 0), value_loc.AsRegister<Register>());
1952 }
1953 break;
1954 case Primitive::kPrimLong:
1955 if (value_loc.IsConstant()) {
1956 int64_t value = value_loc.GetConstant()->AsLongConstant()->GetValue();
1957 __ movl(Address(address, 0), Immediate(Low32Bits(value)));
1958 __ movl(Address(address, 4), Immediate(High32Bits(value)));
1959 } else {
1960 __ movl(Address(address, 0), value_loc.AsRegisterPairLow<Register>());
1961 __ movl(Address(address, 4), value_loc.AsRegisterPairHigh<Register>());
1962 }
1963 break;
1964 default:
1965 LOG(FATAL) << "Type not recognized for poke: " << size;
1966 UNREACHABLE();
1967 }
1968 }
1969
VisitMemoryPokeByte(HInvoke * invoke)1970 void IntrinsicLocationsBuilderX86::VisitMemoryPokeByte(HInvoke* invoke) {
1971 CreateLongIntToVoidLocations(arena_, Primitive::kPrimByte, invoke);
1972 }
1973
VisitMemoryPokeByte(HInvoke * invoke)1974 void IntrinsicCodeGeneratorX86::VisitMemoryPokeByte(HInvoke* invoke) {
1975 GenPoke(invoke->GetLocations(), Primitive::kPrimByte, GetAssembler());
1976 }
1977
VisitMemoryPokeIntNative(HInvoke * invoke)1978 void IntrinsicLocationsBuilderX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1979 CreateLongIntToVoidLocations(arena_, Primitive::kPrimInt, invoke);
1980 }
1981
VisitMemoryPokeIntNative(HInvoke * invoke)1982 void IntrinsicCodeGeneratorX86::VisitMemoryPokeIntNative(HInvoke* invoke) {
1983 GenPoke(invoke->GetLocations(), Primitive::kPrimInt, GetAssembler());
1984 }
1985
VisitMemoryPokeLongNative(HInvoke * invoke)1986 void IntrinsicLocationsBuilderX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1987 CreateLongIntToVoidLocations(arena_, Primitive::kPrimLong, invoke);
1988 }
1989
VisitMemoryPokeLongNative(HInvoke * invoke)1990 void IntrinsicCodeGeneratorX86::VisitMemoryPokeLongNative(HInvoke* invoke) {
1991 GenPoke(invoke->GetLocations(), Primitive::kPrimLong, GetAssembler());
1992 }
1993
VisitMemoryPokeShortNative(HInvoke * invoke)1994 void IntrinsicLocationsBuilderX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1995 CreateLongIntToVoidLocations(arena_, Primitive::kPrimShort, invoke);
1996 }
1997
VisitMemoryPokeShortNative(HInvoke * invoke)1998 void IntrinsicCodeGeneratorX86::VisitMemoryPokeShortNative(HInvoke* invoke) {
1999 GenPoke(invoke->GetLocations(), Primitive::kPrimShort, GetAssembler());
2000 }
2001
VisitThreadCurrentThread(HInvoke * invoke)2002 void IntrinsicLocationsBuilderX86::VisitThreadCurrentThread(HInvoke* invoke) {
2003 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2004 LocationSummary::kNoCall,
2005 kIntrinsified);
2006 locations->SetOut(Location::RequiresRegister());
2007 }
2008
VisitThreadCurrentThread(HInvoke * invoke)2009 void IntrinsicCodeGeneratorX86::VisitThreadCurrentThread(HInvoke* invoke) {
2010 Register out = invoke->GetLocations()->Out().AsRegister<Register>();
2011 GetAssembler()->fs()->movl(out, Address::Absolute(Thread::PeerOffset<kX86PointerSize>()));
2012 }
2013
GenUnsafeGet(HInvoke * invoke,Primitive::Type type,bool is_volatile,CodeGeneratorX86 * codegen)2014 static void GenUnsafeGet(HInvoke* invoke,
2015 Primitive::Type type,
2016 bool is_volatile,
2017 CodeGeneratorX86* codegen) {
2018 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2019 LocationSummary* locations = invoke->GetLocations();
2020 Location base_loc = locations->InAt(1);
2021 Register base = base_loc.AsRegister<Register>();
2022 Location offset_loc = locations->InAt(2);
2023 Register offset = offset_loc.AsRegisterPairLow<Register>();
2024 Location output_loc = locations->Out();
2025
2026 switch (type) {
2027 case Primitive::kPrimInt: {
2028 Register output = output_loc.AsRegister<Register>();
2029 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2030 break;
2031 }
2032
2033 case Primitive::kPrimNot: {
2034 Register output = output_loc.AsRegister<Register>();
2035 if (kEmitCompilerReadBarrier) {
2036 if (kUseBakerReadBarrier) {
2037 Address src(base, offset, ScaleFactor::TIMES_1, 0);
2038 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2039 invoke, output_loc, base, src, /* needs_null_check */ false);
2040 } else {
2041 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2042 codegen->GenerateReadBarrierSlow(
2043 invoke, output_loc, output_loc, base_loc, 0U, offset_loc);
2044 }
2045 } else {
2046 __ movl(output, Address(base, offset, ScaleFactor::TIMES_1, 0));
2047 __ MaybeUnpoisonHeapReference(output);
2048 }
2049 break;
2050 }
2051
2052 case Primitive::kPrimLong: {
2053 Register output_lo = output_loc.AsRegisterPairLow<Register>();
2054 Register output_hi = output_loc.AsRegisterPairHigh<Register>();
2055 if (is_volatile) {
2056 // Need to use an XMM register to read the value atomically.
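// movsd performs a single 8-byte load, which gives the required atomicity; the movd/psrlq
// sequence below merely splits the loaded value into the low/high output registers.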
2057 XmmRegister temp = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2058 __ movsd(temp, Address(base, offset, ScaleFactor::TIMES_1, 0));
2059 __ movd(output_lo, temp);
2060 __ psrlq(temp, Immediate(32));
2061 __ movd(output_hi, temp);
2062 } else {
2063 __ movl(output_lo, Address(base, offset, ScaleFactor::TIMES_1, 0));
2064 __ movl(output_hi, Address(base, offset, ScaleFactor::TIMES_1, 4));
2065 }
2066 }
2067 break;
2068
2069 default:
2070 LOG(FATAL) << "Unsupported op size " << type;
2071 UNREACHABLE();
2072 }
2073 }
2074
CreateIntIntIntToIntLocations(ArenaAllocator * arena,HInvoke * invoke,Primitive::Type type,bool is_volatile)2075 static void CreateIntIntIntToIntLocations(ArenaAllocator* arena,
2076 HInvoke* invoke,
2077 Primitive::Type type,
2078 bool is_volatile) {
2079 bool can_call = kEmitCompilerReadBarrier &&
2080 (invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObject ||
2081 invoke->GetIntrinsic() == Intrinsics::kUnsafeGetObjectVolatile);
2082 LocationSummary* locations = new (arena) LocationSummary(invoke,
2083 (can_call
2084 ? LocationSummary::kCallOnSlowPath
2085 : LocationSummary::kNoCall),
2086 kIntrinsified);
2087 if (can_call && kUseBakerReadBarrier) {
2088 locations->SetCustomSlowPathCallerSaves(RegisterSet::Empty()); // No caller-save registers.
2089 }
2090 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2091 locations->SetInAt(1, Location::RequiresRegister());
2092 locations->SetInAt(2, Location::RequiresRegister());
2093 if (type == Primitive::kPrimLong) {
2094 if (is_volatile) {
2095 // Need an XMM temporary to read the volatile long atomically.
2096 locations->AddTemp(Location::RequiresFpuRegister());
2097 locations->SetOut(Location::RequiresRegister(), Location::kNoOutputOverlap);
2098 } else {
2099 locations->SetOut(Location::RequiresRegister(), Location::kOutputOverlap);
2100 }
2101 } else {
2102 locations->SetOut(Location::RequiresRegister(),
2103 (can_call ? Location::kOutputOverlap : Location::kNoOutputOverlap));
2104 }
2105 }
2106
VisitUnsafeGet(HInvoke * invoke)2107 void IntrinsicLocationsBuilderX86::VisitUnsafeGet(HInvoke* invoke) {
2108 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ false);
2109 }
VisitUnsafeGetVolatile(HInvoke * invoke)2110 void IntrinsicLocationsBuilderX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
2111 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimInt, /* is_volatile */ true);
2112 }
VisitUnsafeGetLong(HInvoke * invoke)2113 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLong(HInvoke* invoke) {
2114 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ false);
2115 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2116 void IntrinsicLocationsBuilderX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2117 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimLong, /* is_volatile */ true);
2118 }
VisitUnsafeGetObject(HInvoke * invoke)2119 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObject(HInvoke* invoke) {
2120 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ false);
2121 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2122 void IntrinsicLocationsBuilderX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2123 CreateIntIntIntToIntLocations(arena_, invoke, Primitive::kPrimNot, /* is_volatile */ true);
2124 }
2125
2126
VisitUnsafeGet(HInvoke * invoke)2127 void IntrinsicCodeGeneratorX86::VisitUnsafeGet(HInvoke* invoke) {
2128 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2129 }
VisitUnsafeGetVolatile(HInvoke * invoke)2130 void IntrinsicCodeGeneratorX86::VisitUnsafeGetVolatile(HInvoke* invoke) {
2131 GenUnsafeGet(invoke, Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2132 }
VisitUnsafeGetLong(HInvoke * invoke)2133 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLong(HInvoke* invoke) {
2134 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2135 }
VisitUnsafeGetLongVolatile(HInvoke * invoke)2136 void IntrinsicCodeGeneratorX86::VisitUnsafeGetLongVolatile(HInvoke* invoke) {
2137 GenUnsafeGet(invoke, Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2138 }
VisitUnsafeGetObject(HInvoke * invoke)2139 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObject(HInvoke* invoke) {
2140 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2141 }
VisitUnsafeGetObjectVolatile(HInvoke * invoke)2142 void IntrinsicCodeGeneratorX86::VisitUnsafeGetObjectVolatile(HInvoke* invoke) {
2143 GenUnsafeGet(invoke, Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2144 }
2145
2146
CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator * arena,Primitive::Type type,HInvoke * invoke,bool is_volatile)2147 static void CreateIntIntIntIntToVoidPlusTempsLocations(ArenaAllocator* arena,
2148 Primitive::Type type,
2149 HInvoke* invoke,
2150 bool is_volatile) {
2151 LocationSummary* locations = new (arena) LocationSummary(invoke,
2152 LocationSummary::kNoCall,
2153 kIntrinsified);
2154 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2155 locations->SetInAt(1, Location::RequiresRegister());
2156 locations->SetInAt(2, Location::RequiresRegister());
2157 locations->SetInAt(3, Location::RequiresRegister());
2158 if (type == Primitive::kPrimNot) {
2159 // Need temp registers for card-marking.
2160 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2161 // Ensure the value is in a byte register.
2162 locations->AddTemp(Location::RegisterLocation(ECX));
2163 } else if (type == Primitive::kPrimLong && is_volatile) {
2164 locations->AddTemp(Location::RequiresFpuRegister());
2165 locations->AddTemp(Location::RequiresFpuRegister());
2166 }
2167 }
2168
VisitUnsafePut(HInvoke * invoke)2169 void IntrinsicLocationsBuilderX86::VisitUnsafePut(HInvoke* invoke) {
2170 CreateIntIntIntIntToVoidPlusTempsLocations(
2171 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
2172 }
VisitUnsafePutOrdered(HInvoke * invoke)2173 void IntrinsicLocationsBuilderX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2174 CreateIntIntIntIntToVoidPlusTempsLocations(
2175 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ false);
2176 }
VisitUnsafePutVolatile(HInvoke * invoke)2177 void IntrinsicLocationsBuilderX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2178 CreateIntIntIntIntToVoidPlusTempsLocations(
2179 arena_, Primitive::kPrimInt, invoke, /* is_volatile */ true);
2180 }
VisitUnsafePutObject(HInvoke * invoke)2181 void IntrinsicLocationsBuilderX86::VisitUnsafePutObject(HInvoke* invoke) {
2182 CreateIntIntIntIntToVoidPlusTempsLocations(
2183 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
2184 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2185 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2186 CreateIntIntIntIntToVoidPlusTempsLocations(
2187 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ false);
2188 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2189 void IntrinsicLocationsBuilderX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2190 CreateIntIntIntIntToVoidPlusTempsLocations(
2191 arena_, Primitive::kPrimNot, invoke, /* is_volatile */ true);
2192 }
VisitUnsafePutLong(HInvoke * invoke)2193 void IntrinsicLocationsBuilderX86::VisitUnsafePutLong(HInvoke* invoke) {
2194 CreateIntIntIntIntToVoidPlusTempsLocations(
2195 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
2196 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2197 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2198 CreateIntIntIntIntToVoidPlusTempsLocations(
2199 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ false);
2200 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2201 void IntrinsicLocationsBuilderX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2202 CreateIntIntIntIntToVoidPlusTempsLocations(
2203 arena_, Primitive::kPrimLong, invoke, /* is_volatile */ true);
2204 }
2205
2206 // We don't care for ordered: it requires an AnyStore barrier, which is already given by the x86
2207 // memory model.
GenUnsafePut(LocationSummary * locations,Primitive::Type type,bool is_volatile,CodeGeneratorX86 * codegen)2208 static void GenUnsafePut(LocationSummary* locations,
2209 Primitive::Type type,
2210 bool is_volatile,
2211 CodeGeneratorX86* codegen) {
2212 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2213 Register base = locations->InAt(1).AsRegister<Register>();
2214 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2215 Location value_loc = locations->InAt(3);
2216
2217 if (type == Primitive::kPrimLong) {
2218 Register value_lo = value_loc.AsRegisterPairLow<Register>();
2219 Register value_hi = value_loc.AsRegisterPairHigh<Register>();
2220 if (is_volatile) {
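// Assemble the 64-bit value in an XMM register: punpckldq interleaves the two 32-bit
// halves (low dword = value_lo, next dword = value_hi) so a single movsd can store all
// 8 bytes atomically.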
2221 XmmRegister temp1 = locations->GetTemp(0).AsFpuRegister<XmmRegister>();
2222 XmmRegister temp2 = locations->GetTemp(1).AsFpuRegister<XmmRegister>();
2223 __ movd(temp1, value_lo);
2224 __ movd(temp2, value_hi);
2225 __ punpckldq(temp1, temp2);
2226 __ movsd(Address(base, offset, ScaleFactor::TIMES_1, 0), temp1);
2227 } else {
2228 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_lo);
2229 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 4), value_hi);
2230 }
2231 } else if (kPoisonHeapReferences && type == Primitive::kPrimNot) {
2232 Register temp = locations->GetTemp(0).AsRegister<Register>();
2233 __ movl(temp, value_loc.AsRegister<Register>());
2234 __ PoisonHeapReference(temp);
2235 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), temp);
2236 } else {
2237 __ movl(Address(base, offset, ScaleFactor::TIMES_1, 0), value_loc.AsRegister<Register>());
2238 }
2239
2240 if (is_volatile) {
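// A volatile store must be followed by a StoreLoad barrier; MemoryFence() emits one
// (typically a locked no-op add or an mfence, depending on the configuration).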
2241 codegen->MemoryFence();
2242 }
2243
2244 if (type == Primitive::kPrimNot) {
2245 bool value_can_be_null = true; // TODO: Worth finding out this information?
2246 codegen->MarkGCCard(locations->GetTemp(0).AsRegister<Register>(),
2247 locations->GetTemp(1).AsRegister<Register>(),
2248 base,
2249 value_loc.AsRegister<Register>(),
2250 value_can_be_null);
2251 }
2252 }
2253
VisitUnsafePut(HInvoke * invoke)2254 void IntrinsicCodeGeneratorX86::VisitUnsafePut(HInvoke* invoke) {
2255 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2256 }
VisitUnsafePutOrdered(HInvoke * invoke)2257 void IntrinsicCodeGeneratorX86::VisitUnsafePutOrdered(HInvoke* invoke) {
2258 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ false, codegen_);
2259 }
VisitUnsafePutVolatile(HInvoke * invoke)2260 void IntrinsicCodeGeneratorX86::VisitUnsafePutVolatile(HInvoke* invoke) {
2261 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimInt, /* is_volatile */ true, codegen_);
2262 }
VisitUnsafePutObject(HInvoke * invoke)2263 void IntrinsicCodeGeneratorX86::VisitUnsafePutObject(HInvoke* invoke) {
2264 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2265 }
VisitUnsafePutObjectOrdered(HInvoke * invoke)2266 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectOrdered(HInvoke* invoke) {
2267 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ false, codegen_);
2268 }
VisitUnsafePutObjectVolatile(HInvoke * invoke)2269 void IntrinsicCodeGeneratorX86::VisitUnsafePutObjectVolatile(HInvoke* invoke) {
2270 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimNot, /* is_volatile */ true, codegen_);
2271 }
VisitUnsafePutLong(HInvoke * invoke)2272 void IntrinsicCodeGeneratorX86::VisitUnsafePutLong(HInvoke* invoke) {
2273 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2274 }
VisitUnsafePutLongOrdered(HInvoke * invoke)2275 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongOrdered(HInvoke* invoke) {
2276 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ false, codegen_);
2277 }
VisitUnsafePutLongVolatile(HInvoke * invoke)2278 void IntrinsicCodeGeneratorX86::VisitUnsafePutLongVolatile(HInvoke* invoke) {
2279 GenUnsafePut(invoke->GetLocations(), Primitive::kPrimLong, /* is_volatile */ true, codegen_);
2280 }
2281
CreateIntIntIntIntIntToInt(ArenaAllocator * arena,Primitive::Type type,HInvoke * invoke)2282 static void CreateIntIntIntIntIntToInt(ArenaAllocator* arena,
2283 Primitive::Type type,
2284 HInvoke* invoke) {
2285 bool can_call = kEmitCompilerReadBarrier &&
2286 kUseBakerReadBarrier &&
2287 (invoke->GetIntrinsic() == Intrinsics::kUnsafeCASObject);
2288 LocationSummary* locations = new (arena) LocationSummary(invoke,
2289 (can_call
2290 ? LocationSummary::kCallOnSlowPath
2291 : LocationSummary::kNoCall),
2292 kIntrinsified);
2293 locations->SetInAt(0, Location::NoLocation()); // Unused receiver.
2294 locations->SetInAt(1, Location::RequiresRegister());
2295 // Offset is a long, but in 32 bit mode, we only need the low word.
2296 // Can we update the invoke here to remove a TypeConvert to Long?
2297 locations->SetInAt(2, Location::RequiresRegister());
2298 // Expected value must be in EAX or EDX:EAX.
2299 // For long, new value must be in ECX:EBX.
2300 if (type == Primitive::kPrimLong) {
2301 locations->SetInAt(3, Location::RegisterPairLocation(EAX, EDX));
2302 locations->SetInAt(4, Location::RegisterPairLocation(EBX, ECX));
2303 } else {
2304 locations->SetInAt(3, Location::RegisterLocation(EAX));
2305 locations->SetInAt(4, Location::RequiresRegister());
2306 }
2307
2308 // Force a byte register for the output.
2309 locations->SetOut(Location::RegisterLocation(EAX));
2310 if (type == Primitive::kPrimNot) {
2311 // Need temporary registers for card-marking, and possibly for
2312 // (Baker) read barrier.
2313 locations->AddTemp(Location::RequiresRegister()); // Possibly used for reference poisoning too.
2314 // Need a byte register for marking.
2315 locations->AddTemp(Location::RegisterLocation(ECX));
2316 }
2317 }
2318
VisitUnsafeCASInt(HInvoke * invoke)2319 void IntrinsicLocationsBuilderX86::VisitUnsafeCASInt(HInvoke* invoke) {
2320 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimInt, invoke);
2321 }
2322
VisitUnsafeCASLong(HInvoke * invoke)2323 void IntrinsicLocationsBuilderX86::VisitUnsafeCASLong(HInvoke* invoke) {
2324 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimLong, invoke);
2325 }
2326
VisitUnsafeCASObject(HInvoke * invoke)2327 void IntrinsicLocationsBuilderX86::VisitUnsafeCASObject(HInvoke* invoke) {
2328 // The only read barrier implementation supporting the
2329 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2330 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2331 return;
2332 }
2333
2334 CreateIntIntIntIntIntToInt(arena_, Primitive::kPrimNot, invoke);
2335 }
2336
GenCAS(Primitive::Type type,HInvoke * invoke,CodeGeneratorX86 * codegen)2337 static void GenCAS(Primitive::Type type, HInvoke* invoke, CodeGeneratorX86* codegen) {
2338 X86Assembler* assembler = down_cast<X86Assembler*>(codegen->GetAssembler());
2339 LocationSummary* locations = invoke->GetLocations();
2340
2341 Register base = locations->InAt(1).AsRegister<Register>();
2342 Register offset = locations->InAt(2).AsRegisterPairLow<Register>();
2343 Location out = locations->Out();
2344 DCHECK_EQ(out.AsRegister<Register>(), EAX);
2345
2346 // The address of the field within the holding object.
2347 Address field_addr(base, offset, ScaleFactor::TIMES_1, 0);
2348
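// Reminder on CMPXCHG semantics: it compares EAX with the destination; on equality it
// stores the new value and sets ZF, otherwise it loads the destination into EAX and clears ZF.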
2349 if (type == Primitive::kPrimNot) {
2350 // The only read barrier implementation supporting the
2351 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2352 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2353
2354 Location temp1_loc = locations->GetTemp(0);
2355 Register temp1 = temp1_loc.AsRegister<Register>();
2356 Register temp2 = locations->GetTemp(1).AsRegister<Register>();
2357
2358 Register expected = locations->InAt(3).AsRegister<Register>();
2359 // Ensure `expected` is in EAX (required by the CMPXCHG instruction).
2360 DCHECK_EQ(expected, EAX);
2361 Register value = locations->InAt(4).AsRegister<Register>();
2362
2363 // Mark card for object assuming new value is stored.
2364 bool value_can_be_null = true; // TODO: Worth finding out this information?
2365 codegen->MarkGCCard(temp1, temp2, base, value, value_can_be_null);
2366
2367 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
2368 // Need to make sure the reference stored in the field is a to-space
2369 // one before attempting the CAS or the CAS could fail incorrectly.
2370 codegen->GenerateReferenceLoadWithBakerReadBarrier(
2371 invoke,
2372 temp1_loc, // Unused, used only as a "temporary" within the read barrier.
2373 base,
2374 field_addr,
2375 /* needs_null_check */ false,
2376 /* always_update_field */ true,
2377 &temp2);
2378 }
2379
2380 bool base_equals_value = (base == value);
2381 if (kPoisonHeapReferences) {
2382 if (base_equals_value) {
2383 // If `base` and `value` are the same register location, move
2384 // `value` to a temporary register. This way, poisoning
2385 // `value` won't invalidate `base`.
2386 value = temp1;
2387 __ movl(value, base);
2388 }
2389
2390 // Check that the register allocator did not assign the location
2391 // of `expected` (EAX) to `value` nor to `base`, so that heap
2392 // poisoning (when enabled) works as intended below.
2393 // - If `value` were equal to `expected`, both references would
2394 // be poisoned twice, meaning they would not be poisoned at
2395 // all, as heap poisoning uses address negation.
2396 // - If `base` were equal to `expected`, poisoning `expected`
2397 // would invalidate `base`.
2398 DCHECK_NE(value, expected);
2399 DCHECK_NE(base, expected);
2400
2401 __ PoisonHeapReference(expected);
2402 __ PoisonHeapReference(value);
2403 }
2404
2405 __ LockCmpxchgl(field_addr, value);
2406
2407 // LOCK CMPXCHG has full barrier semantics, and we don't need
2408 // scheduling barriers at this time.
2409
2410 // Convert ZF into the Boolean result.
2411 __ setb(kZero, out.AsRegister<Register>());
2412 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2413
2414 // If heap poisoning is enabled, we need to unpoison the values
2415 // that were poisoned earlier.
2416 if (kPoisonHeapReferences) {
2417 if (base_equals_value) {
2418 // `value` has been moved to a temporary register, no need to
2419 // unpoison it.
2420 } else {
2421 // Ensure `value` is different from `out`, so that unpoisoning
2422 // the former does not invalidate the latter.
2423 DCHECK_NE(value, out.AsRegister<Register>());
2424 __ UnpoisonHeapReference(value);
2425 }
2426 // Do not unpoison the reference contained in register
2427 // `expected`, as it is the same as register `out` (EAX).
2428 }
2429 } else {
2430 if (type == Primitive::kPrimInt) {
2431 // Ensure the expected value is in EAX (required by the CMPXCHG
2432 // instruction).
2433 DCHECK_EQ(locations->InAt(3).AsRegister<Register>(), EAX);
2434 __ LockCmpxchgl(field_addr, locations->InAt(4).AsRegister<Register>());
2435 } else if (type == Primitive::kPrimLong) {
2436 // Ensure the expected value is in EAX:EDX and that the new
2437 // value is in EBX:ECX (required by the CMPXCHG8B instruction).
2438 DCHECK_EQ(locations->InAt(3).AsRegisterPairLow<Register>(), EAX);
2439 DCHECK_EQ(locations->InAt(3).AsRegisterPairHigh<Register>(), EDX);
2440 DCHECK_EQ(locations->InAt(4).AsRegisterPairLow<Register>(), EBX);
2441 DCHECK_EQ(locations->InAt(4).AsRegisterPairHigh<Register>(), ECX);
2442 __ LockCmpxchg8b(field_addr);
2443 } else {
2444 LOG(FATAL) << "Unexpected CAS type " << type;
2445 }
2446
2447 // LOCK CMPXCHG/LOCK CMPXCHG8B have full barrier semantics, and we
2448 // don't need scheduling barriers at this time.
2449
2450 // Convert ZF into the Boolean result.
2451 __ setb(kZero, out.AsRegister<Register>());
2452 __ movzxb(out.AsRegister<Register>(), out.AsRegister<ByteRegister>());
2453 }
2454 }
2455
VisitUnsafeCASInt(HInvoke * invoke)2456 void IntrinsicCodeGeneratorX86::VisitUnsafeCASInt(HInvoke* invoke) {
2457 GenCAS(Primitive::kPrimInt, invoke, codegen_);
2458 }
2459
VisitUnsafeCASLong(HInvoke * invoke)2460 void IntrinsicCodeGeneratorX86::VisitUnsafeCASLong(HInvoke* invoke) {
2461 GenCAS(Primitive::kPrimLong, invoke, codegen_);
2462 }
2463
VisitUnsafeCASObject(HInvoke * invoke)2464 void IntrinsicCodeGeneratorX86::VisitUnsafeCASObject(HInvoke* invoke) {
2465 // The only read barrier implementation supporting the
2466 // UnsafeCASObject intrinsic is the Baker-style read barriers.
2467 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2468
2469 GenCAS(Primitive::kPrimNot, invoke, codegen_);
2470 }
2471
VisitIntegerReverse(HInvoke * invoke)2472 void IntrinsicLocationsBuilderX86::VisitIntegerReverse(HInvoke* invoke) {
2473 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2474 LocationSummary::kNoCall,
2475 kIntrinsified);
2476 locations->SetInAt(0, Location::RequiresRegister());
2477 locations->SetOut(Location::SameAsFirstInput());
2478 locations->AddTemp(Location::RequiresRegister());
2479 }
2480
SwapBits(Register reg,Register temp,int32_t shift,int32_t mask,X86Assembler * assembler)2481 static void SwapBits(Register reg, Register temp, int32_t shift, int32_t mask,
2482 X86Assembler* assembler) {
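// Computes reg = ((reg >> shift) & mask) | ((reg & mask) << shift), i.e. swaps the bit
// groups selected by `mask` with their neighbors `shift` positions above.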
2483 Immediate imm_shift(shift);
2484 Immediate imm_mask(mask);
2485 __ movl(temp, reg);
2486 __ shrl(reg, imm_shift);
2487 __ andl(temp, imm_mask);
2488 __ andl(reg, imm_mask);
2489 __ shll(temp, imm_shift);
2490 __ orl(reg, temp);
2491 }
2492
VisitIntegerReverse(HInvoke * invoke)2493 void IntrinsicCodeGeneratorX86::VisitIntegerReverse(HInvoke* invoke) {
2494 X86Assembler* assembler = GetAssembler();
2495 LocationSummary* locations = invoke->GetLocations();
2496
2497 Register reg = locations->InAt(0).AsRegister<Register>();
2498 Register temp = locations->GetTemp(0).AsRegister<Register>();
2499
2500 /*
2501 * Use one bswap instruction to reverse byte order first and then use 3 rounds of
2502 * swapping bits to reverse bits in a number x. Using bswap saves instructions
2503 * compared to the generic luni implementation, which needs 5 rounds of swapping bits.
2504 * x = bswap x
2505 * x = (x & 0x55555555) << 1 | (x >> 1) & 0x55555555;
2506 * x = (x & 0x33333333) << 2 | (x >> 2) & 0x33333333;
2507 * x = (x & 0x0F0F0F0F) << 4 | (x >> 4) & 0x0F0F0F0F;
2508 */
2509 __ bswapl(reg);
2510 SwapBits(reg, temp, 1, 0x55555555, assembler);
2511 SwapBits(reg, temp, 2, 0x33333333, assembler);
2512 SwapBits(reg, temp, 4, 0x0f0f0f0f, assembler);
2513 }
2514
VisitLongReverse(HInvoke * invoke)2515 void IntrinsicLocationsBuilderX86::VisitLongReverse(HInvoke* invoke) {
2516 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2517 LocationSummary::kNoCall,
2518 kIntrinsified);
2519 locations->SetInAt(0, Location::RequiresRegister());
2520 locations->SetOut(Location::SameAsFirstInput());
2521 locations->AddTemp(Location::RequiresRegister());
2522 }
2523
VisitLongReverse(HInvoke * invoke)2524 void IntrinsicCodeGeneratorX86::VisitLongReverse(HInvoke* invoke) {
2525 X86Assembler* assembler = GetAssembler();
2526 LocationSummary* locations = invoke->GetLocations();
2527
2528 Register reg_low = locations->InAt(0).AsRegisterPairLow<Register>();
2529 Register reg_high = locations->InAt(0).AsRegisterPairHigh<Register>();
2530 Register temp = locations->GetTemp(0).AsRegister<Register>();
2531
2532 // We want to swap high/low, then bswap each one, and then do the same
2533 // as a 32 bit reverse.
2534 // Exchange high and low.
2535 __ movl(temp, reg_low);
2536 __ movl(reg_low, reg_high);
2537 __ movl(reg_high, temp);
2538
2539 // bit-reverse low
2540 __ bswapl(reg_low);
2541 SwapBits(reg_low, temp, 1, 0x55555555, assembler);
2542 SwapBits(reg_low, temp, 2, 0x33333333, assembler);
2543 SwapBits(reg_low, temp, 4, 0x0f0f0f0f, assembler);
2544
2545 // bit-reverse high
2546 __ bswapl(reg_high);
2547 SwapBits(reg_high, temp, 1, 0x55555555, assembler);
2548 SwapBits(reg_high, temp, 2, 0x33333333, assembler);
2549 SwapBits(reg_high, temp, 4, 0x0f0f0f0f, assembler);
2550 }
2551
CreateBitCountLocations(ArenaAllocator * arena,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2552 static void CreateBitCountLocations(
2553 ArenaAllocator* arena, CodeGeneratorX86* codegen, HInvoke* invoke, bool is_long) {
2554 if (!codegen->GetInstructionSetFeatures().HasPopCnt()) {
2555 // Do nothing if there is no popcnt support. This results in generating
2556 // a call for the intrinsic rather than direct code.
2557 return;
2558 }
2559 LocationSummary* locations = new (arena) LocationSummary(invoke,
2560 LocationSummary::kNoCall,
2561 kIntrinsified);
2562 if (is_long) {
2563 locations->AddTemp(Location::RequiresRegister());
2564 }
2565 locations->SetInAt(0, Location::Any());
2566 locations->SetOut(Location::RequiresRegister());
2567 }
2568
GenBitCount(X86Assembler * assembler,CodeGeneratorX86 * codegen,HInvoke * invoke,bool is_long)2569 static void GenBitCount(X86Assembler* assembler,
2570 CodeGeneratorX86* codegen,
2571 HInvoke* invoke, bool is_long) {
2572 LocationSummary* locations = invoke->GetLocations();
2573 Location src = locations->InAt(0);
2574 Register out = locations->Out().AsRegister<Register>();
2575
2576 if (invoke->InputAt(0)->IsConstant()) {
2577 // Evaluate this at compile time.
2578 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2579 int32_t result = is_long
2580 ? POPCOUNT(static_cast<uint64_t>(value))
2581 : POPCOUNT(static_cast<uint32_t>(value));
2582 codegen->Load32BitValue(out, result);
2583 return;
2584 }
2585
2586 // Handle the non-constant cases.
2587 if (!is_long) {
2588 if (src.IsRegister()) {
2589 __ popcntl(out, src.AsRegister<Register>());
2590 } else {
2591 DCHECK(src.IsStackSlot());
2592 __ popcntl(out, Address(ESP, src.GetStackIndex()));
2593 }
2594 } else {
2595 // The 64-bit case needs to worry about two parts.
2596 Register temp = locations->GetTemp(0).AsRegister<Register>();
2597 if (src.IsRegisterPair()) {
2598 __ popcntl(temp, src.AsRegisterPairLow<Register>());
2599 __ popcntl(out, src.AsRegisterPairHigh<Register>());
2600 } else {
2601 DCHECK(src.IsDoubleStackSlot());
2602 __ popcntl(temp, Address(ESP, src.GetStackIndex()));
2603 __ popcntl(out, Address(ESP, src.GetHighStackIndex(kX86WordSize)));
2604 }
2605 __ addl(out, temp);
2606 }
2607 }
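// On 32-bit x86 the 64-bit bit count is simply the sum of two 32-bit POPCNTs,
// as the code above does with `temp` and `out`. A sketch (illustrative only):
//
//   int BitCount64(uint64_t x) {
//     return __builtin_popcount(static_cast<uint32_t>(x)) +
//            __builtin_popcount(static_cast<uint32_t>(x >> 32));
//   }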
2608
2609 void IntrinsicLocationsBuilderX86::VisitIntegerBitCount(HInvoke* invoke) {
2610 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ false);
2611 }
2612
2613 void IntrinsicCodeGeneratorX86::VisitIntegerBitCount(HInvoke* invoke) {
2614 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ false);
2615 }
2616
2617 void IntrinsicLocationsBuilderX86::VisitLongBitCount(HInvoke* invoke) {
2618 CreateBitCountLocations(arena_, codegen_, invoke, /* is_long */ true);
2619 }
2620
2621 void IntrinsicCodeGeneratorX86::VisitLongBitCount(HInvoke* invoke) {
2622 GenBitCount(GetAssembler(), codegen_, invoke, /* is_long */ true);
2623 }
2624
2625 static void CreateLeadingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
2626 LocationSummary* locations = new (arena) LocationSummary(invoke,
2627 LocationSummary::kNoCall,
2628 kIntrinsified);
2629 if (is_long) {
2630 locations->SetInAt(0, Location::RequiresRegister());
2631 } else {
2632 locations->SetInAt(0, Location::Any());
2633 }
2634 locations->SetOut(Location::RequiresRegister());
2635 }
2636
2637 static void GenLeadingZeros(X86Assembler* assembler,
2638 CodeGeneratorX86* codegen,
2639 HInvoke* invoke, bool is_long) {
2640 LocationSummary* locations = invoke->GetLocations();
2641 Location src = locations->InAt(0);
2642 Register out = locations->Out().AsRegister<Register>();
2643
2644 if (invoke->InputAt(0)->IsConstant()) {
2645 // Evaluate this at compile time.
2646 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2647 if (value == 0) {
2648 value = is_long ? 64 : 32;
2649 } else {
2650 value = is_long ? CLZ(static_cast<uint64_t>(value)) : CLZ(static_cast<uint32_t>(value));
2651 }
2652 codegen->Load32BitValue(out, value);
2653 return;
2654 }
2655
2656 // Handle the non-constant cases.
2657 if (!is_long) {
2658 if (src.IsRegister()) {
2659 __ bsrl(out, src.AsRegister<Register>());
2660 } else {
2661 DCHECK(src.IsStackSlot());
2662 __ bsrl(out, Address(ESP, src.GetStackIndex()));
2663 }
2664
2665 // BSR sets ZF if the input was zero, and the output is undefined.
2666 NearLabel all_zeroes, done;
2667 __ j(kEqual, &all_zeroes);
2668
2669 // Correct the result from BSR to get the final CLZ result.
2670 __ xorl(out, Immediate(31));
2671 __ jmp(&done);
2672
2673 // Fix the zero case with the expected result.
2674 __ Bind(&all_zeroes);
2675 __ movl(out, Immediate(32));
2676
2677 __ Bind(&done);
2678 return;
2679 }
2680
2681 // The 64-bit case needs to worry about both halves of the register pair.
2682 DCHECK(src.IsRegisterPair());
2683 Register src_lo = src.AsRegisterPairLow<Register>();
2684 Register src_hi = src.AsRegisterPairHigh<Register>();
2685 NearLabel handle_low, done, all_zeroes;
2686
2687 // Is the high word zero?
2688 __ testl(src_hi, src_hi);
2689 __ j(kEqual, &handle_low);
2690
2691 // High word is not zero. We know that the BSR result is defined in this case.
2692 __ bsrl(out, src_hi);
2693
2694 // Correct the result from BSR to get the final CLZ result.
2695 __ xorl(out, Immediate(31));
2696 __ jmp(&done);
2697
2698 // High word was zero. We have to compute the low word count and add 32.
2699 __ Bind(&handle_low);
2700 __ bsrl(out, src_lo);
2701 __ j(kEqual, &all_zeroes);
2702
2703 // We had a valid result. Use an XOR to both correct the result and add 32.
2704 __ xorl(out, Immediate(63));
2705 __ jmp(&done);
2706
2707 // All zero case.
2708 __ Bind(&all_zeroes);
2709 __ movl(out, Immediate(64));
2710
2711 __ Bind(&done);
2712 }
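// The BSR-based computation above can be summarized as follows (a sketch,
// not part of the generated code): BSR yields the index of the highest set
// bit, so XOR-ing it with 31 (or with 63 for the combined 64-bit result)
// converts it into a leading-zero count.
//
//   int Clz32(uint32_t x) {
//     if (x == 0) return 32;
//     int bsr;
//     for (bsr = 31; ((x >> bsr) & 1u) == 0; --bsr) {}  // index of highest set bit
//     return bsr ^ 31;                                   // same as 31 - bsr for bsr in [0, 31]
//   }
//
//   int Clz64(uint32_t hi, uint32_t lo) {
//     return hi != 0 ? Clz32(hi) : 32 + Clz32(lo);
//   }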
2713
2714 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2715 CreateLeadingZeroLocations(arena_, invoke, /* is_long */ false);
2716 }
2717
2718 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfLeadingZeros(HInvoke* invoke) {
2719 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
2720 }
2721
2722 void IntrinsicLocationsBuilderX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2723 CreateLeadingZeroLocations(arena_, invoke, /* is_long */ true);
2724 }
2725
2726 void IntrinsicCodeGeneratorX86::VisitLongNumberOfLeadingZeros(HInvoke* invoke) {
2727 GenLeadingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
2728 }
2729
2730 static void CreateTrailingZeroLocations(ArenaAllocator* arena, HInvoke* invoke, bool is_long) {
2731 LocationSummary* locations = new (arena) LocationSummary(invoke,
2732 LocationSummary::kNoCall,
2733 kIntrinsified);
2734 if (is_long) {
2735 locations->SetInAt(0, Location::RequiresRegister());
2736 } else {
2737 locations->SetInAt(0, Location::Any());
2738 }
2739 locations->SetOut(Location::RequiresRegister());
2740 }
2741
2742 static void GenTrailingZeros(X86Assembler* assembler,
2743 CodeGeneratorX86* codegen,
2744 HInvoke* invoke, bool is_long) {
2745 LocationSummary* locations = invoke->GetLocations();
2746 Location src = locations->InAt(0);
2747 Register out = locations->Out().AsRegister<Register>();
2748
2749 if (invoke->InputAt(0)->IsConstant()) {
2750 // Evaluate this at compile time.
2751 int64_t value = Int64FromConstant(invoke->InputAt(0)->AsConstant());
2752 if (value == 0) {
2753 value = is_long ? 64 : 32;
2754 } else {
2755 value = is_long ? CTZ(static_cast<uint64_t>(value)) : CTZ(static_cast<uint32_t>(value));
2756 }
2757 codegen->Load32BitValue(out, value);
2758 return;
2759 }
2760
2761 // Handle the non-constant cases.
2762 if (!is_long) {
2763 if (src.IsRegister()) {
2764 __ bsfl(out, src.AsRegister<Register>());
2765 } else {
2766 DCHECK(src.IsStackSlot());
2767 __ bsfl(out, Address(ESP, src.GetStackIndex()));
2768 }
2769
2770 // BSF sets ZF if the input was zero, and the output is undefined.
2771 NearLabel done;
2772 __ j(kNotEqual, &done);
2773
2774 // Fix the zero case with the expected result.
2775 __ movl(out, Immediate(32));
2776
2777 __ Bind(&done);
2778 return;
2779 }
2780
2781 // The 64-bit case needs to worry about both halves of the register pair.
2782 DCHECK(src.IsRegisterPair());
2783 Register src_lo = src.AsRegisterPairLow<Register>();
2784 Register src_hi = src.AsRegisterPairHigh<Register>();
2785 NearLabel done, all_zeroes;
2786
2787 // If the low word is zero, then ZF will be set. If not, we have the answer.
2788 __ bsfl(out, src_lo);
2789 __ j(kNotEqual, &done);
2790
2791 // Low word was zero. We have to compute the high word count and add 32.
2792 __ bsfl(out, src_hi);
2793 __ j(kEqual, &all_zeroes);
2794
2795 // We had a valid result. Add 32 to account for the low word being zero.
2796 __ addl(out, Immediate(32));
2797 __ jmp(&done);
2798
2799 // All zero case.
2800 __ Bind(&all_zeroes);
2801 __ movl(out, Immediate(64));
2802
2803 __ Bind(&done);
2804 }
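// Likewise, the BSF-based trailing-zero count above corresponds to the
// following sketch (illustrative only): BSF already returns the index of the
// lowest set bit, which is the trailing-zero count, so only all-zero inputs
// need fixing up.
//
//   int Ctz32(uint32_t x) {
//     if (x == 0) return 32;
//     int bsf = 0;
//     while (((x >> bsf) & 1u) == 0) ++bsf;  // index of lowest set bit
//     return bsf;
//   }
//
//   int Ctz64(uint32_t hi, uint32_t lo) {
//     return lo != 0 ? Ctz32(lo) : 32 + Ctz32(hi);
//   }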
2805
2806 void IntrinsicLocationsBuilderX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2807 CreateTrailingZeroLocations(arena_, invoke, /* is_long */ false);
2808 }
2809
2810 void IntrinsicCodeGeneratorX86::VisitIntegerNumberOfTrailingZeros(HInvoke* invoke) {
2811 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ false);
2812 }
2813
2814 void IntrinsicLocationsBuilderX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2815 CreateTrailingZeroLocations(arena_, invoke, /* is_long */ true);
2816 }
2817
2818 void IntrinsicCodeGeneratorX86::VisitLongNumberOfTrailingZeros(HInvoke* invoke) {
2819 GenTrailingZeros(GetAssembler(), codegen_, invoke, /* is_long */ true);
2820 }
2821
2822 void IntrinsicLocationsBuilderX86::VisitReferenceGetReferent(HInvoke* invoke) {
2823 if (kEmitCompilerReadBarrier) {
2824 // Do not intrinsify this call with the read barrier configuration.
2825 return;
2826 }
2827 LocationSummary* locations = new (arena_) LocationSummary(invoke,
2828 LocationSummary::kCallOnSlowPath,
2829 kIntrinsified);
2830 locations->SetInAt(0, Location::RequiresRegister());
2831 locations->SetOut(Location::SameAsFirstInput());
2832 locations->AddTemp(Location::RequiresRegister());
2833 }
2834
2835 void IntrinsicCodeGeneratorX86::VisitReferenceGetReferent(HInvoke* invoke) {
2836 DCHECK(!kEmitCompilerReadBarrier);
2837 LocationSummary* locations = invoke->GetLocations();
2838 X86Assembler* assembler = GetAssembler();
2839
2840 Register obj = locations->InAt(0).AsRegister<Register>();
2841 Register out = locations->Out().AsRegister<Register>();
2842
2843 SlowPathCode* slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
2844 codegen_->AddSlowPath(slow_path);
2845
2846 // Load ArtMethod first.
2847 HInvokeStaticOrDirect* invoke_direct = invoke->AsInvokeStaticOrDirect();
2848 DCHECK(invoke_direct != nullptr);
2849 Location temp_loc = codegen_->GenerateCalleeMethodStaticOrDirectCall(
2850 invoke_direct, locations->GetTemp(0));
2851 DCHECK(temp_loc.Equals(locations->GetTemp(0)));
2852 Register temp = temp_loc.AsRegister<Register>();
2853
2854 // Now get declaring class.
2855 __ movl(temp, Address(temp, ArtMethod::DeclaringClassOffset().Int32Value()));
2856
2857 uint32_t slow_path_flag_offset = codegen_->GetReferenceSlowFlagOffset();
2858 uint32_t disable_flag_offset = codegen_->GetReferenceDisableFlagOffset();
2859 DCHECK_NE(slow_path_flag_offset, 0u);
2860 DCHECK_NE(disable_flag_offset, 0u);
2861 DCHECK_NE(slow_path_flag_offset, disable_flag_offset);
2862
2863 // Check the static flags that prevent us from using the intrinsic.
2864 if (slow_path_flag_offset == disable_flag_offset + 1) {
2865 __ cmpw(Address(temp, disable_flag_offset), Immediate(0));
2866 __ j(kNotEqual, slow_path->GetEntryLabel());
2867 } else {
2868 __ cmpb(Address(temp, disable_flag_offset), Immediate(0));
2869 __ j(kNotEqual, slow_path->GetEntryLabel());
2870 __ cmpb(Address(temp, slow_path_flag_offset), Immediate(0));
2871 __ j(kNotEqual, slow_path->GetEntryLabel());
2872 }
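// Note on the check above: both flags are byte-sized, so when they happen to
// be laid out adjacently a single 16-bit compare against zero tests them in
// one instruction. A sketch with a hypothetical layout (the struct and field
// names below are not the real ones, just an illustration):
//
//   struct ReferenceFlags { uint8_t disable_intrinsic; uint8_t slow_path_enabled; };
//   inline bool TakeSlowPath(const ReferenceFlags& f) {
//     return f.disable_intrinsic != 0 || f.slow_path_enabled != 0;
//     // With the adjacent layout on little-endian x86 this is equivalent to
//     // testing the 16-bit word starting at the first flag against zero.
//   }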
2873
2874 // Fast path.
2875 __ movl(out, Address(obj, mirror::Reference::ReferentOffset().Int32Value()));
2876 codegen_->MaybeRecordImplicitNullCheck(invoke);
2877 __ MaybeUnpoisonHeapReference(out);
2878 __ Bind(slow_path->GetExitLabel());
2879 }
2880
2881 static bool IsSameInput(HInstruction* instruction, size_t input0, size_t input1) {
2882 return instruction->InputAt(input0) == instruction->InputAt(input1);
2883 }
2884
2885 // Compute base address for the System.arraycopy intrinsic in `base`.
2886 static void GenSystemArrayCopyBaseAddress(X86Assembler* assembler,
2887 Primitive::Type type,
2888 const Register& array,
2889 const Location& pos,
2890 const Register& base) {
2891 // This routine is only used by the SystemArrayCopy intrinsic at the
2892 // moment. It could also accept Primitive::kPrimChar as `type` to implement
2893 // the SystemArrayCopyChar intrinsic.
2894 DCHECK_EQ(type, Primitive::kPrimNot);
2895 const int32_t element_size = Primitive::ComponentSize(type);
2896 const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
2897 const uint32_t data_offset = mirror::Array::DataOffset(element_size).Uint32Value();
2898
2899 if (pos.IsConstant()) {
2900 int32_t constant = pos.GetConstant()->AsIntConstant()->GetValue();
2901 __ leal(base, Address(array, element_size * constant + data_offset));
2902 } else {
2903 __ leal(base, Address(array, pos.AsRegister<Register>(), scale_factor, data_offset));
2904 }
2905 }
2906
2907 // Compute end source address for the System.arraycopy intrinsic in `end`.
2908 static void GenSystemArrayCopyEndAddress(X86Assembler* assembler,
2909 Primitive::Type type,
2910 const Location& copy_length,
2911 const Register& base,
2912 const Register& end) {
2913 // This routine is only used by the SystemArrayCopy intrinsic at the
2914 // moment. It could also accept Primitive::kPrimChar as `type` to implement
2915 // the SystemArrayCopyChar intrinsic.
2916 DCHECK_EQ(type, Primitive::kPrimNot);
2917 const int32_t element_size = Primitive::ComponentSize(type);
2918 const ScaleFactor scale_factor = static_cast<ScaleFactor>(Primitive::ComponentSizeShift(type));
2919
2920 if (copy_length.IsConstant()) {
2921 int32_t constant = copy_length.GetConstant()->AsIntConstant()->GetValue();
2922 __ leal(end, Address(base, element_size * constant));
2923 } else {
2924 __ leal(end, Address(base, copy_length.AsRegister<Register>(), scale_factor, 0));
2925 }
2926 }
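// Taken together, the two helpers above are plain pointer arithmetic. A
// sketch of what the emitted LEA instructions amount to (illustrative only):
//
//   uint8_t* base = reinterpret_cast<uint8_t*>(array) + data_offset + pos * element_size;
//   uint8_t* end  = base + copy_length * element_size;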
2927
2928 void IntrinsicLocationsBuilderX86::VisitSystemArrayCopy(HInvoke* invoke) {
2929 // The only read barrier implementation supporting the
2930 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2931 if (kEmitCompilerReadBarrier && !kUseBakerReadBarrier) {
2932 return;
2933 }
2934
2935 CodeGenerator::CreateSystemArrayCopyLocationSummary(invoke);
2936 if (invoke->GetLocations() != nullptr) {
2937 // Need a byte register for marking.
2938 invoke->GetLocations()->SetTempAt(1, Location::RegisterLocation(ECX));
2939
2940 static constexpr size_t kSrc = 0;
2941 static constexpr size_t kSrcPos = 1;
2942 static constexpr size_t kDest = 2;
2943 static constexpr size_t kDestPos = 3;
2944 static constexpr size_t kLength = 4;
2945
2946 if (!invoke->InputAt(kSrcPos)->IsIntConstant() &&
2947 !invoke->InputAt(kDestPos)->IsIntConstant() &&
2948 !invoke->InputAt(kLength)->IsIntConstant()) {
2949 if (!IsSameInput(invoke, kSrcPos, kDestPos) &&
2950 !IsSameInput(invoke, kSrcPos, kLength) &&
2951 !IsSameInput(invoke, kDestPos, kLength) &&
2952 !IsSameInput(invoke, kSrc, kDest)) {
2953 // Not enough registers, make the length also take a stack slot.
2954 invoke->GetLocations()->SetInAt(kLength, Location::Any());
2955 }
2956 }
2957 }
2958 }
2959
2960 void IntrinsicCodeGeneratorX86::VisitSystemArrayCopy(HInvoke* invoke) {
2961 // The only read barrier implementation supporting the
2962 // SystemArrayCopy intrinsic is the Baker-style read barriers.
2963 DCHECK(!kEmitCompilerReadBarrier || kUseBakerReadBarrier);
2964
2965 X86Assembler* assembler = GetAssembler();
2966 LocationSummary* locations = invoke->GetLocations();
2967
2968 uint32_t class_offset = mirror::Object::ClassOffset().Int32Value();
2969 uint32_t super_offset = mirror::Class::SuperClassOffset().Int32Value();
2970 uint32_t component_offset = mirror::Class::ComponentTypeOffset().Int32Value();
2971 uint32_t primitive_offset = mirror::Class::PrimitiveTypeOffset().Int32Value();
2972 uint32_t monitor_offset = mirror::Object::MonitorOffset().Int32Value();
2973
2974 Register src = locations->InAt(0).AsRegister<Register>();
2975 Location src_pos = locations->InAt(1);
2976 Register dest = locations->InAt(2).AsRegister<Register>();
2977 Location dest_pos = locations->InAt(3);
2978 Location length_arg = locations->InAt(4);
2979 Location length = length_arg;
2980 Location temp1_loc = locations->GetTemp(0);
2981 Register temp1 = temp1_loc.AsRegister<Register>();
2982 Location temp2_loc = locations->GetTemp(1);
2983 Register temp2 = temp2_loc.AsRegister<Register>();
2984
2985 SlowPathCode* intrinsic_slow_path = new (GetAllocator()) IntrinsicSlowPathX86(invoke);
2986 codegen_->AddSlowPath(intrinsic_slow_path);
2987
2988 NearLabel conditions_on_positions_validated;
2989 SystemArrayCopyOptimizations optimizations(invoke);
2990
2991 // If source and destination are the same, we go to slow path if we need to do
2992 // forward copying.
2993 if (src_pos.IsConstant()) {
2994 int32_t src_pos_constant = src_pos.GetConstant()->AsIntConstant()->GetValue();
2995 if (dest_pos.IsConstant()) {
2996 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
2997 if (optimizations.GetDestinationIsSource()) {
2998 // Checked when building locations.
2999 DCHECK_GE(src_pos_constant, dest_pos_constant);
3000 } else if (src_pos_constant < dest_pos_constant) {
3001 __ cmpl(src, dest);
3002 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3003 }
3004 } else {
3005 if (!optimizations.GetDestinationIsSource()) {
3006 __ cmpl(src, dest);
3007 __ j(kNotEqual, &conditions_on_positions_validated);
3008 }
3009 __ cmpl(dest_pos.AsRegister<Register>(), Immediate(src_pos_constant));
3010 __ j(kGreater, intrinsic_slow_path->GetEntryLabel());
3011 }
3012 } else {
3013 if (!optimizations.GetDestinationIsSource()) {
3014 __ cmpl(src, dest);
3015 __ j(kNotEqual, &conditions_on_positions_validated);
3016 }
3017 if (dest_pos.IsConstant()) {
3018 int32_t dest_pos_constant = dest_pos.GetConstant()->AsIntConstant()->GetValue();
3019 __ cmpl(src_pos.AsRegister<Register>(), Immediate(dest_pos_constant));
3020 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3021 } else {
3022 __ cmpl(src_pos.AsRegister<Register>(), dest_pos.AsRegister<Register>());
3023 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3024 }
3025 }
3026
3027 __ Bind(&conditions_on_positions_validated);
3028
3029 if (!optimizations.GetSourceIsNotNull()) {
3030 // Bail out if the source is null.
3031 __ testl(src, src);
3032 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3033 }
3034
3035 if (!optimizations.GetDestinationIsNotNull() && !optimizations.GetDestinationIsSource()) {
3036 // Bail out if the destination is null.
3037 __ testl(dest, dest);
3038 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3039 }
3040
3041 Location temp3_loc = locations->GetTemp(2);
3042 Register temp3 = temp3_loc.AsRegister<Register>();
3043 if (length.IsStackSlot()) {
3044 __ movl(temp3, Address(ESP, length.GetStackIndex()));
3045 length = Location::RegisterLocation(temp3);
3046 }
3047
3048 // If the length is negative, bail out.
3049 // We have already checked in the LocationsBuilder for the constant case.
3050 if (!length.IsConstant() &&
3051 !optimizations.GetCountIsSourceLength() &&
3052 !optimizations.GetCountIsDestinationLength()) {
3053 __ testl(length.AsRegister<Register>(), length.AsRegister<Register>());
3054 __ j(kLess, intrinsic_slow_path->GetEntryLabel());
3055 }
3056
3057 // Validity checks: source.
3058 CheckPosition(assembler,
3059 src_pos,
3060 src,
3061 length,
3062 intrinsic_slow_path,
3063 temp1,
3064 optimizations.GetCountIsSourceLength());
3065
3066 // Validity checks: dest.
3067 CheckPosition(assembler,
3068 dest_pos,
3069 dest,
3070 length,
3071 intrinsic_slow_path,
3072 temp1,
3073 optimizations.GetCountIsDestinationLength());
3074
3075 if (!optimizations.GetDoesNotNeedTypeCheck()) {
3076 // Check whether all elements of the source array are assignable to the component
3077 // type of the destination array. We do two checks: the classes are the same,
3078 // or the destination is Object[]. If none of these checks succeed, we go to the
3079 // slow path.
3080
3081 if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3082 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3083 // /* HeapReference<Class> */ temp1 = src->klass_
3084 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3085 invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
3086 // Bail out if the source is not a non-primitive array.
3087 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3088 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3089 invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
3090 __ testl(temp1, temp1);
3091 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3092 // If heap poisoning is enabled, `temp1` has been unpoisoned
3093 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3094 } else {
3095 // /* HeapReference<Class> */ temp1 = src->klass_
3096 __ movl(temp1, Address(src, class_offset));
3097 __ MaybeUnpoisonHeapReference(temp1);
3098 // Bail out if the source is not a non-primitive array.
3099 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3100 __ movl(temp1, Address(temp1, component_offset));
3101 __ testl(temp1, temp1);
3102 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3103 __ MaybeUnpoisonHeapReference(temp1);
3104 }
3105 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
3106 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3107 }
3108
3109 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3110 if (length.Equals(Location::RegisterLocation(temp3))) {
3111 // When Baker read barriers are enabled, register `temp3`,
3112 // which in the present case contains the `length` parameter,
3113 // will be overwritten below. Make the `length` location
3114 // reference the original stack location; it will be moved
3115 // back to `temp3` later if necessary.
3116 DCHECK(length_arg.IsStackSlot());
3117 length = length_arg;
3118 }
3119
3120 // /* HeapReference<Class> */ temp1 = dest->klass_
3121 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3122 invoke, temp1_loc, dest, class_offset, /* needs_null_check */ false);
3123
3124 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3125 // Bail out if the destination is not a non-primitive array.
3126 //
3127 // Register `temp1` is not trashed by the read barrier emitted
3128 // by GenerateFieldLoadWithBakerReadBarrier below, as that
3129 // method produces a call to a ReadBarrierMarkRegX entry point,
3130 // which saves all potentially live registers, including
3131 // temporaries such as `temp1`.
3132 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3133 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3134 invoke, temp2_loc, temp1, component_offset, /* needs_null_check */ false);
3135 __ testl(temp2, temp2);
3136 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3137 // If heap poisoning is enabled, `temp2` has been unpoisoned
3138 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3139 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
3140 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3141 }
3142
3143 // For the same reason given earlier, `temp1` is not trashed by the
3144 // read barrier emitted by GenerateFieldLoadWithBakerReadBarrier below.
3145 // /* HeapReference<Class> */ temp2 = src->klass_
3146 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3147 invoke, temp2_loc, src, class_offset, /* needs_null_check */ false);
3148 // Note: if heap poisoning is on, we are comparing two unpoisoned references here.
3149 __ cmpl(temp1, temp2);
3150
3151 if (optimizations.GetDestinationIsTypedObjectArray()) {
3152 NearLabel do_copy;
3153 __ j(kEqual, &do_copy);
3154 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3155 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3156 invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
3157 // We do not need to emit a read barrier for the following
3158 // heap reference load, as `temp1` is only used in a
3159 // comparison with null below, and this reference is not
3160 // kept afterwards.
3161 __ cmpl(Address(temp1, super_offset), Immediate(0));
3162 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3163 __ Bind(&do_copy);
3164 } else {
3165 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3166 }
3167 } else {
3168 // Non read barrier code.
3169
3170 // /* HeapReference<Class> */ temp1 = dest->klass_
3171 __ movl(temp1, Address(dest, class_offset));
3172 if (!optimizations.GetDestinationIsNonPrimitiveArray()) {
3173 __ MaybeUnpoisonHeapReference(temp1);
3174 // Bail out if the destination is not a non-primitive array.
3175 // /* HeapReference<Class> */ temp2 = temp1->component_type_
3176 __ movl(temp2, Address(temp1, component_offset));
3177 __ testl(temp2, temp2);
3178 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3179 __ MaybeUnpoisonHeapReference(temp2);
3180 __ cmpw(Address(temp2, primitive_offset), Immediate(Primitive::kPrimNot));
3181 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3182 // Re-poison the heap reference to make the compare instruction below
3183 // compare two poisoned references.
3184 __ PoisonHeapReference(temp1);
3185 }
3186
3187 // Note: if heap poisoning is on, we are comparing two poisoned references here.
3188 __ cmpl(temp1, Address(src, class_offset));
3189
3190 if (optimizations.GetDestinationIsTypedObjectArray()) {
3191 NearLabel do_copy;
3192 __ j(kEqual, &do_copy);
3193 __ MaybeUnpoisonHeapReference(temp1);
3194 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3195 __ movl(temp1, Address(temp1, component_offset));
3196 __ MaybeUnpoisonHeapReference(temp1);
3197 __ cmpl(Address(temp1, super_offset), Immediate(0));
3198 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3199 __ Bind(&do_copy);
3200 } else {
3201 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3202 }
3203 }
3204 } else if (!optimizations.GetSourceIsNonPrimitiveArray()) {
3205 DCHECK(optimizations.GetDestinationIsNonPrimitiveArray());
3206 // Bail out if the source is not a non-primitive array.
3207 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3208 // /* HeapReference<Class> */ temp1 = src->klass_
3209 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3210 invoke, temp1_loc, src, class_offset, /* needs_null_check */ false);
3211 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3212 codegen_->GenerateFieldLoadWithBakerReadBarrier(
3213 invoke, temp1_loc, temp1, component_offset, /* needs_null_check */ false);
3214 __ testl(temp1, temp1);
3215 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3216 // If heap poisoning is enabled, `temp1` has been unpoisoned
3217 // by the previous call to GenerateFieldLoadWithBakerReadBarrier.
3218 } else {
3219 // /* HeapReference<Class> */ temp1 = src->klass_
3220 __ movl(temp1, Address(src, class_offset));
3221 __ MaybeUnpoisonHeapReference(temp1);
3222 // /* HeapReference<Class> */ temp1 = temp1->component_type_
3223 __ movl(temp1, Address(temp1, component_offset));
3224 __ testl(temp1, temp1);
3225 __ j(kEqual, intrinsic_slow_path->GetEntryLabel());
3226 __ MaybeUnpoisonHeapReference(temp1);
3227 }
3228 __ cmpw(Address(temp1, primitive_offset), Immediate(Primitive::kPrimNot));
3229 __ j(kNotEqual, intrinsic_slow_path->GetEntryLabel());
3230 }
3231
3232 const Primitive::Type type = Primitive::kPrimNot;
3233 const int32_t element_size = Primitive::ComponentSize(type);
3234
3235 // Compute the base source address in `temp1`.
3236 GenSystemArrayCopyBaseAddress(GetAssembler(), type, src, src_pos, temp1);
3237
3238 if (kEmitCompilerReadBarrier && kUseBakerReadBarrier) {
3239 // If it is needed (in the case of the fast-path loop), the base
3240 // destination address is computed later, as `temp2` is used for
3241 // intermediate computations.
3242
3243 // Compute the end source address in `temp3`.
3244 if (length.IsStackSlot()) {
3245 // Location `length` is again pointing at a stack slot, as
3246 // register `temp3` (which contained the length parameter
3247 // earlier) has been overwritten; restore it now.
3248 DCHECK(length.Equals(length_arg));
3249 __ movl(temp3, Address(ESP, length.GetStackIndex()));
3250 length = Location::RegisterLocation(temp3);
3251 }
3252 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
3253
3254 // SystemArrayCopy implementation for Baker read barriers (see
3255 // also CodeGeneratorX86::GenerateReferenceLoadWithBakerReadBarrier):
3256 //
3257 // if (src_ptr != end_ptr) {
3258 // uint32_t rb_state = LockWord(src->monitor_).ReadBarrierState();
3259 // lfence; // Load fence or artificial data dependency to prevent load-load reordering
3260 // bool is_gray = (rb_state == ReadBarrier::GrayState());
3261 // if (is_gray) {
3262 // // Slow-path copy.
3263 // for (size_t i = 0; i != length; ++i) {
3264 // dest_array[dest_pos + i] =
3265 // MaybePoison(ReadBarrier::Mark(MaybeUnpoison(src_array[src_pos + i])));
3266 // }
3267 // } else {
3268 // // Fast-path copy.
3269 // do {
3270 // *dest_ptr++ = *src_ptr++;
3271 // } while (src_ptr != end_ptr);
3272 // }
3273 // }
3274
3275 NearLabel loop, done;
3276
3277 // Don't enter copy loop if `length == 0`.
3278 __ cmpl(temp1, temp3);
3279 __ j(kEqual, &done);
3280
3281 // Given the numeric representation, it's enough to check the low bit of the rb_state.
3282 static_assert(ReadBarrier::WhiteState() == 0, "Expecting white to have value 0");
3283 static_assert(ReadBarrier::GrayState() == 1, "Expecting gray to have value 1");
3284 constexpr uint32_t gray_byte_position = LockWord::kReadBarrierStateShift / kBitsPerByte;
3285 constexpr uint32_t gray_bit_position = LockWord::kReadBarrierStateShift % kBitsPerByte;
3286 constexpr int32_t test_value = static_cast<int8_t>(1 << gray_bit_position);
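// As a worked example, if LockWord::kReadBarrierStateShift were 28 (an
// assumed value for illustration only), the gray bit would live in byte
// 28 / 8 = 3 of the lock word, at bit 28 % 8 = 4 within that byte, so
// test_value would be 1 << 4 = 0x10.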
3287
3288 // if (rb_state == ReadBarrier::GrayState())
3289 // goto slow_path;
3290 // At this point, just do the "if" and make sure that flags are preserved until the branch.
3291 __ testb(Address(src, monitor_offset + gray_byte_position), Immediate(test_value));
3292
3293 // Load fence to prevent load-load reordering.
3294 // Note that this is a no-op, thanks to the x86 memory model.
3295 codegen_->GenerateMemoryBarrier(MemBarrierKind::kLoadAny);
3296
3297 // Slow path used to copy array when `src` is gray.
3298 SlowPathCode* read_barrier_slow_path =
3299 new (GetAllocator()) ReadBarrierSystemArrayCopySlowPathX86(invoke);
3300 codegen_->AddSlowPath(read_barrier_slow_path);
3301
3302 // We have done the "if" of the gray bit check above, now branch based on the flags.
3303 __ j(kNotZero, read_barrier_slow_path->GetEntryLabel());
3304
3305 // Fast-path copy.
3306 // Compute the base destination address in `temp2`.
3307 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
3308 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3309 // poison/unpoison.
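// The push/pop pair below moves one 32-bit heap reference directly from
// memory to memory, so the copy loop needs no extra scratch register; it is
// the machine-level form of the fast-path sketch above:
//   do { *dest_ptr++ = *src_ptr++; } while (src_ptr != end_ptr);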
3310 __ Bind(&loop);
3311 __ pushl(Address(temp1, 0));
3312 __ cfi().AdjustCFAOffset(4);
3313 __ popl(Address(temp2, 0));
3314 __ cfi().AdjustCFAOffset(-4);
3315 __ addl(temp1, Immediate(element_size));
3316 __ addl(temp2, Immediate(element_size));
3317 __ cmpl(temp1, temp3);
3318 __ j(kNotEqual, &loop);
3319
3320 __ Bind(read_barrier_slow_path->GetExitLabel());
3321 __ Bind(&done);
3322 } else {
3323 // Non read barrier code.
3324 // Compute the base destination address in `temp2`.
3325 GenSystemArrayCopyBaseAddress(GetAssembler(), type, dest, dest_pos, temp2);
3326 // Compute the end source address in `temp3`.
3327 GenSystemArrayCopyEndAddress(GetAssembler(), type, length, temp1, temp3);
3328 // Iterate over the arrays and do a raw copy of the objects. We don't need to
3329 // poison/unpoison.
3330 NearLabel loop, done;
3331 __ cmpl(temp1, temp3);
3332 __ j(kEqual, &done);
3333 __ Bind(&loop);
3334 __ pushl(Address(temp1, 0));
3335 __ cfi().AdjustCFAOffset(4);
3336 __ popl(Address(temp2, 0));
3337 __ cfi().AdjustCFAOffset(-4);
3338 __ addl(temp1, Immediate(element_size));
3339 __ addl(temp2, Immediate(element_size));
3340 __ cmpl(temp1, temp3);
3341 __ j(kNotEqual, &loop);
3342 __ Bind(&done);
3343 }
3344
3345 // We only need one card marking on the destination array.
3346 codegen_->MarkGCCard(temp1, temp2, dest, Register(kNoRegister), /* value_can_be_null */ false);
3347
3348 __ Bind(intrinsic_slow_path->GetExitLabel());
3349 }
3350
3351 void IntrinsicLocationsBuilderX86::VisitIntegerValueOf(HInvoke* invoke) {
3352 InvokeRuntimeCallingConvention calling_convention;
3353 IntrinsicVisitor::ComputeIntegerValueOfLocations(
3354 invoke,
3355 codegen_,
3356 Location::RegisterLocation(EAX),
3357 Location::RegisterLocation(calling_convention.GetRegisterAt(0)));
3358 }
3359
3360 void IntrinsicCodeGeneratorX86::VisitIntegerValueOf(HInvoke* invoke) {
3361 IntrinsicVisitor::IntegerValueOfInfo info = IntrinsicVisitor::ComputeIntegerValueOfInfo();
3362 LocationSummary* locations = invoke->GetLocations();
3363 X86Assembler* assembler = GetAssembler();
3364
3365 Register out = locations->Out().AsRegister<Register>();
3366 InvokeRuntimeCallingConvention calling_convention;
3367 if (invoke->InputAt(0)->IsConstant()) {
3368 int32_t value = invoke->InputAt(0)->AsIntConstant()->GetValue();
3369 if (value >= info.low && value <= info.high) {
3370 // Just embed the j.l.Integer in the code.
3371 ScopedObjectAccess soa(Thread::Current());
3372 mirror::Object* boxed = info.cache->Get(value + (-info.low));
3373 DCHECK(boxed != nullptr && Runtime::Current()->GetHeap()->ObjectIsInBootImageSpace(boxed));
3374 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(boxed));
3375 __ movl(out, Immediate(address));
3376 } else {
3377 // Allocate and initialize a new j.l.Integer.
3378 // TODO: If we JIT, we could allocate the j.l.Integer now, and store it in the
3379 // JIT object table.
3380 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3381 __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
3382 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3383 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3384 __ movl(Address(out, info.value_offset), Immediate(value));
3385 }
3386 } else {
3387 Register in = locations->InAt(0).AsRegister<Register>();
3388 // Check bounds of our cache.
3389 __ leal(out, Address(in, -info.low));
3390 __ cmpl(out, Immediate(info.high - info.low + 1));
3391 NearLabel allocate, done;
3392 __ j(kAboveEqual, &allocate);
3393 // If the value is within the bounds, load the j.l.Integer directly from the array.
3394 uint32_t data_offset = mirror::Array::DataOffset(kHeapReferenceSize).Uint32Value();
3395 uint32_t address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.cache));
3396 __ movl(out, Address(out, TIMES_4, data_offset + address));
3397 __ MaybeUnpoisonHeapReference(out);
3398 __ jmp(&done);
3399 __ Bind(&allocate);
3400 // Otherwise allocate and initialize a new j.l.Integer.
3401 address = dchecked_integral_cast<uint32_t>(reinterpret_cast<uintptr_t>(info.integer));
3402 __ movl(calling_convention.GetRegisterAt(0), Immediate(address));
3403 codegen_->InvokeRuntime(kQuickAllocObjectInitialized, invoke, invoke->GetDexPc());
3404 CheckEntrypointTypes<kQuickAllocObjectWithChecks, void*, mirror::Class*>();
3405 __ movl(Address(out, info.value_offset), in);
3406 __ Bind(&done);
3407 }
3408 }
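// The code above mirrors the usual Integer.valueOf() caching contract. A
// rough C++ sketch of the non-constant path (illustrative only; `info` fields
// are the ones used above, and AllocateBoxedInteger is a hypothetical
// stand-in for the runtime allocation call):
//
//   int32_t v = in;
//   if (static_cast<uint32_t>(v - info.low) < static_cast<uint32_t>(info.high - info.low + 1)) {
//     out = info.cache->Get(v - info.low);  // boxed value preallocated in the boot image
//   } else {
//     out = AllocateBoxedInteger(v);        // runtime allocation, then store v at value_offset
//   }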
3409
3410 UNIMPLEMENTED_INTRINSIC(X86, MathRoundDouble)
3411 UNIMPLEMENTED_INTRINSIC(X86, FloatIsInfinite)
3412 UNIMPLEMENTED_INTRINSIC(X86, DoubleIsInfinite)
3413 UNIMPLEMENTED_INTRINSIC(X86, IntegerHighestOneBit)
3414 UNIMPLEMENTED_INTRINSIC(X86, LongHighestOneBit)
3415 UNIMPLEMENTED_INTRINSIC(X86, IntegerLowestOneBit)
3416 UNIMPLEMENTED_INTRINSIC(X86, LongLowestOneBit)
3417
3418 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOf);
3419 UNIMPLEMENTED_INTRINSIC(X86, StringStringIndexOfAfter);
3420 UNIMPLEMENTED_INTRINSIC(X86, StringBufferAppend);
3421 UNIMPLEMENTED_INTRINSIC(X86, StringBufferLength);
3422 UNIMPLEMENTED_INTRINSIC(X86, StringBufferToString);
3423 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderAppend);
3424 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderLength);
3425 UNIMPLEMENTED_INTRINSIC(X86, StringBuilderToString);
3426
3427 // 1.8.
3428 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddInt)
3429 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndAddLong)
3430 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetInt)
3431 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetLong)
3432 UNIMPLEMENTED_INTRINSIC(X86, UnsafeGetAndSetObject)
3433
3434 UNREACHABLE_INTRINSICS(X86)
3435
3436 #undef __
3437
3438 } // namespace x86
3439 } // namespace art
3440