/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art {
namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::Arm64CanEncodeConstantAsImmediate;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
using helpers::OutputRegister;
using helpers::VRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

#define __ GetVIXLAssembler()->

void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

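// For integral types, an encodable constant is materialized directly with MOVI and any other
// scalar is broadcast from a general-purpose register with DUP; for floating-point types,
// constants use FMOV (vector, immediate) and non-constants duplicate lane 0 of the source.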
void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

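// ADDV, SMINV and SMAXV reduce across all lanes and write the scalar result into the low
// element of the destination. ADDV has no 64-bit lane form, so the 2 x i64 sum uses ADDP
// (scalar pairwise add) instead.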
void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetKind()) {
        case HVecReduce::kSum:
          __ Addv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetKind()) {
        case HVecReduce::kSum:
          __ Addp(dst.D(), src.V2D());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

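// Only the int32 -> float32 conversion is vectorized here; SCVTF converts each signed
// 32-bit lane to single precision.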
void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ Scvtf(dst.V4S(), src.V4S());
  } else {
    LOG(FATAL) << "Unsupported SIMD type";
  }
}

void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Neg(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Neg(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Neg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Neg(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fneg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fneg(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Abs(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Abs(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Abs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Abs(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fabs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fabs(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

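// Booleans are stored as 0/1, so boolean-not is computed as XOR with a vector of 1s; since
// MOVI writes the destination before the source is read, the kBool case in
// CreateVecUnOpLocations requests Location::kOutputOverlap to keep dst and src apart.
// Integral not is a plain bitwise NOT over all 128 bits, independent of lane width.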
void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Movi(dst.V16B(), 1);
      __ Eor(dst.V16B(), dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Not(dst.V16B(), src.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

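// The rounded halving add computes (a + b + 1) >> 1 and the truncating form computes
// (a + b) >> 1, both without losing the carry of the intermediate sum; the U*/S* prefixes
// select the unsigned or signed lane interpretation.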
void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

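// Only floating-point division is handled: AArch64 Advanced SIMD has no integer vector
// divide instruction, so integer division is never vectorized for this backend.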
void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

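// The shift distance is always a constant here (see CreateVecShiftLocations), so the
// immediate forms SHL, SSHR and USHR are used; the same distance applies to every lane.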
void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Shl(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Shl(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Shl(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Shl(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sshr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sshr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sshr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sshr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Ushr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Ushr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Ushr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Ushr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister dst = VRegisterFrom(locations->Out());

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  __ Movi(dst.V16B(), 0);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

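// The accumulator is input 0 and is reused as the output (SameAsFirstInput), matching the
// destructive accumulate forms (MLA/MLS, SABAL/SABAL2) used by the code generators below.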
void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However, the vector multiply-accumulate instructions used here are not affected.
void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V16B(), left.V16B(), right.V16B());
      } else {
        __ Mls(acc.V16B(), left.V16B(), right.V16B());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V8H(), left.V8H(), right.V8H());
      } else {
        __ Mls(acc.V8H(), left.V8H(), right.V8H());
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V4S(), left.V4S(), right.V4S());
      } else {
        __ Mls(acc.V4S(), left.V4S(), right.V4S());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
  // Some conversions require temporary registers.
  LocationSummary* locations = instruction->GetLocations();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          FALLTHROUGH_INTENDED;
        case DataType::Type::kInt32:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          break;
        default:
          break;
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetPackedType() == DataType::Type::kInt64) {
        locations->AddTemp(Location::RequiresFpuRegister());
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      if (instruction->GetPackedType() == a->GetPackedType()) {
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    default:
      break;
  }
}

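// SABAL/SABAL2 accumulate the widened absolute differences of the low/high halves of the
// sources. When the accumulator lanes are more than one step wider than the source lanes,
// SXTL/SXTL2 first sign-extend the sources into the temporaries reserved above.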
void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt16:
          DCHECK_EQ(8u, instruction->GetVectorLength());
          __ Sabal(acc.V8H(), left.V8B(), right.V8B());
          __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
          break;
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          break;
        }
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
          VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32:
          DCHECK_EQ(4u, instruction->GetVectorLength());
          __ Sabal(acc.V4S(), left.V4H(), right.V4H());
          __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
          break;
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          __ Sxtl(tmp1.V4S(), left.V4H());
          __ Sxtl(tmp2.V4S(), right.V4H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          __ Sxtl2(tmp1.V4S(), left.V8H());
          __ Sxtl2(tmp2.V4S(), right.V8H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V4S(), left.V4S(), right.V4S());
          __ Abs(tmp.V4S(), tmp.V4S());
          __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
          break;
        }
        case DataType::Type::kInt64:
          DCHECK_EQ(2u, instruction->GetVectorLength());
          __ Sabal(acc.V2D(), left.V2S(), right.V2S());
          __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD type";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V2D(), left.V2D(), right.V2D());
          __ Abs(tmp.V2D(), tmp.V2D());
          __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to construct the memory operand for vector memory operations. Returns the memory
// operand and, if used, sets the output parameter scratch to a temporary register used in
// this operand, so that the client can release it right after the memory operand use.
MemOperand InstructionCodeGeneratorARM64::VecAddress(
    HVecMemoryOperation* instruction,
    UseScratchRegisterScope* temps_scope,
    size_t size,
    bool is_string_char_at,
    /*out*/ Register* scratch) {
  LocationSummary* locations = instruction->GetLocations();
  Register base = InputRegisterAt(instruction, 0);

  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
    DCHECK(!is_string_char_at);
    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
  }

  Location index = locations->InAt(1);
  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  size_t shift = ComponentSizeShiftWidth(size);

  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());

  if (index.IsConstant()) {
    offset += Int64ConstantFrom(index) << shift;
    return HeapOperand(base, offset);
  } else {
    *scratch = temps_scope->AcquireSameSizeAs(base);
    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
    return HeapOperand(*scratch, offset);
  }
}

void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

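// For char loads from a compressed string, the payload holds 8-bit characters, so VecAddress
// is called with an element size of 1; the 8 loaded bytes are then zero-extended to 8 chars
// with UXTL before branching over the uncompressed load.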
void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

#undef __

}  // namespace arm64
}  // namespace art