/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art {
namespace arm64 {

using helpers::ARM64EncodableConstantOrRegister;
using helpers::Arm64CanEncodeConstantAsImmediate;
using helpers::DRegisterFrom;
using helpers::HeapOperand;
using helpers::InputRegisterAt;
using helpers::Int64ConstantFrom;
using helpers::OutputRegister;
using helpers::VRegisterFrom;
using helpers::WRegisterFrom;
using helpers::XRegisterFrom;

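// Shorthand so that '__ Foo(...)' emits through the code generator's VIXL macro assembler.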
#define __ GetVIXLAssembler()->

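// HVecReplicateScalar broadcasts one scalar value into every lane of a SIMD register:
// encodable constants are materialized with Movi/Fmov, register inputs with Dup.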
void LocationsBuilderARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, ARM64EncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          Arm64CanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input->AsConstant()));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V16B(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V16B(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V8H(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V8H(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V4S(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V4S(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Movi(dst.V2D(), Int64ConstantFrom(src_loc));
      } else {
        __ Dup(dst.V2D(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V4S(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.V4S(), VRegisterFrom(src_loc).V4S(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      if (src_loc.IsConstant()) {
        __ Fmov(dst.V2D(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.V2D(), VRegisterFrom(src_loc).V2D(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

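// HVecExtractScalar reads lane 0 of the vector into a scalar: Umov for the 32-/64-bit
// integral cases; for floating-point types the output is constrained to alias the input,
// so no code needs to be emitted.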
void LocationsBuilderARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 4u);
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
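// Boolean 'not' is the one case that needs kOutputOverlap: its code writes the destination
// (Movi of 1) before it reads the source, so input and output must not share a register.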
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

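// HVecReduce folds all lanes into a single value left in the low element of the destination:
// Addv/Sminv/Smaxv across the four 32-bit lanes, and a pairwise Addp for the 64-bit sum.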
void InstructionCodeGeneratorARM64::VisitVecReduce(HVecReduce* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = DRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      switch (instruction->GetKind()) {
        case HVecReduce::kSum:
          __ Addv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMin:
          __ Sminv(dst.S(), src.V4S());
          break;
        case HVecReduce::kMax:
          __ Smaxv(dst.S(), src.V4S());
          break;
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      switch (instruction->GetKind()) {
        case HVecReduce::kSum:
          __ Addp(dst.D(), src.V2D());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD min/max";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecCnv(HVecCnv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    DCHECK_EQ(4u, instruction->GetVectorLength());
    __ Scvtf(dst.V4S(), src.V4S());
  } else {
    LOG(FATAL) << "Unsupported SIMD type";
  }
}

void LocationsBuilderARM64::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecNeg(HVecNeg* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Neg(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Neg(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Neg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Neg(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fneg(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fneg(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecAbs(HVecAbs* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Abs(dst.V16B(), src.V16B());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Abs(dst.V8H(), src.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Abs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Abs(dst.V2D(), src.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fabs(dst.V4S(), src.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fabs(dst.V2D(), src.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

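// Boolean 'not' flips 0/1 per byte by XOR-ing with a vector of ones; integral types use a
// plain bitwise Not, where the lane arrangement is irrelevant for pure bit operations.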
void InstructionCodeGeneratorARM64::VisitVecNot(HVecNot* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister src = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Movi(dst.V16B(), 1);
      __ Eor(dst.V16B(), dst.V16B(), src.V16B());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      __ Not(dst.V16B(), src.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecAdd(HVecAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Add(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Add(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Add(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Add(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fadd(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fadd(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

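// Halving add computes (a + b) >> 1 in the lane type without intermediate overflow;
// the rounded variants add one before shifting:
//   Uhadd/Shadd  : r = (a + b) >> 1
//   Urhadd/Srhadd: r = (a + b + 1) >> 1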
void InstructionCodeGeneratorARM64::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Uhadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V16B(), lhs.V16B(), rhs.V16B())
          : __ Shadd(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Urhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Uhadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      instruction->IsRounded()
          ? __ Srhadd(dst.V8H(), lhs.V8H(), rhs.V8H())
          : __ Shadd(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecSub(HVecSub* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sub(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sub(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fsub(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fsub(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecMul(HVecMul* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mul(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mul(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmul(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmul(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecDiv(HVecDiv* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fdiv(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fdiv(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMin(HVecMin* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecMin(HVecMin* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smin(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smin(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmin(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmin(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMax(HVecMax* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecMax(HVecMax* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Umax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Smax(dst.V16B(), lhs.V16B(), rhs.V16B());
      break;
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Umax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Smax(dst.V8H(), lhs.V8H(), rhs.V8H());
      break;
    case DataType::Type::kUint32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Umax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Smax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Fmax(dst.V4S(), lhs.V4S(), rhs.V4S());
      break;
    case DataType::Type::kFloat64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Fmax(dst.V2D(), lhs.V2D(), rhs.V2D());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

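// The bitwise operations below act on the raw 128 bits, so a single V16B arrangement is
// used regardless of the packed type ("lanes do not matter").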
void InstructionCodeGeneratorARM64::VisitVecAnd(HVecAnd* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ And(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecOr(HVecOr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Orr(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecXor(HVecXor* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister rhs = VRegisterFrom(locations->InAt(1));
  VRegister dst = VRegisterFrom(locations->Out());
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      __ Eor(dst.V16B(), lhs.V16B(), rhs.V16B());  // lanes do not matter
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
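// The shift distance is always an immediate here: input 1 is required to be a constant.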
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)->AsConstant()));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecShl(HVecShl* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Shl(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Shl(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Shl(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Shl(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecShr(HVecShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Sshr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Sshr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Sshr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Sshr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64::VisitVecUShr(HVecUShr* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister lhs = VRegisterFrom(locations->InAt(0));
  VRegister dst = VRegisterFrom(locations->Out());
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Ushr(dst.V16B(), lhs.V16B(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Ushr(dst.V8H(), lhs.V8H(), value);
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Ushr(dst.V4S(), lhs.V4S(), value);
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Ushr(dst.V2D(), lhs.V2D(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input->AsConstant())
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister dst = VRegisterFrom(locations->Out());

  DCHECK_EQ(1u, instruction->InputCount());  // only one input currently implemented

  // Zero out all other elements first.
  __ Movi(dst.V16B(), 0);

  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }

  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, instruction->GetVectorLength());
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect result.
// However, vector multiply-accumulate instructions are not affected.
void InstructionCodeGeneratorARM64::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V16B(), left.V16B(), right.V16B());
      } else {
        __ Mls(acc.V16B(), left.V16B(), right.V16B());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V8H(), left.V8H(), right.V8H());
      } else {
        __ Mls(acc.V8H(), left.V8H(), right.V8H());
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, instruction->GetVectorLength());
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.V4S(), left.V4S(), right.V4S());
      } else {
        __ Mls(acc.V4S(), left.V4S(), right.V4S());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
  // Some conversions require temporary registers.
  LocationSummary* locations = instruction->GetLocations();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          FALLTHROUGH_INTENDED;
        case DataType::Type::kInt32:
          locations->AddTemp(Location::RequiresFpuRegister());
          locations->AddTemp(Location::RequiresFpuRegister());
          break;
        default:
          break;
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetPackedType() == DataType::Type::kInt64) {
        locations->AddTemp(Location::RequiresFpuRegister());
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      if (instruction->GetPackedType() == a->GetPackedType()) {
        locations->AddTemp(Location::RequiresFpuRegister());
      }
      break;
    default:
      break;
  }
}

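// The SAD accumulation below relies on Sabal/Sabal2 (signed absolute difference and
// accumulate, widening the low/high halves); when the accumulator is more than one widening
// step wider than the operands, the operands are first sign-extended into the temporaries
// with Sxtl/Sxtl2.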
void InstructionCodeGeneratorARM64::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  VRegister acc = VRegisterFrom(locations->InAt(0));
  VRegister left = VRegisterFrom(locations->InAt(1));
  VRegister right = VRegisterFrom(locations->InAt(2));

  DCHECK(locations->InAt(0).Equals(locations->Out()));

  // Handle all feasible acc_T += sad(a_S, b_S) type combinations (T x S).
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  switch (a->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      DCHECK_EQ(16u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt16:
          DCHECK_EQ(8u, instruction->GetVectorLength());
          __ Sabal(acc.V8H(), left.V8B(), right.V8B());
          __ Sabal2(acc.V8H(), left.V16B(), right.V16B());
          break;
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sabal(acc.V4S(), tmp1.V4H(), tmp2.V4H());
          __ Sabal2(acc.V4S(), tmp1.V8H(), tmp2.V8H());
          break;
        }
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          VRegister tmp3 = VRegisterFrom(locations->GetTemp(2));
          VRegister tmp4 = VRegisterFrom(locations->GetTemp(3));
          __ Sxtl(tmp1.V8H(), left.V8B());
          __ Sxtl(tmp2.V8H(), right.V8B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp1.V8H(), left.V16B());
          __ Sxtl2(tmp2.V8H(), right.V16B());
          __ Sxtl(tmp3.V4S(), tmp1.V4H());
          __ Sxtl(tmp4.V4S(), tmp2.V4H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          __ Sxtl2(tmp3.V4S(), tmp1.V8H());
          __ Sxtl2(tmp4.V4S(), tmp2.V8H());
          __ Sabal(acc.V2D(), tmp3.V2S(), tmp4.V2S());
          __ Sabal2(acc.V2D(), tmp3.V4S(), tmp4.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      DCHECK_EQ(8u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32:
          DCHECK_EQ(4u, instruction->GetVectorLength());
          __ Sabal(acc.V4S(), left.V4H(), right.V4H());
          __ Sabal2(acc.V4S(), left.V8H(), right.V8H());
          break;
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp1 = VRegisterFrom(locations->GetTemp(0));
          VRegister tmp2 = VRegisterFrom(locations->GetTemp(1));
          __ Sxtl(tmp1.V4S(), left.V4H());
          __ Sxtl(tmp2.V4S(), right.V4H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          __ Sxtl2(tmp1.V4S(), left.V8H());
          __ Sxtl2(tmp2.V4S(), right.V8H());
          __ Sabal(acc.V2D(), tmp1.V2S(), tmp2.V2S());
          __ Sabal2(acc.V2D(), tmp1.V4S(), tmp2.V4S());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt32:
      DCHECK_EQ(4u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt32: {
          DCHECK_EQ(4u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V4S(), left.V4S(), right.V4S());
          __ Abs(tmp.V4S(), tmp.V4S());
          __ Add(acc.V4S(), acc.V4S(), tmp.V4S());
          break;
        }
        case DataType::Type::kInt64:
          DCHECK_EQ(2u, instruction->GetVectorLength());
          __ Sabal(acc.V2D(), left.V2S(), right.V2S());
          __ Sabal2(acc.V2D(), left.V4S(), right.V4S());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD type";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      DCHECK_EQ(2u, a->GetVectorLength());
      switch (instruction->GetPackedType()) {
        case DataType::Type::kInt64: {
          DCHECK_EQ(2u, instruction->GetVectorLength());
          VRegister tmp = VRegisterFrom(locations->GetTemp(0));
          __ Sub(tmp.V2D(), left.V2D(), right.V2D());
          __ Abs(tmp.V2D(), tmp.V2D());
          __ Add(acc.V2D(), acc.V2D(), tmp.V2D());
          break;
        }
        default:
          LOG(FATAL) << "Unsupported SIMD type";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

// Helper to construct the memory operand for a vector memory access. Returns the memory
// operand and, if used, sets the output parameter scratch to a temporary register used in
// this operand, so that the client can release it right after the memory operand use.
MemOperand InstructionCodeGeneratorARM64::VecAddress(
    HVecMemoryOperation* instruction,
    UseScratchRegisterScope* temps_scope,
    size_t size,
    bool is_string_char_at,
    /*out*/ Register* scratch) {
  LocationSummary* locations = instruction->GetLocations();
  Register base = InputRegisterAt(instruction, 0);

  if (instruction->InputAt(1)->IsIntermediateAddressIndex()) {
    DCHECK(!is_string_char_at);
    return MemOperand(base.X(), InputRegisterAt(instruction, 1).X());
  }

  Location index = locations->InAt(1);
  uint32_t offset = is_string_char_at
      ? mirror::String::ValueOffset().Uint32Value()
      : mirror::Array::DataOffset(size).Uint32Value();
  size_t shift = ComponentSizeShiftWidth(size);

  // HIntermediateAddress optimization is only applied for scalar ArrayGet and ArraySet.
  DCHECK(!instruction->InputAt(0)->IsIntermediateAddress());

  if (index.IsConstant()) {
    offset += Int64ConstantFrom(index) << shift;
    return HeapOperand(base, offset);
  } else {
    *scratch = temps_scope->AcquireSameSizeAs(base);
    __ Add(*scratch, base, Operand(WRegisterFrom(index), LSL, shift));
    return HeapOperand(*scratch, offset);
  }
}

void LocationsBuilderARM64::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

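// Loads go through VecAddress above. The kUint16 case additionally handles compressed
// strings: for a compressed string it loads 8 bytes and zero-extends them to 8 chars (Uxtl);
// uncompressed strings and ordinary arrays use a plain 128-bit Ldr.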
void InstructionCodeGeneratorARM64::VisitVecLoad(HVecLoad* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint16:
      DCHECK_EQ(8u, instruction->GetVectorLength());
      // Special handling of compressed/uncompressed string load.
      if (mirror::kUseStringCompression && instruction->IsStringCharAt()) {
        vixl::aarch64::Label uncompressed_load, done;
        // Test compression bit.
        static_assert(static_cast<uint32_t>(mirror::StringCompressionFlag::kCompressed) == 0u,
                      "Expecting 0=compressed, 1=uncompressed");
        uint32_t count_offset = mirror::String::CountOffset().Uint32Value();
        Register length = temps.AcquireW();
        __ Ldr(length, HeapOperand(InputRegisterAt(instruction, 0), count_offset));
        __ Tbnz(length.W(), 0, &uncompressed_load);
        temps.Release(length);  // no longer needed
        // Zero extend 8 compressed bytes into 8 chars.
        __ Ldr(DRegisterFrom(locations->Out()).V8B(),
               VecAddress(instruction, &temps, 1, /*is_string_char_at*/ true, &scratch));
        __ Uxtl(reg.V8H(), reg.V8B());
        __ B(&done);
        if (scratch.IsValid()) {
          temps.Release(scratch);  // if used, no longer needed
        }
        // Load 8 direct uncompressed chars.
        __ Bind(&uncompressed_load);
        __ Ldr(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ true, &scratch));
        __ Bind(&done);
        return;
      }
      FALLTHROUGH_INTENDED;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Ldr(reg, VecAddress(instruction, &temps, size, instruction->IsStringCharAt(), &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

void LocationsBuilderARM64::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARM64::VisitVecStore(HVecStore* instruction) {
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  VRegister reg = VRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      DCHECK_LE(2u, instruction->GetVectorLength());
      DCHECK_LE(instruction->GetVectorLength(), 16u);
      __ Str(reg, VecAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type";
      UNREACHABLE();
  }
}

#undef __

}  // namespace arm64
}  // namespace art