/*
 * Copyright (C) 2020 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "code_generator_arm64.h"

#include "arch/arm64/instruction_set_features_arm64.h"
#include "base/bit_utils_iterator.h"
#include "mirror/array-inl.h"
#include "mirror/string.h"

using namespace vixl::aarch64;  // NOLINT(build/namespaces)

namespace art HIDDEN {
namespace arm64 {

using helpers::DRegisterFrom;
using helpers::InputRegisterAt;
using helpers::Int64FromLocation;
using helpers::LocationFrom;
using helpers::OutputRegister;
using helpers::SveStackOperandFrom;
using helpers::VRegisterFrom;
using helpers::XRegisterFrom;
using helpers::ZRegisterFrom;

#define __ GetVIXLAssembler()->
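// Note: the `__` shorthand above routes every subsequent `__ Foo(...)` call through the VIXL
// macro assembler, i.e. GetVIXLAssembler()->Foo(...).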

// Returns whether the value of the constant can be directly encoded into the instruction as
// immediate.
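// For example (illustrative): replicating the Int32 constant 42 fits the signed 8-bit
// immediate form of DUP, while 1000 does not and must go through a register.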
static bool SVECanEncodeConstantAsImmediate(HConstant* constant, HInstruction* instr) {
  if (instr->IsVecReplicateScalar()) {
    if (constant->IsLongConstant()) {
      return false;
    } else if (constant->IsFloatConstant()) {
      return vixl::aarch64::Assembler::IsImmFP32(constant->AsFloatConstant()->GetValue());
    } else if (constant->IsDoubleConstant()) {
      return vixl::aarch64::Assembler::IsImmFP64(constant->AsDoubleConstant()->GetValue());
    }
    // TODO: Make use of shift part of DUP instruction.
    int64_t value = CodeGenerator::GetInt64ValueOf(constant);
    return IsInt<8>(value);
  }

  return false;
}

// Returns
//  - constant location - if 'constant' is an actual constant and its value can be
//    encoded into the instruction.
//  - register location otherwise.
inline Location SVEEncodableConstantOrRegister(HInstruction* constant, HInstruction* instr) {
  if (constant->IsConstant() &&
      SVECanEncodeConstantAsImmediate(constant->AsConstant(), instr)) {
    return Location::ConstantLocation(constant);
  }

  return Location::RequiresRegister();
}

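// Consistency check used throughout this file: lane count times lane size must equal the
// configured SIMD register width (e.g. 4 Int32 lanes for a 128-bit vector register).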
void InstructionCodeGeneratorARM64Sve::ValidateVectorLength(HVecOperation* instr) const {
  DCHECK_EQ(DataType::Size(instr->GetPackedType()) * instr->GetVectorLength(),
            codegen_->GetSIMDRegisterWidth());
}

void LocationsBuilderARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  HInstruction* input = instruction->InputAt(0);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, SVEEncodableConstantOrRegister(input, instruction));
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      if (input->IsConstant() &&
          SVECanEncodeConstantAsImmediate(input->AsConstant(), instruction)) {
        locations->SetInAt(0, Location::ConstantLocation(input));
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(0, Location::RequiresFpuRegister());
        locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecReplicateScalar(HVecReplicateScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  Location src_loc = locations->InAt(0);
  const ZRegister dst = ZRegisterFrom(locations->Out());
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnB(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnB(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnH(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnH(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt32:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnS(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnS(), InputRegisterAt(instruction, 0));
      }
      break;
    case DataType::Type::kInt64:
      if (src_loc.IsConstant()) {
        __ Dup(dst.VnD(), Int64FromLocation(src_loc));
      } else {
        __ Dup(dst.VnD(), XRegisterFrom(src_loc));
      }
      break;
    case DataType::Type::kFloat32:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnS(), src_loc.GetConstant()->AsFloatConstant()->GetValue());
      } else {
        __ Dup(dst.VnS(), ZRegisterFrom(src_loc).VnS(), 0);
      }
      break;
    case DataType::Type::kFloat64:
      if (src_loc.IsConstant()) {
        __ Fdup(dst.VnD(), src_loc.GetConstant()->AsDoubleConstant()->GetValue());
      } else {
        __ Dup(dst.VnD(), ZRegisterFrom(src_loc).VnD(), 0);
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecExtractScalar(HVecExtractScalar* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const VRegister src = VRegisterFrom(locations->InAt(0));
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      __ Umov(OutputRegister(instruction), src.V4S(), 0);
      break;
    case DataType::Type::kInt64:
      __ Umov(OutputRegister(instruction), src.V2D(), 0);
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      DCHECK(locations->InAt(0).Equals(locations->Out()));  // no code required
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector unary operations.
static void CreateVecUnOpLocations(ArenaAllocator* allocator, HVecUnaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(),
                        instruction->IsVecNot() ? Location::kOutputOverlap
                                                : Location::kNoOutputOverlap);
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecReduce(HVecReduce* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const VRegister dst = DRegisterFrom(locations->Out());
  const PRegister p_reg = LoopPReg();
  ValidateVectorLength(instruction);
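  // Saddv widens the signed 32-bit lane sum into the 64-bit scalar result; for 64-bit lanes
  // the signedness of the across-vector add is irrelevant, so Uaddv is used there.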
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt32:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Saddv(dst.S(), p_reg, src.VnS());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    case DataType::Type::kInt64:
      switch (instruction->GetReductionKind()) {
        case HVecReduce::kSum:
          __ Uaddv(dst.D(), p_reg, src.VnD());
          break;
        default:
          LOG(FATAL) << "Unsupported SIMD instruction";
          UNREACHABLE();
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecCnv(HVecCnv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  DataType::Type from = instruction->GetInputType();
  DataType::Type to = instruction->GetResultType();
  ValidateVectorLength(instruction);
  if (from == DataType::Type::kInt32 && to == DataType::Type::kFloat32) {
    __ Scvtf(dst.VnS(), p_reg, src.VnS());
  } else {
    LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
  }
}

void LocationsBuilderARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNeg(HVecNeg* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Neg(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Neg(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Neg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Neg(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fneg(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fneg(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAbs(HVecAbs* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt8:
      __ Abs(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kInt16:
      __ Abs(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Abs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Abs(dst.VnD(), p_reg, src.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fabs(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fabs(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecNot(HVecNot* instruction) {
  CreateVecUnOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecNot(HVecNot* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister src = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:  // special case boolean-not
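      // Booleans are stored as 0/1 per byte, so boolean-not is computed as x ^ 1 rather than
      // as a bitwise NOT (which would produce 0xfe/0xff).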
      __ Dup(dst.VnB(), 1);
      __ Eor(dst.VnB(), p_reg, dst.VnB(), src.VnB());
      break;
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Not(dst.VnB(), p_reg, src.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Not(dst.VnH(), p_reg, src.VnH());
      break;
    case DataType::Type::kInt32:
      __ Not(dst.VnS(), p_reg, src.VnS());
      break;
    case DataType::Type::kInt64:
      __ Not(dst.VnD(), p_reg, src.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector binary operations.
static void CreateVecBinOpLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAdd(HVecAdd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Add(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Add(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Add(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Add(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
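    // StrictNaNPropagation tells the VIXL macro assembler that it may not commute the
    // operands when materializing the destructive instruction form, since the operand order
    // determines which input NaN is propagated.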
    case DataType::Type::kFloat32:
      __ Fadd(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fadd(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationAdd(HVecSaturationAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecHalvingAdd(HVecHalvingAdd* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecSub(HVecSub* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecSub(HVecSub* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Sub(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Sub(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Sub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Sub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fsub(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fsub(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSaturationSub(HVecSaturationSub* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMul(HVecMul* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecMul(HVecMul* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mul(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mul(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
      __ Mul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
      __ Mul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    case DataType::Type::kFloat32:
      __ Fmul(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS(), StrictNaNPropagation);
      break;
    case DataType::Type::kFloat64:
      __ Fmul(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD(), StrictNaNPropagation);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecDiv(HVecDiv* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);

  // Note: VIXL guarantees StrictNaNPropagation for Fdiv.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kFloat32:
      __ Fdiv(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kFloat64:
      __ Fdiv(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMin(HVecMin* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecMax(HVecMax* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  // TODO: Allow constants supported by BIC (vector, immediate).
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecAnd(HVecAnd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ And(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ And(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ And(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ And(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void InstructionCodeGeneratorARM64Sve::VisitVecAndNot(HVecAndNot* instruction) {
  // TODO: Use BIC (vector, register).
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
}

void LocationsBuilderARM64Sve::VisitVecOr(HVecOr* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecOr(HVecOr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Orr(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Orr(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Orr(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Orr(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecXor(HVecXor* instruction) {
  CreateVecBinOpLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecXor(HVecXor* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister rhs = ZRegisterFrom(locations->InAt(1));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Eor(dst.VnB(), p_reg, lhs.VnB(), rhs.VnB());
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Eor(dst.VnH(), p_reg, lhs.VnH(), rhs.VnH());
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Eor(dst.VnS(), p_reg, lhs.VnS(), rhs.VnS());
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Eor(dst.VnD(), p_reg, lhs.VnD(), rhs.VnD());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector shift operations.
static void CreateVecShiftLocations(ArenaAllocator* allocator, HVecBinaryOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::ConstantLocation(instruction->InputAt(1)));
      locations->SetOut(Location::RequiresFpuRegister(), Location::kNoOutputOverlap);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShl(HVecShl* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShl(HVecShl* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsl(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsl(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsl(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsl(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecShr(HVecShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecShr(HVecShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Asr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Asr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Asr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Asr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  CreateVecShiftLocations(GetGraph()->GetAllocator(), instruction);
}

void InstructionCodeGeneratorARM64Sve::VisitVecUShr(HVecUShr* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister lhs = ZRegisterFrom(locations->InAt(0));
  const ZRegister dst = ZRegisterFrom(locations->Out());
  const PRegisterM p_reg = LoopPReg().Merging();
  int32_t value = locations->InAt(1).GetConstant()->AsIntConstant()->GetValue();
  ValidateVectorLength(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Lsr(dst.VnB(), p_reg, lhs.VnB(), value);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Lsr(dst.VnH(), p_reg, lhs.VnH(), value);
      break;
    case DataType::Type::kInt32:
      __ Lsr(dst.VnS(), p_reg, lhs.VnS(), value);
      break;
    case DataType::Type::kInt64:
      __ Lsr(dst.VnD(), p_reg, lhs.VnD(), value);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  HInstruction* input = instruction->InputAt(0);
  bool is_zero = IsZeroBitPattern(input);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, is_zero ? Location::ConstantLocation(input)
                                    : Location::RequiresFpuRegister());
      locations->SetOut(Location::RequiresFpuRegister());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void InstructionCodeGeneratorARM64Sve::VisitVecSetScalars(HVecSetScalars* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister z_dst = ZRegisterFrom(locations->Out());

  DCHECK_EQ(2u, instruction->InputCount());  // only one input currently implemented + predicate.

  // Zero out all other elements first.
  __ Dup(z_dst.VnB(), 0);

  const VRegister dst = VRegisterFrom(locations->Out());
  // Shorthand for any type of zero.
  if (IsZeroBitPattern(instruction->InputAt(0))) {
    return;
  }
  ValidateVectorLength(instruction);

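  // The NEON V-register view below aliases the low 128 bits of the SVE Z register, so a NEON
  // lane move into element 0 also writes element 0 of z_dst.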
  // Set required elements.
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Mov(dst.V16B(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Mov(dst.V8H(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt32:
      __ Mov(dst.V4S(), 0, InputRegisterAt(instruction, 0));
      break;
    case DataType::Type::kInt64:
      __ Mov(dst.V2D(), 0, InputRegisterAt(instruction, 0));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

// Helper to set up locations for vector accumulations.
static void CreateVecAccumLocations(ArenaAllocator* allocator, HVecOperation* instruction) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
      locations->SetInAt(0, Location::RequiresFpuRegister());
      locations->SetInAt(1, Location::RequiresFpuRegister());
      locations->SetInAt(2, Location::RequiresFpuRegister());
      locations->SetOut(Location::SameAsFirstInput());
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecMultiplyAccumulate(HVecMultiplyAccumulate* instruction) {
  CreateVecAccumLocations(GetGraph()->GetAllocator(), instruction);
}

// Some early revisions of the Cortex-A53 have an erratum (835769) whereby it is possible for a
// 64-bit scalar multiply-accumulate instruction in AArch64 state to generate an incorrect
// result. However, the vector MultiplyAccumulate instruction is not affected.
void InstructionCodeGeneratorARM64Sve::VisitVecMultiplyAccumulate(
    HVecMultiplyAccumulate* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = LoopPReg().Merging();

  DCHECK(locations->InAt(0).Equals(locations->Out()));
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      } else {
        __ Mls(acc.VnB(), p_reg, acc.VnB(), left.VnB(), right.VnB());
      }
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      } else {
        __ Mls(acc.VnH(), p_reg, acc.VnH(), left.VnH(), right.VnH());
      }
      break;
    case DataType::Type::kInt32:
      if (instruction->GetOpKind() == HInstruction::kAdd) {
        __ Mla(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      } else {
        __ Mls(acc.VnS(), p_reg, acc.VnS(), left.VnS(), right.VnS());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void InstructionCodeGeneratorARM64Sve::VisitVecSADAccumulate(HVecSADAccumulate* instruction) {
  LOG(FATAL) << "Unsupported SIMD instruction " << instruction->GetId();
  UNREACHABLE();
}

void LocationsBuilderARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->GetPackedType() == DataType::Type::kInt32);
  locations->SetInAt(0, Location::RequiresFpuRegister());
  locations->SetInAt(1, Location::RequiresFpuRegister());
  locations->SetInAt(2, Location::RequiresFpuRegister());
  locations->SetOut(Location::SameAsFirstInput());

  locations->AddTemp(Location::RequiresFpuRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecDotProd(HVecDotProd* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  DCHECK(locations->InAt(0).Equals(locations->Out()));
  const ZRegister acc = ZRegisterFrom(locations->InAt(0));
  const ZRegister left = ZRegisterFrom(locations->InAt(1));
  const ZRegister right = ZRegisterFrom(locations->InAt(2));
  const PRegisterM p_reg = LoopPReg().Merging();
  HVecOperation* a = instruction->InputAt(1)->AsVecOperation();
  HVecOperation* b = instruction->InputAt(2)->AsVecOperation();
  DCHECK_EQ(HVecOperation::ToSignedType(a->GetPackedType()),
            HVecOperation::ToSignedType(b->GetPackedType()));
  DCHECK_EQ(instruction->GetPackedType(), DataType::Type::kInt32);
  ValidateVectorLength(instruction);

  size_t inputs_data_size = DataType::Size(a->GetPackedType());
  switch (inputs_data_size) {
    case 1u: {
      UseScratchRegisterScope temps(GetVIXLAssembler());
      const ZRegister tmp0 = temps.AcquireZ();
      const ZRegister tmp1 = ZRegisterFrom(locations->GetTemp(0));

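      // Udot/Sdot have no governing predicate, so inactive lanes of both inputs are zeroed
      // via Sel first; their zero products then leave the accumulator lanes unchanged.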
      __ Dup(tmp1.VnB(), 0u);
      __ Sel(tmp0.VnB(), p_reg, left.VnB(), tmp1.VnB());
      __ Sel(tmp1.VnB(), p_reg, right.VnB(), tmp1.VnB());
      if (instruction->IsZeroExtending()) {
        __ Udot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      } else {
        __ Sdot(acc.VnS(), acc.VnS(), tmp0.VnB(), tmp1.VnB());
      }
      break;
    }
    default:
      LOG(FATAL) << "Unsupported SIMD type size: " << inputs_data_size;
  }
}

// Helper to set up locations for vector memory operations.
static void CreateVecMemLocations(ArenaAllocator* allocator,
                                  HVecMemoryOperation* instruction,
                                  bool is_load) {
  LocationSummary* locations = new (allocator) LocationSummary(instruction);
  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
    case DataType::Type::kInt32:
    case DataType::Type::kInt64:
    case DataType::Type::kFloat32:
    case DataType::Type::kFloat64:
      locations->SetInAt(0, Location::RequiresRegister());
      locations->SetInAt(1, Location::RegisterOrConstant(instruction->InputAt(1)));
      if (is_load) {
        locations->SetOut(Location::RequiresFpuRegister());
      } else {
        locations->SetInAt(2, Location::RequiresFpuRegister());
      }
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ true);
}

void InstructionCodeGeneratorARM64Sve::VisitVecLoad(HVecLoad* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->Out());
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = LoopPReg().Zeroing();
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kInt16:  // (short) s.charAt(.) can yield HVecLoad/Int16/StringCharAt.
    case DataType::Type::kUint16:
      __ Ld1h(reg.VnH(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ld1b(reg.VnB(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ld1w(reg.VnS(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ld1d(reg.VnD(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecStore(HVecStore* instruction) {
  CreateVecMemLocations(GetGraph()->GetAllocator(), instruction, /*is_load*/ false);
}

void InstructionCodeGeneratorARM64Sve::VisitVecStore(HVecStore* instruction) {
  DCHECK(instruction->IsPredicated());
  LocationSummary* locations = instruction->GetLocations();
  size_t size = DataType::Size(instruction->GetPackedType());
  const ZRegister reg = ZRegisterFrom(locations->InAt(2));
  UseScratchRegisterScope temps(GetVIXLAssembler());
  Register scratch;
  const PRegisterZ p_reg = LoopPReg().Zeroing();
  ValidateVectorLength(instruction);

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ St1b(reg.VnB(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ St1h(reg.VnH(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ St1w(reg.VnS(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ St1d(reg.VnD(), p_reg,
              VecSVEAddress(instruction, &temps, size, /*is_string_char_at*/ false, &scratch));
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  DCHECK(instruction->InputAt(0)->IsIntConstant());
  locations->SetInAt(0, Location::NoLocation());
  locations->SetOut(Location::NoLocation());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredSetAll(HVecPredSetAll* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  const PRegister p_reg = LoopPReg();

  switch (instruction->GetPackedType()) {
    case DataType::Type::kBool:
    case DataType::Type::kUint8:
    case DataType::Type::kInt8:
      __ Ptrue(p_reg.VnB(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kUint16:
    case DataType::Type::kInt16:
      __ Ptrue(p_reg.VnH(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt32:
    case DataType::Type::kFloat32:
      __ Ptrue(p_reg.VnS(), vixl::aarch64::SVE_ALL);
      break;
    case DataType::Type::kInt64:
    case DataType::Type::kFloat64:
      __ Ptrue(p_reg.VnD(), vixl::aarch64::SVE_ALL);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::RequiresRegister());
  locations->SetInAt(1, Location::RequiresRegister());
  // The instruction doesn't really need a core register as out location; this is a hack
  // to work around the absence of support for vector predicates in register allocation.
  //
  // Semantically, the out location of this instruction and the predicate input locations of
  // its users should be a fixed predicate register (similar to
  // Location::RegisterLocation(int reg)). But the register allocator (RA) doesn't support
  // SIMD regs (e.g. predicates), so LoopPReg() is used explicitly without exposing it
  // to the RA.
  //
  // To make the RA happy, Location::NoLocation() is used for all the vector instructions'
  // predicate inputs; but for the PredSetOperations (e.g. VecPredWhile) Location::NoLocation()
  // can't be used without changes to the RA - "ssa_liveness_analysis.cc] Check failed:
  // input->IsEmittedAtUseSite()" would fire.
  //
  // Using a core register as a hack is the easiest way to tackle this problem. The RA will
  // block one core register for the loop without actually using it; this should not be
  // a performance issue as a SIMD loop operates mainly on SIMD registers.
  //
  // TODO: Support SIMD types in the register allocator.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredWhile(HVecPredWhile* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  // Current implementation of predicated loop execution only supports kLO condition.
  DCHECK(instruction->GetCondKind() == HVecPredWhile::CondKind::kLO);
  Register left = InputRegisterAt(instruction, 0);
  Register right = InputRegisterAt(instruction, 1);

  DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % instruction->GetVectorLength(), 0u);

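  // The (register width / vector length) ratio is the lane size in bytes and selects the
  // Whilelo lane specifier: e.g. a 16-byte SIMD register with vector length 8 implies
  // 2-byte (H) lanes.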
  switch (codegen_->GetSIMDRegisterWidth() / instruction->GetVectorLength()) {
    case 1u:
      __ Whilelo(LoopPReg().VnB(), left, right);
      break;
    case 2u:
      __ Whilelo(LoopPReg().VnH(), left, right);
      break;
    case 4u:
      __ Whilelo(LoopPReg().VnS(), left, right);
      break;
    case 8u:
      __ Whilelo(LoopPReg().VnD(), left, right);
      break;
    default:
      LOG(FATAL) << "Unsupported SIMD type: " << instruction->GetPackedType();
      UNREACHABLE();
  }
}

void LocationsBuilderARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
  LocationSummary* locations = new (GetGraph()->GetAllocator()) LocationSummary(instruction);
  locations->SetInAt(0, Location::NoLocation());
  // Result of the operation - a boolean value in a core register.
  locations->SetOut(Location::RequiresRegister());
}

void InstructionCodeGeneratorARM64Sve::VisitVecPredCondition(HVecPredCondition* instruction) {
  // Instruction is not predicated, see nodes_vector.h
  DCHECK(!instruction->IsPredicated());
  Register reg = OutputRegister(instruction);
  // Currently VecPredCondition is only used as part of vectorized loop check condition
  // evaluation.
  DCHECK(instruction->GetPCondKind() == HVecPredCondition::PCondKind::kNFirst);
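  // SVE predicate-generating instructions (e.g. Whilelo) set the N flag when the first lane
  // of the result is active, so `pl` (N clear) yields 1 exactly when no lanes were set,
  // i.e. when the predicated loop should exit.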
  __ Cset(reg, pl);
}

Location InstructionCodeGeneratorARM64Sve::AllocateSIMDScratchLocation(
    vixl::aarch64::UseScratchRegisterScope* scope) {
  return LocationFrom(scope->AcquireZ());
}

void InstructionCodeGeneratorARM64Sve::FreeSIMDScratchLocation(Location loc,
    vixl::aarch64::UseScratchRegisterScope* scope) {
  scope->Release(ZRegisterFrom(loc));
}

void InstructionCodeGeneratorARM64Sve::LoadSIMDRegFromStack(Location destination,
                                                            Location source) {
  __ Ldr(ZRegisterFrom(destination), SveStackOperandFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveSIMDRegToSIMDReg(Location destination,
                                                            Location source) {
  __ Mov(ZRegisterFrom(destination), ZRegisterFrom(source));
}

void InstructionCodeGeneratorARM64Sve::MoveToSIMDStackSlot(Location destination,
                                                           Location source) {
  DCHECK(destination.IsSIMDStackSlot());

  if (source.IsFpuRegister()) {
    __ Str(ZRegisterFrom(source), SveStackOperandFrom(destination));
  } else {
    DCHECK(source.IsSIMDStackSlot());
    UseScratchRegisterScope temps(GetVIXLAssembler());
    if (GetVIXLAssembler()->GetScratchVRegisterList()->IsEmpty()) {
      // Very rare situation, only when there are cycles in ParallelMoveResolver graph.
      const Register temp = temps.AcquireX();
      DCHECK_EQ(codegen_->GetSIMDRegisterWidth() % kArm64WordSize, 0u);
      // Emit a number of LDR/STR (XRegister, 64-bit) to cover the whole SIMD register size
      // when copying a stack slot.
      for (size_t offset = 0, e = codegen_->GetSIMDRegisterWidth();
           offset < e;
           offset += kArm64WordSize) {
        __ Ldr(temp, MemOperand(sp, source.GetStackIndex() + offset));
        __ Str(temp, MemOperand(sp, destination.GetStackIndex() + offset));
      }
    } else {
      const ZRegister temp = temps.AcquireZ();
      __ Ldr(temp, SveStackOperandFrom(source));
      __ Str(temp, SveStackOperandFrom(destination));
    }
  }
}

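// Slow-path live-register save/restore layout: core registers are spilled first at
// `spill_offset`, followed by the FP/SIMD registers. When the graph contains SIMD code the
// full Z registers are saved; otherwise only the 64-bit D views need to be preserved.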
template <bool is_save>
void SaveRestoreLiveRegistersHelperSveImpl(CodeGeneratorARM64* codegen,
                                           LocationSummary* locations,
                                           int64_t spill_offset) {
  const uint32_t core_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ true);
  const uint32_t fp_spills = codegen->GetSlowPathSpills(locations, /* core_registers= */ false);
  DCHECK(helpers::ArtVixlRegCodeCoherentForRegSet(core_spills,
                                                  codegen->GetNumberOfCoreRegisters(),
                                                  fp_spills,
                                                  codegen->GetNumberOfFloatingPointRegisters()));
  MacroAssembler* masm = codegen->GetVIXLAssembler();
  Register base = masm->StackPointer();

  CPURegList core_list = CPURegList(CPURegister::kRegister, kXRegSize, core_spills);
  int64_t core_spill_size = core_list.GetTotalSizeInBytes();
  int64_t fp_spill_offset = spill_offset + core_spill_size;

  if (codegen->GetGraph()->HasSIMD()) {
    if (is_save) {
      masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    } else {
      masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    }
    codegen->GetAssembler()->SaveRestoreZRegisterList<is_save>(fp_spills, fp_spill_offset);
    return;
  }

  // Case when we only need to save/restore D-registers.
  DCHECK(!codegen->GetGraph()->HasSIMD());
  DCHECK_LE(codegen->GetSlowPathFPWidth(), kDRegSizeInBytes);
  CPURegList fp_list = CPURegList(CPURegister::kVRegister, kDRegSize, fp_spills);
  if (is_save) {
    masm->StoreCPURegList(core_list, MemOperand(base, spill_offset));
    masm->StoreCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  } else {
    masm->LoadCPURegList(core_list, MemOperand(base, spill_offset));
    masm->LoadCPURegList(fp_list, MemOperand(base, fp_spill_offset));
  }
}

void InstructionCodeGeneratorARM64Sve::SaveLiveRegistersHelper(LocationSummary* locations,
                                                               int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ true>(codegen_, locations, spill_offset);
}

void InstructionCodeGeneratorARM64Sve::RestoreLiveRegistersHelper(LocationSummary* locations,
                                                                  int64_t spill_offset) {
  SaveRestoreLiveRegistersHelperSveImpl</* is_save= */ false>(codegen_, locations, spill_offset);
}

#undef __

}  // namespace arm64
}  // namespace art