1 // Copyright 2019, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #include "macro-assembler-aarch64.h"
28
29 namespace vixl {
30 namespace aarch64 {
31
void MacroAssembler::AddSubHelper(AddSubHelperOption option,
33 const ZRegister& zd,
34 const ZRegister& zn,
35 IntegerOperand imm) {
36 VIXL_ASSERT(imm.FitsInLane(zd));
37
38 // Simple, encodable cases.
39 if (TrySingleAddSub(option, zd, zn, imm)) return;
40
41 VIXL_ASSERT((option == kAddImmediate) || (option == kSubImmediate));
42 bool add_imm = (option == kAddImmediate);
43
44 // Try to translate Add(..., -imm) to Sub(..., imm) if we can encode it in one
45 // instruction. Also interpret the immediate as signed, so we can convert
46 // Add(zd.VnH(), zn.VnH(), 0xffff...) to Sub(..., 1), etc.
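// Illustrative example (comment only): with H-sized lanes,
//   Add(z0.VnH(), z1.VnH(), 0xfffe);  // 0xfffe is -2 as an int16_t
// is emitted as "sub z0.h, z0.h, #2", preceded by a movprfx from z1 when
// z0 and z1 are different registers.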
47 IntegerOperand signed_imm(imm.AsIntN(zd.GetLaneSizeInBits()));
48 if (signed_imm.IsNegative()) {
49 AddSubHelperOption n_option = add_imm ? kSubImmediate : kAddImmediate;
50 IntegerOperand n_imm(signed_imm.GetMagnitude());
51 // IntegerOperand can represent -INT_MIN, so this is always safe.
52 VIXL_ASSERT(n_imm.IsPositiveOrZero());
53 if (TrySingleAddSub(n_option, zd, zn, n_imm)) return;
54 }
55
56 // Otherwise, fall back to dup + ADD_z_z/SUB_z_z.
57 UseScratchRegisterScope temps(this);
58 ZRegister scratch = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
59 Dup(scratch, imm);
60
61 SingleEmissionCheckScope guard(this);
62 if (add_imm) {
63 add(zd, zn, scratch);
64 } else {
65 sub(zd, zn, scratch);
66 }
67 }
68
bool MacroAssembler::TrySingleAddSub(AddSubHelperOption option,
70 const ZRegister& zd,
71 const ZRegister& zn,
72 IntegerOperand imm) {
73 VIXL_ASSERT(imm.FitsInLane(zd));
74
75 int imm8;
76 int shift = -1;
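// The immediate forms of ADD and SUB take an unsigned 8-bit value, optionally
// shifted left by eight. For example (illustrative), 0x2a encodes as
// imm8 = 0x2a with shift 0, and 0x2a00 encodes as imm8 = 0x2a with shift 8;
// 0x2a2a is not encodable and makes this helper return false.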
77 if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
78 imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
79 MovprfxHelperScope guard(this, zd, zn);
80 switch (option) {
81 case kAddImmediate:
82 add(zd, zd, imm8, shift);
83 return true;
84 case kSubImmediate:
85 sub(zd, zd, imm8, shift);
86 return true;
87 }
88 }
89 return false;
90 }
91
void MacroAssembler::IntWideImmHelper(IntArithImmFn imm_fn,
93 SVEArithPredicatedFn reg_macro,
94 const ZRegister& zd,
95 const ZRegister& zn,
96 IntegerOperand imm,
97 bool is_signed) {
98 if (is_signed) {
99 // E.g. MUL_z_zi, SMIN_z_zi, SMAX_z_zi
100 if (imm.IsInt8()) {
101 MovprfxHelperScope guard(this, zd, zn);
102 (this->*imm_fn)(zd, zd, imm.AsInt8());
103 return;
104 }
105 } else {
106 // E.g. UMIN_z_zi, UMAX_z_zi
107 if (imm.IsUint8()) {
108 MovprfxHelperScope guard(this, zd, zn);
109 (this->*imm_fn)(zd, zd, imm.AsUint8());
110 return;
111 }
112 }
113
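// Out-of-range immediates fall back to a predicated register form. For
// example (illustrative), Smin(z0.VnS(), z0.VnS(), 1000) expands to roughly
// "ptrue pX.s; mov wY, #1000; dup zZ.s, wY; smin z0.s, pX/m, z0.s, zZ.s",
// where pX, wY and zZ are scratch registers.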
114 UseScratchRegisterScope temps(this);
115 PRegister pg = temps.AcquireGoverningP();
116 Ptrue(pg.WithSameLaneSizeAs(zd));
117
118 // Try to re-use zd if we can, so we can avoid a movprfx.
119 ZRegister scratch =
120 zd.Aliases(zn) ? temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits())
121 : zd;
122 Dup(scratch, imm);
123
124 // The vector-form macro for commutative operations will swap the arguments to
125 // avoid movprfx, if necessary.
126 (this->*reg_macro)(zd, pg.Merging(), zn, scratch);
127 }
128
void MacroAssembler::Mul(const ZRegister& zd,
130 const ZRegister& zn,
131 IntegerOperand imm) {
132 VIXL_ASSERT(allow_macro_instructions_);
133 IntArithImmFn imm_fn = &Assembler::mul;
134 SVEArithPredicatedFn reg_fn = &MacroAssembler::Mul;
135 IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
136 }
137
void MacroAssembler::Smin(const ZRegister& zd,
139 const ZRegister& zn,
140 IntegerOperand imm) {
141 VIXL_ASSERT(allow_macro_instructions_);
142 VIXL_ASSERT(imm.FitsInSignedLane(zd));
143 IntArithImmFn imm_fn = &Assembler::smin;
144 SVEArithPredicatedFn reg_fn = &MacroAssembler::Smin;
145 IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
146 }
147
void MacroAssembler::Smax(const ZRegister& zd,
149 const ZRegister& zn,
150 IntegerOperand imm) {
151 VIXL_ASSERT(allow_macro_instructions_);
152 VIXL_ASSERT(imm.FitsInSignedLane(zd));
153 IntArithImmFn imm_fn = &Assembler::smax;
154 SVEArithPredicatedFn reg_fn = &MacroAssembler::Smax;
155 IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, true);
156 }
157
void MacroAssembler::Umax(const ZRegister& zd,
159 const ZRegister& zn,
160 IntegerOperand imm) {
161 VIXL_ASSERT(allow_macro_instructions_);
162 VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
163 IntArithImmFn imm_fn = &Assembler::umax;
164 SVEArithPredicatedFn reg_fn = &MacroAssembler::Umax;
165 IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
166 }
167
void MacroAssembler::Umin(const ZRegister& zd,
169 const ZRegister& zn,
170 IntegerOperand imm) {
171 VIXL_ASSERT(allow_macro_instructions_);
172 VIXL_ASSERT(imm.FitsInUnsignedLane(zd));
173 IntArithImmFn imm_fn = &Assembler::umin;
174 SVEArithPredicatedFn reg_fn = &MacroAssembler::Umin;
175 IntWideImmHelper(imm_fn, reg_fn, zd, zn, imm, false);
176 }
177
void MacroAssembler::Addpl(const Register& xd,
179 const Register& xn,
180 int64_t multiplier) {
181 VIXL_ASSERT(allow_macro_instructions_);
182
183 // This macro relies on `Rdvl` to handle some out-of-range cases. Check that
184 // `VL * multiplier` cannot overflow, for any possible value of VL.
185 VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
186 VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
187
188 if (xd.IsZero()) return;
189 if (xn.IsZero() && xd.IsSP()) {
190 // TODO: This operation doesn't make much sense, but we could support it
191 // with a scratch register if necessary.
192 VIXL_UNIMPLEMENTED();
193 }
194
195 // Handling xzr requires an extra move, so defer it until later so we can try
196 // to use `rdvl` instead (via `Addvl`).
197 if (IsInt6(multiplier) && !xn.IsZero()) {
198 SingleEmissionCheckScope guard(this);
199 addpl(xd, xn, static_cast<int>(multiplier));
200 return;
201 }
202
203 // If `multiplier` is a multiple of 8, we can use `Addvl` instead.
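// For example (illustrative), Addpl(x0, x1, 16) is equivalent to
// Addvl(x0, x1, 2), because each predicate bit governs one byte (eight bits)
// of a Z register.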
204 if ((multiplier % kZRegBitsPerPRegBit) == 0) {
205 Addvl(xd, xn, multiplier / kZRegBitsPerPRegBit);
206 return;
207 }
208
209 if (IsInt6(multiplier)) {
210 VIXL_ASSERT(xn.IsZero()); // Other cases were handled with `addpl`.
211 // There is no simple `rdpl` instruction, and `addpl` cannot accept xzr, so
212 // materialise a zero.
213 MacroEmissionCheckScope guard(this);
214 movz(xd, 0);
215 addpl(xd, xd, static_cast<int>(multiplier));
216 return;
217 }
218
219 // TODO: Some probable cases result in rather long sequences. For example,
220 // `Addpl(sp, sp, 33)` requires five instructions, even though it's only just
221 // outside the encodable range. We should look for ways to cover such cases
222 // without drastically increasing the complexity of this logic.
223
224 // For other cases, calculate xn + (PL * multiplier) using discrete
225 // instructions. This requires two scratch registers in the general case, so
226 // try to re-use the destination as a scratch register.
227 UseScratchRegisterScope temps(this);
228 temps.Include(xd);
229 temps.Exclude(xn);
230
231 Register scratch = temps.AcquireX();
// There is no `rdpl`, so we have to calculate PL from VL. We can't scale the
// multiplier because (we already know) it isn't a multiple of 8.
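// For example (illustrative), Addpl(x0, x1, 33) reaches this point and is
// emitted as roughly "rdvl xS, #33" (itself macro-expanded if the multiplier
// is out of range for rdvl) followed by "add x0, x1, xS, asr #3".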
234 Rdvl(scratch, multiplier);
235
236 MacroEmissionCheckScope guard(this);
237 if (xn.IsZero()) {
238 asr(xd, scratch, kZRegBitsPerPRegBitLog2);
239 } else if (xd.IsSP() || xn.IsSP()) {
240 // TODO: MacroAssembler::Add should be able to handle this.
241 asr(scratch, scratch, kZRegBitsPerPRegBitLog2);
242 add(xd, xn, scratch);
243 } else {
244 add(xd, xn, Operand(scratch, ASR, kZRegBitsPerPRegBitLog2));
245 }
246 }
247
void MacroAssembler::Addvl(const Register& xd,
249 const Register& xn,
250 int64_t multiplier) {
251 VIXL_ASSERT(allow_macro_instructions_);
252 VIXL_ASSERT(xd.IsX());
253 VIXL_ASSERT(xn.IsX());
254
255 // Check that `VL * multiplier` cannot overflow, for any possible value of VL.
256 VIXL_ASSERT(multiplier <= (INT64_MAX / kZRegMaxSizeInBytes));
257 VIXL_ASSERT(multiplier >= (INT64_MIN / kZRegMaxSizeInBytes));
258
259 if (xd.IsZero()) return;
260 if (xn.IsZero() && xd.IsSP()) {
261 // TODO: This operation doesn't make much sense, but we could support it
262 // with a scratch register if necessary. `rdvl` cannot write into `sp`.
263 VIXL_UNIMPLEMENTED();
264 }
265
266 if (IsInt6(multiplier)) {
267 SingleEmissionCheckScope guard(this);
268 if (xn.IsZero()) {
269 rdvl(xd, static_cast<int>(multiplier));
270 } else {
271 addvl(xd, xn, static_cast<int>(multiplier));
272 }
273 return;
274 }
275
276 // TODO: Some probable cases result in rather long sequences. For example,
277 // `Addvl(sp, sp, 42)` requires four instructions, even though it's only just
278 // outside the encodable range. We should look for ways to cover such cases
279 // without drastically increasing the complexity of this logic.
280
281 // For other cases, calculate xn + (VL * multiplier) using discrete
282 // instructions. This requires two scratch registers in the general case, so
283 // we try to re-use the destination as a scratch register.
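// For example (illustrative), Addvl(x0, x1, 100) is emitted as roughly
// "mov xA, #100; rdvl xB, #1; madd x0, xA, xB, x1", with xA and xB acquired
// from the scratch pool.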
284 UseScratchRegisterScope temps(this);
285 temps.Include(xd);
286 temps.Exclude(xn);
287
288 Register a = temps.AcquireX();
289 Mov(a, multiplier);
290
291 MacroEmissionCheckScope guard(this);
292 Register b = temps.AcquireX();
293 rdvl(b, 1);
294 if (xn.IsZero()) {
295 mul(xd, a, b);
296 } else if (xd.IsSP() || xn.IsSP()) {
297 mul(a, a, b);
298 add(xd, xn, a);
299 } else {
300 madd(xd, a, b, xn);
301 }
302 }
303
void MacroAssembler::CalculateSVEAddress(const Register& xd,
305 const SVEMemOperand& addr,
306 int vl_divisor_log2) {
307 VIXL_ASSERT(allow_macro_instructions_);
308 VIXL_ASSERT(!addr.IsScatterGather());
309 VIXL_ASSERT(xd.IsX());
310
311 // The lower bound is where a whole Z register is accessed.
312 VIXL_ASSERT(!addr.IsMulVl() || (vl_divisor_log2 >= 0));
313 // The upper bound is for P register accesses, and for instructions like
314 // "st1b { z0.d } [...]", where one byte is accessed for every D-sized lane.
315 VIXL_ASSERT(vl_divisor_log2 <= static_cast<int>(kZRegBitsPerPRegBitLog2));
316
317 SVEOffsetModifier mod = addr.GetOffsetModifier();
318 Register base = addr.GetScalarBase();
319
320 if (addr.IsEquivalentToScalar()) {
321 // For example:
322 // [x0]
323 // [x0, #0]
324 // [x0, xzr, LSL 2]
325 Mov(xd, base);
326 } else if (addr.IsScalarPlusImmediate()) {
327 // For example:
328 // [x0, #42]
329 // [x0, #42, MUL VL]
330 int64_t offset = addr.GetImmediateOffset();
331 VIXL_ASSERT(offset != 0); // Handled by IsEquivalentToScalar.
332 if (addr.IsMulVl()) {
333 int vl_divisor = 1 << vl_divisor_log2;
334 // For all possible values of vl_divisor, we can simply use `Addpl`. This
335 // will select `addvl` if necessary.
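// For example (illustrative), "[x0, #2, MUL VL]" accessed as a whole Z
// register (vl_divisor_log2 == 0) becomes Addpl(xd, x0, 16), which emits
// "addvl xd, x0, #2"; accessed one byte per D-sized lane
// (vl_divisor_log2 == 3) it becomes Addpl(xd, x0, 2).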
336 VIXL_ASSERT((kZRegBitsPerPRegBit % vl_divisor) == 0);
337 Addpl(xd, base, offset * (kZRegBitsPerPRegBit / vl_divisor));
338 } else {
339 // IsScalarPlusImmediate() ensures that no other modifiers can occur.
340 VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
341 Add(xd, base, offset);
342 }
343 } else if (addr.IsScalarPlusScalar()) {
344 // For example:
345 // [x0, x1]
346 // [x0, x1, LSL #4]
347 Register offset = addr.GetScalarOffset();
348 VIXL_ASSERT(!offset.IsZero()); // Handled by IsEquivalentToScalar.
349 if (mod == SVE_LSL) {
350 Add(xd, base, Operand(offset, LSL, addr.GetShiftAmount()));
351 } else {
352 // IsScalarPlusScalar() ensures that no other modifiers can occur.
353 VIXL_ASSERT(mod == NO_SVE_OFFSET_MODIFIER);
354 Add(xd, base, offset);
355 }
356 } else {
357 // All other forms are scatter-gather addresses, which cannot be evaluated
358 // into an X register.
359 VIXL_UNREACHABLE();
360 }
361 }
362
void MacroAssembler::Cpy(const ZRegister& zd,
364 const PRegister& pg,
365 IntegerOperand imm) {
366 VIXL_ASSERT(allow_macro_instructions_);
367 VIXL_ASSERT(imm.FitsInLane(zd));
368 int imm8;
369 int shift;
370 if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
371 imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
372 SingleEmissionCheckScope guard(this);
373 cpy(zd, pg, imm8, shift);
374 return;
375 }
376
377 // The fallbacks rely on `cpy` variants that only support merging predication.
378 // If zeroing predication was requested, zero the destination first.
379 if (pg.IsZeroing()) {
380 SingleEmissionCheckScope guard(this);
381 dup(zd, 0);
382 }
383 PRegisterM pg_m = pg.Merging();
384
385 // Try to encode the immediate using fcpy.
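// For example (illustrative), with S-sized lanes an immediate of 0x3f800000
// is the raw encoding of 1.0f, so "fcpy zd.s, pg/m, #1.0" can be emitted
// instead of materialising the value in a scalar register.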
386 VIXL_ASSERT(imm.FitsInLane(zd));
387 if (zd.GetLaneSizeInBits() >= kHRegSize) {
388 double fp_imm = 0.0;
389 switch (zd.GetLaneSizeInBits()) {
390 case kHRegSize:
391 fp_imm =
392 FPToDouble(RawbitsToFloat16(imm.AsUint16()), kIgnoreDefaultNaN);
393 break;
394 case kSRegSize:
395 fp_imm = RawbitsToFloat(imm.AsUint32());
396 break;
397 case kDRegSize:
398 fp_imm = RawbitsToDouble(imm.AsUint64());
399 break;
400 default:
401 VIXL_UNREACHABLE();
402 break;
403 }
404 // IsImmFP64 is equivalent to IsImmFP<n> for the same arithmetic value, so
405 // we can use IsImmFP64 for all lane sizes.
406 if (IsImmFP64(fp_imm)) {
407 SingleEmissionCheckScope guard(this);
408 fcpy(zd, pg_m, fp_imm);
409 return;
410 }
411 }
412
413 // Fall back to using a scratch register.
414 UseScratchRegisterScope temps(this);
415 Register scratch = temps.AcquireRegisterToHoldLane(zd);
416 Mov(scratch, imm);
417
418 SingleEmissionCheckScope guard(this);
419 cpy(zd, pg_m, scratch);
420 }
421
422 // TODO: We implement Fcpy (amongst other things) for all FP types because it
423 // allows us to preserve user-specified NaNs. We should come up with some
424 // FPImmediate type to abstract this, and avoid all the duplication below (and
425 // elsewhere).
426
void MacroAssembler::Fcpy(const ZRegister& zd,
428 const PRegisterM& pg,
429 double imm) {
430 VIXL_ASSERT(allow_macro_instructions_);
431 VIXL_ASSERT(pg.IsMerging());
432
433 if (IsImmFP64(imm)) {
434 SingleEmissionCheckScope guard(this);
435 fcpy(zd, pg, imm);
436 return;
437 }
438
439 // As a fall-back, cast the immediate to the required lane size, and try to
440 // encode the bit pattern using `Cpy`.
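// For example (illustrative), Fcpy(z0.VnH(), p0.Merging(), 0.1) cannot use
// the FP immediate form, so 0.1 is converted to its 16-bit raw encoding and
// passed to Cpy, which in turn falls back to a scalar scratch register.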
441 Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
442 }
443
void MacroAssembler::Fcpy(const ZRegister& zd,
445 const PRegisterM& pg,
446 float imm) {
447 VIXL_ASSERT(allow_macro_instructions_);
448 VIXL_ASSERT(pg.IsMerging());
449
450 if (IsImmFP32(imm)) {
451 SingleEmissionCheckScope guard(this);
452 fcpy(zd, pg, imm);
453 return;
454 }
455
456 // As a fall-back, cast the immediate to the required lane size, and try to
457 // encode the bit pattern using `Cpy`.
458 Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
459 }
460
void MacroAssembler::Fcpy(const ZRegister& zd,
462 const PRegisterM& pg,
463 Float16 imm) {
464 VIXL_ASSERT(allow_macro_instructions_);
465 VIXL_ASSERT(pg.IsMerging());
466
467 if (IsImmFP16(imm)) {
468 SingleEmissionCheckScope guard(this);
469 fcpy(zd, pg, imm);
470 return;
471 }
472
473 // As a fall-back, cast the immediate to the required lane size, and try to
474 // encode the bit pattern using `Cpy`.
475 Cpy(zd, pg, FPToRawbitsWithSize(zd.GetLaneSizeInBits(), imm));
476 }
477
void MacroAssembler::Dup(const ZRegister& zd, IntegerOperand imm) {
479 VIXL_ASSERT(allow_macro_instructions_);
480 VIXL_ASSERT(imm.FitsInLane(zd));
481 unsigned lane_size = zd.GetLaneSizeInBits();
482 int imm8;
483 int shift;
484 if (imm.TryEncodeAsShiftedIntNForLane<8, 0>(zd, &imm8, &shift) ||
485 imm.TryEncodeAsShiftedIntNForLane<8, 8>(zd, &imm8, &shift)) {
486 SingleEmissionCheckScope guard(this);
487 dup(zd, imm8, shift);
488 } else if (IsImmLogical(imm.AsUintN(lane_size), lane_size)) {
489 SingleEmissionCheckScope guard(this);
490 dupm(zd, imm.AsUintN(lane_size));
491 } else {
492 UseScratchRegisterScope temps(this);
493 Register scratch = temps.AcquireRegisterToHoldLane(zd);
494 Mov(scratch, imm);
495
496 SingleEmissionCheckScope guard(this);
497 dup(zd, scratch);
498 }
499 }
500
void MacroAssembler::NoncommutativeArithmeticHelper(
502 const ZRegister& zd,
503 const PRegisterM& pg,
504 const ZRegister& zn,
505 const ZRegister& zm,
506 SVEArithPredicatedFn fn,
507 SVEArithPredicatedFn rev_fn) {
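// For example (illustrative), Fdiv(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS())
// takes the second branch below and emits "fdivr z0.s, p0/m, z0.s, z1.s", so
// the destination is re-used without needing a scratch register.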
508 if (zd.Aliases(zn)) {
509 // E.g. zd = zd / zm
510 SingleEmissionCheckScope guard(this);
511 (this->*fn)(zd, pg, zn, zm);
512 } else if (zd.Aliases(zm)) {
513 // E.g. zd = zn / zd
514 SingleEmissionCheckScope guard(this);
515 (this->*rev_fn)(zd, pg, zm, zn);
516 } else {
517 // E.g. zd = zn / zm
518 MovprfxHelperScope guard(this, zd, pg, zn);
519 (this->*fn)(zd, pg, zd, zm);
520 }
521 }
522
void MacroAssembler::FPCommutativeArithmeticHelper(
524 const ZRegister& zd,
525 const PRegisterM& pg,
526 const ZRegister& zn,
527 const ZRegister& zm,
528 SVEArithPredicatedFn fn,
529 FPMacroNaNPropagationOption nan_option) {
530 ResolveFPNaNPropagationOption(&nan_option);
531
532 if (zd.Aliases(zn)) {
533 SingleEmissionCheckScope guard(this);
534 (this->*fn)(zd, pg, zd, zm);
535 } else if (zd.Aliases(zm)) {
536 switch (nan_option) {
537 case FastNaNPropagation: {
538 // Swap the arguments.
539 SingleEmissionCheckScope guard(this);
540 (this->*fn)(zd, pg, zd, zn);
541 return;
542 }
543 case StrictNaNPropagation: {
544 UseScratchRegisterScope temps(this);
545 // Use a scratch register to keep the argument order exactly as
546 // specified.
547 ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
548 {
549 MovprfxHelperScope guard(this, scratch, pg, zn);
550 (this->*fn)(scratch, pg, scratch, zm);
551 }
552 Mov(zd, scratch);
553 return;
554 }
555 case NoFPMacroNaNPropagationSelected:
556 VIXL_UNREACHABLE();
557 return;
558 }
559 } else {
560 MovprfxHelperScope guard(this, zd, pg, zn);
561 (this->*fn)(zd, pg, zd, zm);
562 }
563 }
564
// Instructions of the form "inst zda, zn, zm, #num" that are non-commutative
// and for which no reversed form is provided.
567 #define VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(V) \
568 V(Cmla, cmla) \
569 V(Sqrdcmlah, sqrdcmlah)
570
571 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
572 void MacroAssembler::MASMFN(const ZRegister& zd, \
573 const ZRegister& za, \
574 const ZRegister& zn, \
575 const ZRegister& zm, \
576 int imm) { \
577 if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
578 UseScratchRegisterScope temps(this); \
579 VIXL_ASSERT(AreSameLaneSize(zn, zm)); \
580 ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn); \
581 Mov(ztmp, zd.Aliases(zn) ? zn : zm); \
582 MovprfxHelperScope guard(this, zd, za); \
583 ASMFN(zd, \
584 (zd.Aliases(zn) ? ztmp : zn), \
585 (zd.Aliases(zm) ? ztmp : zm), \
586 imm); \
587 } else { \
588 MovprfxHelperScope guard(this, zd, za); \
589 ASMFN(zd, zn, zm, imm); \
590 } \
591 }
592 VIXL_SVE_NONCOMM_ARITH_ZZZZI_LIST(VIXL_DEFINE_MASM_FUNC)
593 #undef VIXL_DEFINE_MASM_FUNC
594
// Instructions of the form "inst zda, zn, zm, #num, #num" that are
// non-commutative and for which no reversed form is provided.
597 #define VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(V) \
598 V(Cmla, cmla) \
599 V(Sqrdcmlah, sqrdcmlah)
600
// This doesn't handle zm when it's outside the range that can be encoded in
// the instruction. That range depends on the element size: z0-z7 for H,
// z0-z15 for S.
603 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
604 void MacroAssembler::MASMFN(const ZRegister& zd, \
605 const ZRegister& za, \
606 const ZRegister& zn, \
607 const ZRegister& zm, \
608 int index, \
609 int rot) { \
610 if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) { \
611 UseScratchRegisterScope temps(this); \
612 ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd); \
613 { \
614 MovprfxHelperScope guard(this, ztmp, za); \
615 ASMFN(ztmp, zn, zm, index, rot); \
616 } \
617 Mov(zd, ztmp); \
618 } else { \
619 MovprfxHelperScope guard(this, zd, za); \
620 ASMFN(zd, zn, zm, index, rot); \
621 } \
622 }
VIXL_SVE_NONCOMM_ARITH_ZZZZII_LIST(VIXL_DEFINE_MASM_FUNC)
624 #undef VIXL_DEFINE_MASM_FUNC
625
// Instructions of the form "inst zda, pg, zda, zn" that are non-commutative
// and for which no reversed form is provided.
628 #define VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(V) \
629 V(Addp, addp) \
630 V(Faddp, faddp) \
631 V(Fmaxnmp, fmaxnmp) \
632 V(Fminnmp, fminnmp) \
633 V(Fmaxp, fmaxp) \
634 V(Fminp, fminp) \
635 V(Fscale, fscale) \
636 V(Smaxp, smaxp) \
637 V(Sminp, sminp) \
638 V(Suqadd, suqadd) \
639 V(Umaxp, umaxp) \
640 V(Uminp, uminp) \
641 V(Usqadd, usqadd)
642
643 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
644 void MacroAssembler::MASMFN(const ZRegister& zd, \
645 const PRegisterM& pg, \
646 const ZRegister& zn, \
647 const ZRegister& zm) { \
648 VIXL_ASSERT(allow_macro_instructions_); \
649 if (zd.Aliases(zm) && !zd.Aliases(zn)) { \
650 UseScratchRegisterScope temps(this); \
651 ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm); \
652 Mov(scratch, zm); \
653 MovprfxHelperScope guard(this, zd, pg, zn); \
654 ASMFN(zd, pg, zd, scratch); \
655 } else { \
656 MovprfxHelperScope guard(this, zd, pg, zn); \
657 ASMFN(zd, pg, zd, zm); \
658 } \
659 }
660 VIXL_SVE_NONCOMM_ARITH_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
661 #undef VIXL_DEFINE_MASM_FUNC
662
// Instructions of the form "inst zda, pg, zda, zn" that are non-commutative
// and for which a reversed form is provided.
665 #define VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(V) \
666 V(Asr, asr) \
667 V(Fdiv, fdiv) \
668 V(Fsub, fsub) \
669 V(Lsl, lsl) \
670 V(Lsr, lsr) \
671 V(Sdiv, sdiv) \
672 V(Shsub, shsub) \
673 V(Sqrshl, sqrshl) \
674 V(Sqshl, sqshl) \
675 V(Sqsub, sqsub) \
676 V(Srshl, srshl) \
677 V(Sub, sub) \
678 V(Udiv, udiv) \
679 V(Uhsub, uhsub) \
680 V(Uqrshl, uqrshl) \
681 V(Uqshl, uqshl) \
682 V(Uqsub, uqsub) \
683 V(Urshl, urshl)
684
685 #define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN) \
686 void MacroAssembler::MASMFN(const ZRegister& zd, \
687 const PRegisterM& pg, \
688 const ZRegister& zn, \
689 const ZRegister& zm) { \
690 VIXL_ASSERT(allow_macro_instructions_); \
691 NoncommutativeArithmeticHelper(zd, \
692 pg, \
693 zn, \
694 zm, \
695 static_cast<SVEArithPredicatedFn>( \
696 &Assembler::ASMFN), \
697 static_cast<SVEArithPredicatedFn>( \
698 &Assembler::ASMFN##r)); \
699 }
700 VIXL_SVE_NONCOMM_ARITH_REVERSE_ZPZZ_LIST(VIXL_DEFINE_MASM_FUNC)
701 #undef VIXL_DEFINE_MASM_FUNC
702
703 void MacroAssembler::Fadd(const ZRegister& zd,
704 const PRegisterM& pg,
705 const ZRegister& zn,
706 const ZRegister& zm,
707 FPMacroNaNPropagationOption nan_option) {
708 VIXL_ASSERT(allow_macro_instructions_);
709 FPCommutativeArithmeticHelper(zd,
710 pg,
711 zn,
712 zm,
713 static_cast<SVEArithPredicatedFn>(
714 &Assembler::fadd),
715 nan_option);
716 }
717
void MacroAssembler::Fabd(const ZRegister& zd,
719 const PRegisterM& pg,
720 const ZRegister& zn,
721 const ZRegister& zm,
722 FPMacroNaNPropagationOption nan_option) {
723 VIXL_ASSERT(allow_macro_instructions_);
724 FPCommutativeArithmeticHelper(zd,
725 pg,
726 zn,
727 zm,
728 static_cast<SVEArithPredicatedFn>(
729 &Assembler::fabd),
730 nan_option);
731 }
732
void MacroAssembler::Fmul(const ZRegister& zd,
734 const PRegisterM& pg,
735 const ZRegister& zn,
736 const ZRegister& zm,
737 FPMacroNaNPropagationOption nan_option) {
738 VIXL_ASSERT(allow_macro_instructions_);
739 FPCommutativeArithmeticHelper(zd,
740 pg,
741 zn,
742 zm,
743 static_cast<SVEArithPredicatedFn>(
744 &Assembler::fmul),
745 nan_option);
746 }
747
void MacroAssembler::Fmulx(const ZRegister& zd,
749 const PRegisterM& pg,
750 const ZRegister& zn,
751 const ZRegister& zm,
752 FPMacroNaNPropagationOption nan_option) {
753 VIXL_ASSERT(allow_macro_instructions_);
754 FPCommutativeArithmeticHelper(zd,
755 pg,
756 zn,
757 zm,
758 static_cast<SVEArithPredicatedFn>(
759 &Assembler::fmulx),
760 nan_option);
761 }
762
void MacroAssembler::Fmax(const ZRegister& zd,
764 const PRegisterM& pg,
765 const ZRegister& zn,
766 const ZRegister& zm,
767 FPMacroNaNPropagationOption nan_option) {
768 VIXL_ASSERT(allow_macro_instructions_);
769 FPCommutativeArithmeticHelper(zd,
770 pg,
771 zn,
772 zm,
773 static_cast<SVEArithPredicatedFn>(
774 &Assembler::fmax),
775 nan_option);
776 }
777
void MacroAssembler::Fmin(const ZRegister& zd,
779 const PRegisterM& pg,
780 const ZRegister& zn,
781 const ZRegister& zm,
782 FPMacroNaNPropagationOption nan_option) {
783 VIXL_ASSERT(allow_macro_instructions_);
784 FPCommutativeArithmeticHelper(zd,
785 pg,
786 zn,
787 zm,
788 static_cast<SVEArithPredicatedFn>(
789 &Assembler::fmin),
790 nan_option);
791 }
792
void MacroAssembler::Fmaxnm(const ZRegister& zd,
794 const PRegisterM& pg,
795 const ZRegister& zn,
796 const ZRegister& zm,
797 FPMacroNaNPropagationOption nan_option) {
798 VIXL_ASSERT(allow_macro_instructions_);
799 FPCommutativeArithmeticHelper(zd,
800 pg,
801 zn,
802 zm,
803 static_cast<SVEArithPredicatedFn>(
804 &Assembler::fmaxnm),
805 nan_option);
806 }
807
void MacroAssembler::Fminnm(const ZRegister& zd,
809 const PRegisterM& pg,
810 const ZRegister& zn,
811 const ZRegister& zm,
812 FPMacroNaNPropagationOption nan_option) {
813 VIXL_ASSERT(allow_macro_instructions_);
814 FPCommutativeArithmeticHelper(zd,
815 pg,
816 zn,
817 zm,
818 static_cast<SVEArithPredicatedFn>(
819 &Assembler::fminnm),
820 nan_option);
821 }
822
void MacroAssembler::Fdup(const ZRegister& zd, double imm) {
824 VIXL_ASSERT(allow_macro_instructions_);
825
826 switch (zd.GetLaneSizeInBits()) {
827 case kHRegSize:
828 Fdup(zd, Float16(imm));
829 break;
830 case kSRegSize:
831 Fdup(zd, static_cast<float>(imm));
832 break;
833 case kDRegSize:
834 if (IsImmFP64(imm)) {
835 SingleEmissionCheckScope guard(this);
836 fdup(zd, imm);
837 } else {
838 Dup(zd, DoubleToRawbits(imm));
839 }
840 break;
841 }
842 }
843
void MacroAssembler::Fdup(const ZRegister& zd, float imm) {
845 VIXL_ASSERT(allow_macro_instructions_);
846
847 switch (zd.GetLaneSizeInBits()) {
848 case kHRegSize:
849 Fdup(zd, Float16(imm));
850 break;
851 case kSRegSize:
852 if (IsImmFP32(imm)) {
853 SingleEmissionCheckScope guard(this);
854 fdup(zd, imm);
855 } else {
856 Dup(zd, FloatToRawbits(imm));
857 }
858 break;
859 case kDRegSize:
860 Fdup(zd, static_cast<double>(imm));
861 break;
862 }
863 }
864
void MacroAssembler::Fdup(const ZRegister& zd, Float16 imm) {
866 VIXL_ASSERT(allow_macro_instructions_);
867
868 switch (zd.GetLaneSizeInBits()) {
869 case kHRegSize:
870 if (IsImmFP16(imm)) {
871 SingleEmissionCheckScope guard(this);
872 fdup(zd, imm);
873 } else {
874 Dup(zd, Float16ToRawbits(imm));
875 }
876 break;
877 case kSRegSize:
878 Fdup(zd, FPToFloat(imm, kIgnoreDefaultNaN));
879 break;
880 case kDRegSize:
881 Fdup(zd, FPToDouble(imm, kIgnoreDefaultNaN));
882 break;
883 }
884 }
885
void MacroAssembler::Index(const ZRegister& zd,
887 const Operand& start,
888 const Operand& step) {
889 class IndexOperand : public Operand {
890 public:
891 static IndexOperand Prepare(MacroAssembler* masm,
892 UseScratchRegisterScope* temps,
893 const Operand& op,
894 const ZRegister& zd_inner) {
895 // Look for encodable immediates.
896 int imm;
897 if (op.IsImmediate()) {
898 if (IntegerOperand(op).TryEncodeAsIntNForLane<5>(zd_inner, &imm)) {
899 return IndexOperand(imm);
900 }
901 Register scratch = temps->AcquireRegisterToHoldLane(zd_inner);
902 masm->Mov(scratch, op);
903 return IndexOperand(scratch);
904 } else {
905 // Plain registers can be encoded directly.
906 VIXL_ASSERT(op.IsPlainRegister());
907 return IndexOperand(op.GetRegister());
908 }
909 }
910
911 int GetImm5() const {
912 int64_t imm = GetImmediate();
913 VIXL_ASSERT(IsInt5(imm));
914 return static_cast<int>(imm);
915 }
916
917 private:
918 explicit IndexOperand(const Register& reg) : Operand(reg) {}
919 explicit IndexOperand(int64_t imm) : Operand(imm) {}
920 };
921
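// For example (illustrative), Index(z0.VnD(), 0, 1) encodes both operands as
// five-bit immediates and emits a single "index z0.d, #0, #1", whereas
// Index(z0.VnD(), x1, 100) first moves 100 into a scratch register because
// 100 does not fit in the signed five-bit immediate range.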
922 UseScratchRegisterScope temps(this);
923 IndexOperand start_enc = IndexOperand::Prepare(this, &temps, start, zd);
924 IndexOperand step_enc = IndexOperand::Prepare(this, &temps, step, zd);
925
926 SingleEmissionCheckScope guard(this);
927 if (start_enc.IsImmediate()) {
928 if (step_enc.IsImmediate()) {
929 index(zd, start_enc.GetImm5(), step_enc.GetImm5());
930 } else {
931 index(zd, start_enc.GetImm5(), step_enc.GetRegister());
932 }
933 } else {
934 if (step_enc.IsImmediate()) {
935 index(zd, start_enc.GetRegister(), step_enc.GetImm5());
936 } else {
937 index(zd, start_enc.GetRegister(), step_enc.GetRegister());
938 }
939 }
940 }
941
void MacroAssembler::Insr(const ZRegister& zdn, IntegerOperand imm) {
943 VIXL_ASSERT(allow_macro_instructions_);
944 VIXL_ASSERT(imm.FitsInLane(zdn));
945
946 if (imm.IsZero()) {
947 SingleEmissionCheckScope guard(this);
948 insr(zdn, xzr);
949 return;
950 }
951
952 UseScratchRegisterScope temps(this);
953 Register scratch = temps.AcquireRegisterToHoldLane(zdn);
954
955 // TODO: There are many cases where we could optimise immediates, such as by
956 // detecting repeating patterns or FP immediates. We should optimise and
957 // abstract this for use in other SVE mov-immediate-like macros.
958 Mov(scratch, imm);
959
960 SingleEmissionCheckScope guard(this);
961 insr(zdn, scratch);
962 }
963
void MacroAssembler::Mla(const ZRegister& zd,
965 const PRegisterM& pg,
966 const ZRegister& za,
967 const ZRegister& zn,
968 const ZRegister& zm) {
969 VIXL_ASSERT(allow_macro_instructions_);
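// For example (illustrative), with S-sized lanes, Mla(z0, p0.Merging(), z1,
// z2, z0) takes the third branch below and emits "mad z0.s, p0/m, z2.s,
// z1.s", relying on the multiplication being commutative.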
970 if (zd.Aliases(za)) {
971 // zda = zda + (zn * zm)
972 SingleEmissionCheckScope guard(this);
973 mla(zd, pg, zn, zm);
974 } else if (zd.Aliases(zn)) {
975 // zdn = za + (zdn * zm)
976 SingleEmissionCheckScope guard(this);
977 mad(zd, pg, zm, za);
978 } else if (zd.Aliases(zm)) {
979 // Multiplication is commutative, so we can swap zn and zm.
980 // zdm = za + (zdm * zn)
981 SingleEmissionCheckScope guard(this);
982 mad(zd, pg, zn, za);
983 } else {
984 // zd = za + (zn * zm)
985 ExactAssemblyScope guard(this, 2 * kInstructionSize);
986 movprfx(zd, pg, za);
987 mla(zd, pg, zn, zm);
988 }
989 }
990
void MacroAssembler::Mls(const ZRegister& zd,
992 const PRegisterM& pg,
993 const ZRegister& za,
994 const ZRegister& zn,
995 const ZRegister& zm) {
996 VIXL_ASSERT(allow_macro_instructions_);
997 if (zd.Aliases(za)) {
998 // zda = zda - (zn * zm)
999 SingleEmissionCheckScope guard(this);
1000 mls(zd, pg, zn, zm);
1001 } else if (zd.Aliases(zn)) {
1002 // zdn = za - (zdn * zm)
1003 SingleEmissionCheckScope guard(this);
1004 msb(zd, pg, zm, za);
1005 } else if (zd.Aliases(zm)) {
1006 // Multiplication is commutative, so we can swap zn and zm.
1007 // zdm = za - (zdm * zn)
1008 SingleEmissionCheckScope guard(this);
1009 msb(zd, pg, zn, za);
1010 } else {
1011 // zd = za - (zn * zm)
1012 ExactAssemblyScope guard(this, 2 * kInstructionSize);
1013 movprfx(zd, pg, za);
1014 mls(zd, pg, zn, zm);
1015 }
1016 }
1017
void MacroAssembler::CompareHelper(Condition cond,
1019 const PRegisterWithLaneSize& pd,
1020 const PRegisterZ& pg,
1021 const ZRegister& zn,
1022 IntegerOperand imm) {
1023 UseScratchRegisterScope temps(this);
1024 ZRegister zm = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
1025 Dup(zm, imm);
1026 SingleEmissionCheckScope guard(this);
1027 cmp(cond, pd, pg, zn, zm);
1028 }
1029
void MacroAssembler::Pfirst(const PRegisterWithLaneSize& pd,
1031 const PRegister& pg,
1032 const PRegisterWithLaneSize& pn) {
1033 VIXL_ASSERT(allow_macro_instructions_);
1034 VIXL_ASSERT(pd.IsLaneSizeB());
1035 VIXL_ASSERT(pn.IsLaneSizeB());
1036 if (pd.Is(pn)) {
1037 SingleEmissionCheckScope guard(this);
1038 pfirst(pd, pg, pn);
1039 } else {
1040 UseScratchRegisterScope temps(this);
1041 PRegister temp_pg = pg;
1042 if (pd.Aliases(pg)) {
1043 temp_pg = temps.AcquireP();
1044 Mov(temp_pg.VnB(), pg.VnB());
1045 }
1046 Mov(pd, pn);
1047 SingleEmissionCheckScope guard(this);
1048 pfirst(pd, temp_pg, pd);
1049 }
1050 }
1051
void MacroAssembler::Pnext(const PRegisterWithLaneSize& pd,
1053 const PRegister& pg,
1054 const PRegisterWithLaneSize& pn) {
1055 VIXL_ASSERT(allow_macro_instructions_);
1056 VIXL_ASSERT(AreSameFormat(pd, pn));
1057 if (pd.Is(pn)) {
1058 SingleEmissionCheckScope guard(this);
1059 pnext(pd, pg, pn);
1060 } else {
1061 UseScratchRegisterScope temps(this);
1062 PRegister temp_pg = pg;
1063 if (pd.Aliases(pg)) {
1064 temp_pg = temps.AcquireP();
1065 Mov(temp_pg.VnB(), pg.VnB());
1066 }
1067 Mov(pd.VnB(), pn.VnB());
1068 SingleEmissionCheckScope guard(this);
1069 pnext(pd, temp_pg, pd);
1070 }
1071 }
1072
void MacroAssembler::Ptrue(const PRegisterWithLaneSize& pd,
1074 SVEPredicateConstraint pattern,
1075 FlagsUpdate s) {
1076 VIXL_ASSERT(allow_macro_instructions_);
1077 switch (s) {
1078 case LeaveFlags:
1079 Ptrue(pd, pattern);
1080 return;
1081 case SetFlags:
1082 Ptrues(pd, pattern);
1083 return;
1084 }
1085 VIXL_UNREACHABLE();
1086 }
1087
void MacroAssembler::Sub(const ZRegister& zd,
1089 IntegerOperand imm,
1090 const ZRegister& zm) {
1091 VIXL_ASSERT(allow_macro_instructions_);
1092
1093 int imm8;
1094 int shift = -1;
1095 if (imm.TryEncodeAsShiftedUintNForLane<8, 0>(zd, &imm8, &shift) ||
1096 imm.TryEncodeAsShiftedUintNForLane<8, 8>(zd, &imm8, &shift)) {
1097 MovprfxHelperScope guard(this, zd, zm);
1098 subr(zd, zd, imm8, shift);
1099 } else {
1100 UseScratchRegisterScope temps(this);
1101 ZRegister scratch = temps.AcquireZ().WithLaneSize(zm.GetLaneSizeInBits());
1102 Dup(scratch, imm);
1103
1104 SingleEmissionCheckScope guard(this);
1105 sub(zd, scratch, zm);
1106 }
1107 }
1108
void MacroAssembler::SVELoadBroadcastImmHelper(const ZRegister& zt,
1110 const PRegisterZ& pg,
1111 const SVEMemOperand& addr,
1112 SVELoadBroadcastFn fn,
1113 int divisor) {
1114 VIXL_ASSERT(addr.IsScalarPlusImmediate());
1115 int64_t imm = addr.GetImmediateOffset();
1116 if ((imm % divisor == 0) && IsUint6(imm / divisor)) {
1117 SingleEmissionCheckScope guard(this);
1118 (this->*fn)(zt, pg, addr);
1119 } else {
1120 UseScratchRegisterScope temps(this);
1121 Register scratch = temps.AcquireX();
1122 CalculateSVEAddress(scratch, addr, zt);
1123 SingleEmissionCheckScope guard(this);
1124 (this->*fn)(zt, pg, SVEMemOperand(scratch));
1125 }
1126 }
1127
void MacroAssembler::SVELoadStoreScalarImmHelper(const CPURegister& rt,
1129 const SVEMemOperand& addr,
1130 SVELoadStoreFn fn) {
1131 VIXL_ASSERT(allow_macro_instructions_);
1132 VIXL_ASSERT(rt.IsZRegister() || rt.IsPRegister());
1133
1134 if (addr.IsPlainScalar() ||
1135 (addr.IsScalarPlusImmediate() && IsInt9(addr.GetImmediateOffset()) &&
1136 addr.IsMulVl())) {
1137 SingleEmissionCheckScope guard(this);
1138 (this->*fn)(rt, addr);
1139 return;
1140 }
1141
1142 if (addr.IsEquivalentToScalar()) {
1143 SingleEmissionCheckScope guard(this);
1144 (this->*fn)(rt, SVEMemOperand(addr.GetScalarBase()));
1145 return;
1146 }
1147
1148 UseScratchRegisterScope temps(this);
1149 Register scratch = temps.AcquireX();
1150 CalculateSVEAddress(scratch, addr, rt);
1151 SingleEmissionCheckScope guard(this);
1152 (this->*fn)(rt, SVEMemOperand(scratch));
1153 }
1154
1155 template <typename Tg, typename Tf>
void MacroAssembler::SVELoadStoreNTBroadcastQOHelper(
1157 const ZRegister& zt,
1158 const Tg& pg,
1159 const SVEMemOperand& addr,
1160 Tf fn,
1161 int imm_bits,
1162 int shift_amount,
1163 SVEOffsetModifier supported_modifier,
1164 int vl_divisor_log2) {
1165 VIXL_ASSERT(allow_macro_instructions_);
1166 int imm_divisor = 1 << shift_amount;
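// For example (illustrative), the quadword replicating loads take a signed
// four-bit immediate scaled by 16 bytes, so imm_bits == 4 with
// shift_amount == 4 accepts byte offsets in the range [-128, 112] that are
// multiples of 16.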
1167
1168 if (addr.IsPlainScalar() ||
1169 (addr.IsScalarPlusImmediate() &&
1170 IsIntN(imm_bits, addr.GetImmediateOffset() / imm_divisor) &&
1171 ((addr.GetImmediateOffset() % imm_divisor) == 0) &&
1172 (addr.GetOffsetModifier() == supported_modifier))) {
1173 SingleEmissionCheckScope guard(this);
1174 (this->*fn)(zt, pg, addr);
1175 return;
1176 }
1177
1178 if (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
1179 addr.IsEquivalentToLSL(zt.GetLaneSizeInBytesLog2())) {
1180 SingleEmissionCheckScope guard(this);
1181 (this->*fn)(zt, pg, addr);
1182 return;
1183 }
1184
1185 if (addr.IsEquivalentToScalar()) {
1186 SingleEmissionCheckScope guard(this);
1187 (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
1188 return;
1189 }
1190
1191 if (addr.IsMulVl() && (supported_modifier != SVE_MUL_VL) &&
1192 (vl_divisor_log2 == -1)) {
1193 // We don't handle [x0, #imm, MUL VL] if the in-memory access size is not VL
1194 // dependent.
1195 VIXL_UNIMPLEMENTED();
1196 }
1197
1198 UseScratchRegisterScope temps(this);
1199 Register scratch = temps.AcquireX();
1200 CalculateSVEAddress(scratch, addr, vl_divisor_log2);
1201 SingleEmissionCheckScope guard(this);
1202 (this->*fn)(zt, pg, SVEMemOperand(scratch));
1203 }
1204
1205 template <typename Tg, typename Tf>
void MacroAssembler::SVELoadStore1Helper(int msize_in_bytes_log2,
1207 const ZRegister& zt,
1208 const Tg& pg,
1209 const SVEMemOperand& addr,
1210 Tf fn) {
1211 if (addr.IsPlainScalar() ||
1212 (addr.IsScalarPlusScalar() && !addr.GetScalarOffset().IsZero() &&
1213 addr.IsEquivalentToLSL(msize_in_bytes_log2)) ||
1214 (addr.IsScalarPlusImmediate() && IsInt4(addr.GetImmediateOffset()) &&
1215 addr.IsMulVl())) {
1216 SingleEmissionCheckScope guard(this);
1217 (this->*fn)(zt, pg, addr);
1218 return;
1219 }
1220
1221 if (addr.IsEquivalentToScalar()) {
1222 SingleEmissionCheckScope guard(this);
1223 (this->*fn)(zt, pg, SVEMemOperand(addr.GetScalarBase()));
1224 return;
1225 }
1226
1227 if (addr.IsVectorPlusImmediate()) {
1228 uint64_t offset = addr.GetImmediateOffset();
1229 if (IsMultiple(offset, (1 << msize_in_bytes_log2)) &&
1230 IsUint5(offset >> msize_in_bytes_log2)) {
1231 SingleEmissionCheckScope guard(this);
1232 (this->*fn)(zt, pg, addr);
1233 return;
1234 }
1235 }
1236
1237 if (addr.IsScalarPlusVector()) {
1238 VIXL_ASSERT(addr.IsScatterGather());
1239 SingleEmissionCheckScope guard(this);
1240 (this->*fn)(zt, pg, addr);
1241 return;
1242 }
1243
1244 UseScratchRegisterScope temps(this);
1245 if (addr.IsScatterGather()) {
1246 // In scatter-gather modes, zt and zn/zm have the same lane size. However,
1247 // for 32-bit accesses, the result of each lane's address calculation still
1248 // requires 64 bits; we can't naively use `Adr` for the address calculation
1249 // because it would truncate each address to 32 bits.
1250
1251 if (addr.IsVectorPlusImmediate()) {
1252 // Synthesise the immediate in an X register, then use a
1253 // scalar-plus-vector access with the original vector.
1254 Register scratch = temps.AcquireX();
1255 Mov(scratch, addr.GetImmediateOffset());
1256 SingleEmissionCheckScope guard(this);
1257 SVEOffsetModifier om =
1258 zt.IsLaneSizeS() ? SVE_UXTW : NO_SVE_OFFSET_MODIFIER;
1259 (this->*fn)(zt, pg, SVEMemOperand(scratch, addr.GetVectorBase(), om));
1260 return;
1261 }
1262
1263 VIXL_UNIMPLEMENTED();
1264 } else {
1265 Register scratch = temps.AcquireX();
1266 // TODO: If we have an immediate offset that is a multiple of
1267 // msize_in_bytes, we can use Rdvl/Rdpl and a scalar-plus-scalar form to
1268 // save an instruction.
1269 int vl_divisor_log2 = zt.GetLaneSizeInBytesLog2() - msize_in_bytes_log2;
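// For example (illustrative), "ld1b { z0.d }" loads one byte for every
// eight-byte lane, so vl_divisor_log2 is 3 - 0 = 3 and "MUL VL" offsets are
// scaled by VL / 8.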
1270 CalculateSVEAddress(scratch, addr, vl_divisor_log2);
1271 SingleEmissionCheckScope guard(this);
1272 (this->*fn)(zt, pg, SVEMemOperand(scratch));
1273 }
1274 }
1275
1276 template <typename Tf>
void MacroAssembler::SVELoadFFHelper(int msize_in_bytes_log2,
1278 const ZRegister& zt,
1279 const PRegisterZ& pg,
1280 const SVEMemOperand& addr,
1281 Tf fn) {
1282 if (addr.IsScatterGather()) {
1283 // Scatter-gather first-fault loads share encodings with normal loads.
1284 SVELoadStore1Helper(msize_in_bytes_log2, zt, pg, addr, fn);
1285 return;
1286 }
1287
1288 // Contiguous first-faulting loads have no scalar-plus-immediate form at all,
1289 // so we don't do immediate synthesis.
1290
1291 // We cannot currently distinguish "[x0]" from "[x0, #0]", and this
1292 // is not "scalar-plus-scalar", so we have to permit `IsPlainScalar()` here.
1293 if (addr.IsPlainScalar() || (addr.IsScalarPlusScalar() &&
1294 addr.IsEquivalentToLSL(msize_in_bytes_log2))) {
1295 SingleEmissionCheckScope guard(this);
1296 (this->*fn)(zt, pg, addr);
1297 return;
1298 }
1299
1300 VIXL_UNIMPLEMENTED();
1301 }
1302
void MacroAssembler::Ld1b(const ZRegister& zt,
1304 const PRegisterZ& pg,
1305 const SVEMemOperand& addr) {
1306 VIXL_ASSERT(allow_macro_instructions_);
1307 SVELoadStore1Helper(kBRegSizeInBytesLog2,
1308 zt,
1309 pg,
1310 addr,
1311 static_cast<SVELoad1Fn>(&Assembler::ld1b));
1312 }
1313
void MacroAssembler::Ld1h(const ZRegister& zt,
1315 const PRegisterZ& pg,
1316 const SVEMemOperand& addr) {
1317 VIXL_ASSERT(allow_macro_instructions_);
1318 SVELoadStore1Helper(kHRegSizeInBytesLog2,
1319 zt,
1320 pg,
1321 addr,
1322 static_cast<SVELoad1Fn>(&Assembler::ld1h));
1323 }
1324
void MacroAssembler::Ld1w(const ZRegister& zt,
1326 const PRegisterZ& pg,
1327 const SVEMemOperand& addr) {
1328 VIXL_ASSERT(allow_macro_instructions_);
1329 SVELoadStore1Helper(kWRegSizeInBytesLog2,
1330 zt,
1331 pg,
1332 addr,
1333 static_cast<SVELoad1Fn>(&Assembler::ld1w));
1334 }
1335
void MacroAssembler::Ld1d(const ZRegister& zt,
1337 const PRegisterZ& pg,
1338 const SVEMemOperand& addr) {
1339 VIXL_ASSERT(allow_macro_instructions_);
1340 SVELoadStore1Helper(kDRegSizeInBytesLog2,
1341 zt,
1342 pg,
1343 addr,
1344 static_cast<SVELoad1Fn>(&Assembler::ld1d));
1345 }
1346
void MacroAssembler::Ld1sb(const ZRegister& zt,
1348 const PRegisterZ& pg,
1349 const SVEMemOperand& addr) {
1350 VIXL_ASSERT(allow_macro_instructions_);
1351 SVELoadStore1Helper(kBRegSizeInBytesLog2,
1352 zt,
1353 pg,
1354 addr,
1355 static_cast<SVELoad1Fn>(&Assembler::ld1sb));
1356 }
1357
void MacroAssembler::Ld1sh(const ZRegister& zt,
1359 const PRegisterZ& pg,
1360 const SVEMemOperand& addr) {
1361 VIXL_ASSERT(allow_macro_instructions_);
1362 SVELoadStore1Helper(kHRegSizeInBytesLog2,
1363 zt,
1364 pg,
1365 addr,
1366 static_cast<SVELoad1Fn>(&Assembler::ld1sh));
1367 }
1368
void MacroAssembler::Ld1sw(const ZRegister& zt,
1370 const PRegisterZ& pg,
1371 const SVEMemOperand& addr) {
1372 VIXL_ASSERT(allow_macro_instructions_);
1373 SVELoadStore1Helper(kSRegSizeInBytesLog2,
1374 zt,
1375 pg,
1376 addr,
1377 static_cast<SVELoad1Fn>(&Assembler::ld1sw));
1378 }
1379
void MacroAssembler::St1b(const ZRegister& zt,
1381 const PRegister& pg,
1382 const SVEMemOperand& addr) {
1383 VIXL_ASSERT(allow_macro_instructions_);
1384 SVELoadStore1Helper(kBRegSizeInBytesLog2,
1385 zt,
1386 pg,
1387 addr,
1388 static_cast<SVEStore1Fn>(&Assembler::st1b));
1389 }
1390
void MacroAssembler::St1h(const ZRegister& zt,
1392 const PRegister& pg,
1393 const SVEMemOperand& addr) {
1394 VIXL_ASSERT(allow_macro_instructions_);
1395 SVELoadStore1Helper(kHRegSizeInBytesLog2,
1396 zt,
1397 pg,
1398 addr,
1399 static_cast<SVEStore1Fn>(&Assembler::st1h));
1400 }
1401
void MacroAssembler::St1w(const ZRegister& zt,
1403 const PRegister& pg,
1404 const SVEMemOperand& addr) {
1405 VIXL_ASSERT(allow_macro_instructions_);
1406 SVELoadStore1Helper(kSRegSizeInBytesLog2,
1407 zt,
1408 pg,
1409 addr,
1410 static_cast<SVEStore1Fn>(&Assembler::st1w));
1411 }
1412
void MacroAssembler::St1d(const ZRegister& zt,
1414 const PRegister& pg,
1415 const SVEMemOperand& addr) {
1416 VIXL_ASSERT(allow_macro_instructions_);
1417 SVELoadStore1Helper(kDRegSizeInBytesLog2,
1418 zt,
1419 pg,
1420 addr,
1421 static_cast<SVEStore1Fn>(&Assembler::st1d));
1422 }
1423
void MacroAssembler::Ldff1b(const ZRegister& zt,
1425 const PRegisterZ& pg,
1426 const SVEMemOperand& addr) {
1427 VIXL_ASSERT(allow_macro_instructions_);
1428 SVELoadFFHelper(kBRegSizeInBytesLog2,
1429 zt,
1430 pg,
1431 addr,
1432 static_cast<SVELoad1Fn>(&Assembler::ldff1b));
1433 }
1434
void MacroAssembler::Ldff1h(const ZRegister& zt,
1436 const PRegisterZ& pg,
1437 const SVEMemOperand& addr) {
1438 VIXL_ASSERT(allow_macro_instructions_);
1439 SVELoadFFHelper(kHRegSizeInBytesLog2,
1440 zt,
1441 pg,
1442 addr,
1443 static_cast<SVELoad1Fn>(&Assembler::ldff1h));
1444 }
1445
void MacroAssembler::Ldff1w(const ZRegister& zt,
1447 const PRegisterZ& pg,
1448 const SVEMemOperand& addr) {
1449 VIXL_ASSERT(allow_macro_instructions_);
1450 SVELoadFFHelper(kSRegSizeInBytesLog2,
1451 zt,
1452 pg,
1453 addr,
1454 static_cast<SVELoad1Fn>(&Assembler::ldff1w));
1455 }
1456
void MacroAssembler::Ldff1d(const ZRegister& zt,
1458 const PRegisterZ& pg,
1459 const SVEMemOperand& addr) {
1460 VIXL_ASSERT(allow_macro_instructions_);
1461 SVELoadFFHelper(kDRegSizeInBytesLog2,
1462 zt,
1463 pg,
1464 addr,
1465 static_cast<SVELoad1Fn>(&Assembler::ldff1d));
1466 }
1467
void MacroAssembler::Ldff1sb(const ZRegister& zt,
1469 const PRegisterZ& pg,
1470 const SVEMemOperand& addr) {
1471 VIXL_ASSERT(allow_macro_instructions_);
1472 SVELoadFFHelper(kBRegSizeInBytesLog2,
1473 zt,
1474 pg,
1475 addr,
1476 static_cast<SVELoad1Fn>(&Assembler::ldff1sb));
1477 }
1478
void MacroAssembler::Ldff1sh(const ZRegister& zt,
1480 const PRegisterZ& pg,
1481 const SVEMemOperand& addr) {
1482 VIXL_ASSERT(allow_macro_instructions_);
1483 SVELoadFFHelper(kHRegSizeInBytesLog2,
1484 zt,
1485 pg,
1486 addr,
1487 static_cast<SVELoad1Fn>(&Assembler::ldff1sh));
1488 }
1489
void MacroAssembler::Ldff1sw(const ZRegister& zt,
1491 const PRegisterZ& pg,
1492 const SVEMemOperand& addr) {
1493 VIXL_ASSERT(allow_macro_instructions_);
1494 SVELoadFFHelper(kSRegSizeInBytesLog2,
1495 zt,
1496 pg,
1497 addr,
1498 static_cast<SVELoad1Fn>(&Assembler::ldff1sw));
1499 }
1500
1501 #define VIXL_SVE_LD1R_LIST(V) \
1502 V(qb, 4) V(qh, 4) V(qw, 4) V(qd, 4) V(ob, 5) V(oh, 5) V(ow, 5) V(od, 5)
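// The shift amounts above reflect the in-memory transfer size: 4 for the
// 16-byte (quadword) replicating loads and 5 for the 32-byte replicating
// loads; the immediate offset is scaled accordingly.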
1503
1504 #define VIXL_DEFINE_MASM_FUNC(SZ, SH) \
1505 void MacroAssembler::Ld1r##SZ(const ZRegister& zt, \
1506 const PRegisterZ& pg, \
1507 const SVEMemOperand& addr) { \
1508 VIXL_ASSERT(allow_macro_instructions_); \
1509 SVELoadStoreNTBroadcastQOHelper(zt, \
1510 pg, \
1511 addr, \
1512 &MacroAssembler::ld1r##SZ, \
1513 4, \
1514 SH, \
1515 NO_SVE_OFFSET_MODIFIER, \
1516 -1); \
1517 }
1518
VIXL_SVE_LD1R_LIST(VIXL_DEFINE_MASM_FUNC)
1520
1521 #undef VIXL_DEFINE_MASM_FUNC
1522 #undef VIXL_SVE_LD1R_LIST
1523
1524 void MacroAssembler::Ldnt1b(const ZRegister& zt,
1525 const PRegisterZ& pg,
1526 const SVEMemOperand& addr) {
1527 VIXL_ASSERT(allow_macro_instructions_);
1528 if (addr.IsVectorPlusScalar()) {
1529 SingleEmissionCheckScope guard(this);
1530 ldnt1b(zt, pg, addr);
1531 } else {
1532 SVELoadStoreNTBroadcastQOHelper(zt,
1533 pg,
1534 addr,
1535 &MacroAssembler::ldnt1b,
1536 4,
1537 0,
1538 SVE_MUL_VL);
1539 }
1540 }
1541
void MacroAssembler::Ldnt1d(const ZRegister& zt,
1543 const PRegisterZ& pg,
1544 const SVEMemOperand& addr) {
1545 VIXL_ASSERT(allow_macro_instructions_);
1546 if (addr.IsVectorPlusScalar()) {
1547 SingleEmissionCheckScope guard(this);
1548 ldnt1d(zt, pg, addr);
1549 } else {
1550 SVELoadStoreNTBroadcastQOHelper(zt,
1551 pg,
1552 addr,
1553 &MacroAssembler::ldnt1d,
1554 4,
1555 0,
1556 SVE_MUL_VL);
1557 }
1558 }
1559
void MacroAssembler::Ldnt1h(const ZRegister& zt,
1561 const PRegisterZ& pg,
1562 const SVEMemOperand& addr) {
1563 VIXL_ASSERT(allow_macro_instructions_);
1564 if (addr.IsVectorPlusScalar()) {
1565 SingleEmissionCheckScope guard(this);
1566 ldnt1h(zt, pg, addr);
1567 } else {
1568 SVELoadStoreNTBroadcastQOHelper(zt,
1569 pg,
1570 addr,
1571 &MacroAssembler::ldnt1h,
1572 4,
1573 0,
1574 SVE_MUL_VL);
1575 }
1576 }
1577
void MacroAssembler::Ldnt1w(const ZRegister& zt,
1579 const PRegisterZ& pg,
1580 const SVEMemOperand& addr) {
1581 VIXL_ASSERT(allow_macro_instructions_);
1582 if (addr.IsVectorPlusScalar()) {
1583 SingleEmissionCheckScope guard(this);
1584 ldnt1w(zt, pg, addr);
1585 } else {
1586 SVELoadStoreNTBroadcastQOHelper(zt,
1587 pg,
1588 addr,
1589 &MacroAssembler::ldnt1w,
1590 4,
1591 0,
1592 SVE_MUL_VL);
1593 }
1594 }
1595
void MacroAssembler::Stnt1b(const ZRegister& zt,
1597 const PRegister& pg,
1598 const SVEMemOperand& addr) {
1599 VIXL_ASSERT(allow_macro_instructions_);
1600 if (addr.IsVectorPlusScalar()) {
1601 SingleEmissionCheckScope guard(this);
1602 stnt1b(zt, pg, addr);
1603 } else {
1604 SVELoadStoreNTBroadcastQOHelper(zt,
1605 pg,
1606 addr,
1607 &MacroAssembler::stnt1b,
1608 4,
1609 0,
1610 SVE_MUL_VL);
1611 }
1612 }
void MacroAssembler::Stnt1d(const ZRegister& zt,
1614 const PRegister& pg,
1615 const SVEMemOperand& addr) {
1616 VIXL_ASSERT(allow_macro_instructions_);
1617 if (addr.IsVectorPlusScalar()) {
1618 SingleEmissionCheckScope guard(this);
1619 stnt1d(zt, pg, addr);
1620 } else {
1621 SVELoadStoreNTBroadcastQOHelper(zt,
1622 pg,
1623 addr,
1624 &MacroAssembler::stnt1d,
1625 4,
1626 0,
1627 SVE_MUL_VL);
1628 }
1629 }
void MacroAssembler::Stnt1h(const ZRegister& zt,
1631 const PRegister& pg,
1632 const SVEMemOperand& addr) {
1633 VIXL_ASSERT(allow_macro_instructions_);
1634 if (addr.IsVectorPlusScalar()) {
1635 SingleEmissionCheckScope guard(this);
1636 stnt1h(zt, pg, addr);
1637 } else {
1638 SVELoadStoreNTBroadcastQOHelper(zt,
1639 pg,
1640 addr,
1641 &MacroAssembler::stnt1h,
1642 4,
1643 0,
1644 SVE_MUL_VL);
1645 }
1646 }
void MacroAssembler::Stnt1w(const ZRegister& zt,
1648 const PRegister& pg,
1649 const SVEMemOperand& addr) {
1650 VIXL_ASSERT(allow_macro_instructions_);
1651 if (addr.IsVectorPlusScalar()) {
1652 SingleEmissionCheckScope guard(this);
1653 stnt1w(zt, pg, addr);
1654 } else {
1655 SVELoadStoreNTBroadcastQOHelper(zt,
1656 pg,
1657 addr,
1658 &MacroAssembler::stnt1w,
1659 4,
1660 0,
1661 SVE_MUL_VL);
1662 }
1663 }
1664
void MacroAssembler::SVEDotIndexHelper(ZZZImmFn fn,
1666 const ZRegister& zd,
1667 const ZRegister& za,
1668 const ZRegister& zn,
1669 const ZRegister& zm,
1670 int index) {
1671 if (zd.Aliases(za)) {
1672 // zda = zda + (zn . zm)
1673 SingleEmissionCheckScope guard(this);
1674 (this->*fn)(zd, zn, zm, index);
1675
1676 } else if (zd.Aliases(zn) || zd.Aliases(zm)) {
1677 // zdn = za + (zdn . zm[index])
1678 // zdm = za + (zn . zdm[index])
1679 // zdnm = za + (zdnm . zdnm[index])
1680 UseScratchRegisterScope temps(this);
1681 ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
1682 {
1683 MovprfxHelperScope guard(this, scratch, za);
1684 (this->*fn)(scratch, zn, zm, index);
1685 }
1686
1687 Mov(zd, scratch);
1688 } else {
1689 // zd = za + (zn . zm)
1690 MovprfxHelperScope guard(this, zd, za);
1691 (this->*fn)(zd, zn, zm, index);
1692 }
1693 }
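
// Sketch of the scratch-register fallback above, with hypothetical registers
// and a MacroAssembler instance `masm` (ztmp stands for whatever
// UseScratchRegisterScope hands back). When the destination aliases a
// multiplicand but not the accumulator:
//
//   masm.Sdot(z0.VnS(), z1.VnS(), z0.VnB(), z2.VnB(), 1);
//   // expands to roughly:
//   //   movprfx ztmp, z1
//   //   sdot    ztmp.s, z0.b, z2.b[1]
//   //   mov     z0.d, ztmp.d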

void MacroAssembler::FourRegDestructiveHelper(Int3ArithFn fn,
                                              const ZRegister& zd,
                                              const ZRegister& za,
                                              const ZRegister& zn,
                                              const ZRegister& zm) {
  if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
    // zd = za . zd . zm
    // zd = za . zn . zd
    // zd = za . zd . zd
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, za);
      (this->*fn)(scratch, zn, zm);
    }

    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, zm);
  }
}

void MacroAssembler::FourRegDestructiveHelper(Int4ArithFn fn,
                                              const ZRegister& zd,
                                              const ZRegister& za,
                                              const ZRegister& zn,
                                              const ZRegister& zm) {
  if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
    // zd = za . zd . zm
    // zd = za . zn . zd
    // zd = za . zd . zd
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, za);
      (this->*fn)(scratch, scratch, zn, zm);
    }

    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zd, zn, zm);
  }
}
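
// Illustrative expansion (hypothetical registers, MacroAssembler `masm`): for
// a four-register destructive instruction such as EOR3, a destination that
// aliases zn or zm but not za goes through the scratch path above.
//
//   masm.Eor3(z0.VnD(), z1.VnD(), z0.VnD(), z2.VnD());
//   // expands to roughly:
//   //   movprfx ztmp, z1
//   //   eor3    ztmp.d, ztmp.d, z0.d, z2.d
//   //   mov     z0.d, ztmp.d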

void MacroAssembler::FourRegOneImmDestructiveHelper(ZZZImmFn fn,
                                                    const ZRegister& zd,
                                                    const ZRegister& za,
                                                    const ZRegister& zn,
                                                    const ZRegister& zm,
                                                    int imm) {
  if (!zd.Aliases(za) && (zd.Aliases(zn) || zd.Aliases(zm))) {
    // zd = za . zd . zm[i]
    // zd = za . zn . zd[i]
    // zd = za . zd . zd[i]
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, za);
      (this->*fn)(scratch, zn, zm, imm);
    }

    Mov(zd, scratch);
  } else {
    // zd = za . zn . zm[i]
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, zm, imm);
  }
}

void MacroAssembler::AbsoluteDifferenceAccumulate(Int3ArithFn fn,
                                                  const ZRegister& zd,
                                                  const ZRegister& za,
                                                  const ZRegister& zn,
                                                  const ZRegister& zm) {
  if (zn.Aliases(zm)) {
    // If zn == zm, the difference is zero.
    if (!zd.Aliases(za)) {
      Mov(zd, za);
    }
  } else if (zd.Aliases(za)) {
    SingleEmissionCheckScope guard(this);
    (this->*fn)(zd, zn, zm);
  } else if (zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
    Mov(ztmp, zn);
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, ztmp, zm);
  } else if (zd.Aliases(zm)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithLaneSize(zn.GetLaneSizeInBits());
    Mov(ztmp, zm);
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, ztmp);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, zm);
  }
}
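
// For illustration (hypothetical registers, MacroAssembler `masm`): the
// zn == zm shortcut above means that accumulating a zero difference
// degenerates to a plain move, or to no code at all when zd already aliases
// za.
//
//   masm.Uaba(z0.VnB(), z1.VnB(), z2.VnB(), z2.VnB());
//   // expands to roughly:  mov z0.d, z1.d   (no uaba is emitted)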

#define VIXL_SVE_4REG_LIST(V)                       \
  V(Saba, saba, AbsoluteDifferenceAccumulate)       \
  V(Uaba, uaba, AbsoluteDifferenceAccumulate)       \
  V(Sabalb, sabalb, AbsoluteDifferenceAccumulate)   \
  V(Sabalt, sabalt, AbsoluteDifferenceAccumulate)   \
  V(Uabalb, uabalb, AbsoluteDifferenceAccumulate)   \
  V(Uabalt, uabalt, AbsoluteDifferenceAccumulate)   \
  V(Sdot, sdot, FourRegDestructiveHelper)           \
  V(Udot, udot, FourRegDestructiveHelper)           \
  V(Adclb, adclb, FourRegDestructiveHelper)         \
  V(Adclt, adclt, FourRegDestructiveHelper)         \
  V(Sbclb, sbclb, FourRegDestructiveHelper)         \
  V(Sbclt, sbclt, FourRegDestructiveHelper)         \
  V(Smlalb, smlalb, FourRegDestructiveHelper)       \
  V(Smlalt, smlalt, FourRegDestructiveHelper)       \
  V(Smlslb, smlslb, FourRegDestructiveHelper)       \
  V(Smlslt, smlslt, FourRegDestructiveHelper)       \
  V(Umlalb, umlalb, FourRegDestructiveHelper)       \
  V(Umlalt, umlalt, FourRegDestructiveHelper)       \
  V(Umlslb, umlslb, FourRegDestructiveHelper)       \
  V(Umlslt, umlslt, FourRegDestructiveHelper)       \
  V(Bcax, bcax, FourRegDestructiveHelper)           \
  V(Bsl, bsl, FourRegDestructiveHelper)             \
  V(Bsl1n, bsl1n, FourRegDestructiveHelper)         \
  V(Bsl2n, bsl2n, FourRegDestructiveHelper)         \
  V(Eor3, eor3, FourRegDestructiveHelper)           \
  V(Nbsl, nbsl, FourRegDestructiveHelper)           \
  V(Fmlalb, fmlalb, FourRegDestructiveHelper)       \
  V(Fmlalt, fmlalt, FourRegDestructiveHelper)       \
  V(Fmlslb, fmlslb, FourRegDestructiveHelper)       \
  V(Fmlslt, fmlslt, FourRegDestructiveHelper)       \
  V(Sqdmlalb, sqdmlalb, FourRegDestructiveHelper)   \
  V(Sqdmlalbt, sqdmlalbt, FourRegDestructiveHelper) \
  V(Sqdmlalt, sqdmlalt, FourRegDestructiveHelper)   \
  V(Sqdmlslb, sqdmlslb, FourRegDestructiveHelper)   \
  V(Sqdmlslbt, sqdmlslbt, FourRegDestructiveHelper) \
  V(Sqdmlslt, sqdmlslt, FourRegDestructiveHelper)   \
  V(Sqrdmlah, sqrdmlah, FourRegDestructiveHelper)   \
  V(Sqrdmlsh, sqrdmlsh, FourRegDestructiveHelper)   \
  V(Fmmla, fmmla, FourRegDestructiveHelper)         \
  V(Smmla, smmla, FourRegDestructiveHelper)         \
  V(Ummla, ummla, FourRegDestructiveHelper)         \
  V(Usmmla, usmmla, FourRegDestructiveHelper)       \
  V(Usdot, usdot, FourRegDestructiveHelper)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
  void MacroAssembler::MASMFN(const ZRegister& zd,   \
                              const ZRegister& za,   \
                              const ZRegister& zn,   \
                              const ZRegister& zm) { \
    VIXL_ASSERT(allow_macro_instructions_);          \
    HELPER(&Assembler::ASMFN, zd, za, zn, zm);       \
  }
VIXL_SVE_4REG_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC
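
// For reference, each entry in VIXL_SVE_4REG_LIST expands to a thin wrapper;
// for example the (Saba, saba, AbsoluteDifferenceAccumulate) entry produces,
// in effect:
//
//   void MacroAssembler::Saba(const ZRegister& zd,
//                             const ZRegister& za,
//                             const ZRegister& zn,
//                             const ZRegister& zm) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     AbsoluteDifferenceAccumulate(&Assembler::saba, zd, za, zn, zm);
//   }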

#define VIXL_SVE_4REG_1IMM_LIST(V)                      \
  V(Fmla, fmla, FourRegOneImmDestructiveHelper)         \
  V(Fmls, fmls, FourRegOneImmDestructiveHelper)         \
  V(Fmlalb, fmlalb, FourRegOneImmDestructiveHelper)     \
  V(Fmlalt, fmlalt, FourRegOneImmDestructiveHelper)     \
  V(Fmlslb, fmlslb, FourRegOneImmDestructiveHelper)     \
  V(Fmlslt, fmlslt, FourRegOneImmDestructiveHelper)     \
  V(Mla, mla, FourRegOneImmDestructiveHelper)           \
  V(Mls, mls, FourRegOneImmDestructiveHelper)           \
  V(Smlalb, smlalb, FourRegOneImmDestructiveHelper)     \
  V(Smlalt, smlalt, FourRegOneImmDestructiveHelper)     \
  V(Smlslb, smlslb, FourRegOneImmDestructiveHelper)     \
  V(Smlslt, smlslt, FourRegOneImmDestructiveHelper)     \
  V(Sqdmlalb, sqdmlalb, FourRegOneImmDestructiveHelper) \
  V(Sqdmlalt, sqdmlalt, FourRegOneImmDestructiveHelper) \
  V(Sqdmlslb, sqdmlslb, FourRegOneImmDestructiveHelper) \
  V(Sqdmlslt, sqdmlslt, FourRegOneImmDestructiveHelper) \
  V(Sqrdmlah, sqrdmlah, FourRegOneImmDestructiveHelper) \
  V(Sqrdmlsh, sqrdmlsh, FourRegOneImmDestructiveHelper) \
  V(Umlalb, umlalb, FourRegOneImmDestructiveHelper)     \
  V(Umlalt, umlalt, FourRegOneImmDestructiveHelper)     \
  V(Umlslb, umlslb, FourRegOneImmDestructiveHelper)     \
  V(Umlslt, umlslt, FourRegOneImmDestructiveHelper)

#define VIXL_DEFINE_MASM_FUNC(MASMFN, ASMFN, HELPER) \
  void MacroAssembler::MASMFN(const ZRegister& zd,   \
                              const ZRegister& za,   \
                              const ZRegister& zn,   \
                              const ZRegister& zm,   \
                              int imm) {             \
    VIXL_ASSERT(allow_macro_instructions_);          \
    HELPER(&Assembler::ASMFN, zd, za, zn, zm, imm);  \
  }
VIXL_SVE_4REG_1IMM_LIST(VIXL_DEFINE_MASM_FUNC)
#undef VIXL_DEFINE_MASM_FUNC
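
// Similarly, each entry in VIXL_SVE_4REG_1IMM_LIST yields a wrapper that also
// forwards an index; for example the indexed Fmla becomes, in effect:
//
//   void MacroAssembler::Fmla(const ZRegister& zd,
//                             const ZRegister& za,
//                             const ZRegister& zn,
//                             const ZRegister& zm,
//                             int imm) {
//     VIXL_ASSERT(allow_macro_instructions_);
//     FourRegOneImmDestructiveHelper(&Assembler::fmla, zd, za, zn, zm, imm);
//   }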

void MacroAssembler::Sdot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::sdot, zd, za, zn, zm, index);
}

void MacroAssembler::Udot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::udot, zd, za, zn, zm, index);
}

void MacroAssembler::Sudot(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::sudot, zd, za, zn, zm, index);
}

void MacroAssembler::Usdot(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int index) {
  VIXL_ASSERT(allow_macro_instructions_);
  SVEDotIndexHelper(&Assembler::usdot, zd, za, zn, zm, index);
}

void MacroAssembler::Cdot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int index,
                          int rot) {
  // This doesn't handle zm when it is out of the range that can be encoded in
  // the instruction. The range depends on the element size: z0-z7 for B,
  // z0-z15 for H.
  if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, ztmp, za);
      cdot(ztmp, zn, zm, index, rot);
    }
    Mov(zd, ztmp);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    cdot(zd, zn, zm, index, rot);
  }
}

void MacroAssembler::Cdot(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int rot) {
  if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
    UseScratchRegisterScope temps(this);
    VIXL_ASSERT(AreSameLaneSize(zn, zm));
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
    Mov(ztmp, zd.Aliases(zn) ? zn : zm);
    MovprfxHelperScope guard(this, zd, za);
    cdot(zd, (zd.Aliases(zn) ? ztmp : zn), (zd.Aliases(zm) ? ztmp : zm), rot);
  } else {
    MovprfxHelperScope guard(this, zd, za);
    cdot(zd, zn, zm, rot);
  }
}
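
// Illustrative expansion of the aliasing path above (hypothetical registers,
// MacroAssembler `masm`): the aliased multiplicand is copied into a scratch
// register first, so the operand order of the final cdot matches the caller's
// request.
//
//   masm.Cdot(z0.VnS(), z1.VnS(), z0.VnB(), z2.VnB(), 90);
//   // expands to roughly:
//   //   mov     ztmp.d, z0.d
//   //   movprfx z0, z1
//   //   cdot    z0.s, ztmp.b, z2.b, #90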

void MacroAssembler::FPMulAddHelper(const ZRegister& zd,
                                    const PRegisterM& pg,
                                    const ZRegister& za,
                                    const ZRegister& zn,
                                    const ZRegister& zm,
                                    SVEMulAddPredicatedZdaFn fn_zda,
                                    SVEMulAddPredicatedZdnFn fn_zdn,
                                    FPMacroNaNPropagationOption nan_option) {
  ResolveFPNaNPropagationOption(&nan_option);

  if (zd.Aliases(za)) {
    // zda = (-)zda + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
    SingleEmissionCheckScope guard(this);
    (this->*fn_zda)(zd, pg, zn, zm);
  } else if (zd.Aliases(zn)) {
    // zdn = (-)za + ((-)zdn * zm) for fmad, fmsb, fnmad and fnmsb.
    SingleEmissionCheckScope guard(this);
    (this->*fn_zdn)(zd, pg, zm, za);
  } else if (zd.Aliases(zm)) {
    switch (nan_option) {
      case FastNaNPropagation: {
        // We treat multiplication as commutative in the fast mode, so we can
        // swap zn and zm.
        // zdm = (-)za + ((-)zdm * zn) for fmad, fmsb, fnmad and fnmsb.
        SingleEmissionCheckScope guard(this);
        (this->*fn_zdn)(zd, pg, zn, za);
        return;
      }
      case StrictNaNPropagation: {
        UseScratchRegisterScope temps(this);
        // Use a scratch register to keep the argument order exactly as
        // specified.
        ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zn);
        {
          MovprfxHelperScope guard(this, scratch, pg, za);
          // scratch = (-)za + ((-)zn * zm)
          (this->*fn_zda)(scratch, pg, zn, zm);
        }
        Mov(zd, scratch);
        return;
      }
      case NoFPMacroNaNPropagationSelected:
        VIXL_UNREACHABLE();
        return;
    }
  } else {
    // zd = (-)za + ((-)zn * zm) for fmla, fmls, fnmla and fnmls.
    MovprfxHelperScope guard(this, zd, pg, za);
    (this->*fn_zda)(zd, pg, zn, zm);
  }
}
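
// A sketch of how the aliasing rules above select between the zda and zdn
// instruction forms (hypothetical registers, MacroAssembler `masm`,
// FastNaNPropagation):
//
//   masm.Fmla(z0.VnS(), p0.Merging(), z0.VnS(), z1.VnS(), z2.VnS(),
//             FastNaNPropagation);
//   //   -> fmla z0.s, p0/m, z1.s, z2.s     (zd aliases za)
//   masm.Fmla(z0.VnS(), p0.Merging(), z1.VnS(), z0.VnS(), z2.VnS(),
//             FastNaNPropagation);
//   //   -> fmad z0.s, p0/m, z2.s, z1.s     (zd aliases zn)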

void MacroAssembler::Fmla(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fmla,
                 &Assembler::fmad,
                 nan_option);
}

void MacroAssembler::Fmls(const ZRegister& zd,
                          const PRegisterM& pg,
                          const ZRegister& za,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fmls,
                 &Assembler::fmsb,
                 nan_option);
}

void MacroAssembler::Fnmla(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fnmla,
                 &Assembler::fnmad,
                 nan_option);
}

void MacroAssembler::Fnmls(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           FPMacroNaNPropagationOption nan_option) {
  VIXL_ASSERT(allow_macro_instructions_);
  FPMulAddHelper(zd,
                 pg,
                 za,
                 zn,
                 zm,
                 &Assembler::fnmls,
                 &Assembler::fnmsb,
                 nan_option);
}

void MacroAssembler::Ftmad(const ZRegister& zd,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int imm3) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zm);
    Mov(scratch, zm);
    MovprfxHelperScope guard(this, zd, zn);
    ftmad(zd, zd, scratch, imm3);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    ftmad(zd, zd, zm, imm3);
  }
}

void MacroAssembler::Fcadd(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int rot) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, pg, zn);
      fcadd(scratch, pg, scratch, zm, rot);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, pg, zn);
    fcadd(zd, pg, zd, zm, rot);
  }
}

void MacroAssembler::Fcmla(const ZRegister& zd,
                           const PRegisterM& pg,
                           const ZRegister& za,
                           const ZRegister& zn,
                           const ZRegister& zm,
                           int rot) {
  VIXL_ASSERT(allow_macro_instructions_);
  if ((zd.Aliases(zn) || zd.Aliases(zm)) && !zd.Aliases(za)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, ztmp, za);
      fcmla(ztmp, pg, zn, zm, rot);
    }
    Mov(zd, pg, ztmp);
  } else {
    MovprfxHelperScope guard(this, zd, pg, za);
    fcmla(zd, pg, zn, zm, rot);
  }
}

void MacroAssembler::Splice(const ZRegister& zd,
                            const PRegister& pg,
                            const ZRegister& zn,
                            const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (CPUHas(CPUFeatures::kSVE2) && AreConsecutive(zn, zm) && !zd.Aliases(zn)) {
    SingleEmissionCheckScope guard(this);
    splice(zd, pg, zn, zm);
  } else if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, zn);
      splice(scratch, pg, scratch, zm);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    splice(zd, pg, zd, zm);
  }
}
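
// Note on the first case above: the SVE2 constructive SPLICE form requires
// { zn, zm } to be consecutive registers, which is why AreConsecutive(zn, zm)
// gates it. For illustration (hypothetical registers, MacroAssembler `masm`,
// SVE2 available):
//
//   masm.Splice(z0.VnB(), p0, z1.VnB(), z2.VnB());
//   //   -> splice z0.b, p0, { z1.b, z2.b }
//   masm.Splice(z0.VnB(), p0, z1.VnB(), z3.VnB());
//   //   -> movprfx z0, z1
//   //      splice  z0.b, p0, z0.b, z3.b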

void MacroAssembler::Clasta(const ZRegister& zd,
                            const PRegister& pg,
                            const ZRegister& zn,
                            const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, zn);
      clasta(scratch, pg, scratch, zm);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    clasta(zd, pg, zd, zm);
  }
}

void MacroAssembler::Clastb(const ZRegister& zd,
                            const PRegister& pg,
                            const ZRegister& zn,
                            const ZRegister& zm) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (zd.Aliases(zm) && !zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister scratch = temps.AcquireZ().WithSameLaneSizeAs(zd);
    {
      MovprfxHelperScope guard(this, scratch, zn);
      clastb(scratch, pg, scratch, zm);
    }
    Mov(zd, scratch);
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    clastb(zd, pg, zd, zm);
  }
}

void MacroAssembler::ShiftRightAccumulate(IntArithImmFn fn,
                                          const ZRegister& zd,
                                          const ZRegister& za,
                                          const ZRegister& zn,
                                          int shift) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (!zd.Aliases(za) && zd.Aliases(zn)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zn);
    Mov(ztmp, zn);
    {
      MovprfxHelperScope guard(this, zd, za);
      (this->*fn)(zd, ztmp, shift);
    }
  } else {
    MovprfxHelperScope guard(this, zd, za);
    (this->*fn)(zd, zn, shift);
  }
}
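
// Illustrative expansion (hypothetical registers, MacroAssembler `masm`):
// when zd aliases zn but not za, the value to be shifted is preserved in a
// scratch register before the accumulator is copied into zd.
//
//   masm.Usra(z0.VnD(), z1.VnD(), z0.VnD(), 3);
//   // expands to roughly:
//   //   mov     ztmp.d, z0.d
//   //   movprfx z0, z1
//   //   usra    z0.d, ztmp.d, #3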

void MacroAssembler::Srsra(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           int shift) {
  ShiftRightAccumulate(&Assembler::srsra, zd, za, zn, shift);
}

void MacroAssembler::Ssra(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          int shift) {
  ShiftRightAccumulate(&Assembler::ssra, zd, za, zn, shift);
}

void MacroAssembler::Ursra(const ZRegister& zd,
                           const ZRegister& za,
                           const ZRegister& zn,
                           int shift) {
  ShiftRightAccumulate(&Assembler::ursra, zd, za, zn, shift);
}

void MacroAssembler::Usra(const ZRegister& zd,
                          const ZRegister& za,
                          const ZRegister& zn,
                          int shift) {
  ShiftRightAccumulate(&Assembler::usra, zd, za, zn, shift);
}

void MacroAssembler::ComplexAddition(ZZZImmFn fn,
                                     const ZRegister& zd,
                                     const ZRegister& zn,
                                     const ZRegister& zm,
                                     int rot) {
  VIXL_ASSERT(allow_macro_instructions_);
  if (!zd.Aliases(zn) && zd.Aliases(zm)) {
    UseScratchRegisterScope temps(this);
    ZRegister ztmp = temps.AcquireZ().WithSameLaneSizeAs(zm);
    Mov(ztmp, zm);
    {
      MovprfxHelperScope guard(this, zd, zn);
      (this->*fn)(zd, zd, ztmp, rot);
    }
  } else {
    MovprfxHelperScope guard(this, zd, zn);
    (this->*fn)(zd, zd, zm, rot);
  }
}

void MacroAssembler::Cadd(const ZRegister& zd,
                          const ZRegister& zn,
                          const ZRegister& zm,
                          int rot) {
  ComplexAddition(&Assembler::cadd, zd, zn, zm, rot);
}

void MacroAssembler::Sqcadd(const ZRegister& zd,
                            const ZRegister& zn,
                            const ZRegister& zm,
                            int rot) {
  ComplexAddition(&Assembler::sqcadd, zd, zn, zm, rot);
}

}  // namespace aarch64
}  // namespace vixl