1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29 #include <cmath>
30
31 #include "simulator-aarch64.h"
32
33 namespace vixl {
34 namespace aarch64 {
35
36 using vixl::internal::SimFloat16;
37
// Type query: IsFloat64<T>() answers whether T is `double`.
// The primary template says "no" for every type...
template <typename T>
bool IsFloat64() { return false; }

// ...and the explicit specialisation for `double` says "yes".
template <>
bool IsFloat64<double>() { return true; }
46
// Type query: IsFloat32<T>() answers whether T is `float`.
// The primary template says "no" for every type...
template <typename T>
bool IsFloat32() { return false; }

// ...and the explicit specialisation for `float` says "yes".
template <>
bool IsFloat32<float>() { return true; }
55
// Type query: IsFloat16<T>() answers whether T is a half-precision type.
// This primary template is the fallback and reports false.
template <typename T>
bool IsFloat16() { return false; }
60 template <>
IsFloat16()61 bool IsFloat16<Float16>() {
62 return true;
63 }
64 template <>
IsFloat16()65 bool IsFloat16<SimFloat16>() {
66 return true;
67 }
68
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71 return kFP64DefaultNaN;
72 }
73
74
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77 return kFP32DefaultNaN;
78 }
79
80
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83 return SimFloat16(kFP16DefaultNaN);
84 }
85
86
FixedToDouble(int64_t src,int fbits,FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88 if (src >= 0) {
89 return UFixedToDouble(src, fbits, round);
90 } else if (src == INT64_MIN) {
91 return -UFixedToDouble(src, fbits, round);
92 } else {
93 return -UFixedToDouble(-src, fbits, round);
94 }
95 }
96
97
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99 // An input of 0 is a special case because the result is effectively
100 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101 if (src == 0) {
102 return 0.0;
103 }
104
105 // Calculate the exponent. The highest significant bit will have the value
106 // 2^exponent.
107 const int highest_significant_bit = 63 - CountLeadingZeros(src);
108 const int64_t exponent = highest_significant_bit - fbits;
109
110 return FPRoundToDouble(0, exponent, src, round);
111 }
112
113
FixedToFloat(int64_t src,int fbits,FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115 if (src >= 0) {
116 return UFixedToFloat(src, fbits, round);
117 } else if (src == INT64_MIN) {
118 return -UFixedToFloat(src, fbits, round);
119 } else {
120 return -UFixedToFloat(-src, fbits, round);
121 }
122 }
123
124
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126 // An input of 0 is a special case because the result is effectively
127 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128 if (src == 0) {
129 return 0.0f;
130 }
131
132 // Calculate the exponent. The highest significant bit will have the value
133 // 2^exponent.
134 const int highest_significant_bit = 63 - CountLeadingZeros(src);
135 const int32_t exponent = highest_significant_bit - fbits;
136
137 return FPRoundToFloat(0, exponent, src, round);
138 }
139
140
FixedToFloat16(int64_t src,int fbits,FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142 if (src >= 0) {
143 return UFixedToFloat16(src, fbits, round);
144 } else if (src == INT64_MIN) {
145 return -UFixedToFloat16(src, fbits, round);
146 } else {
147 return -UFixedToFloat16(-src, fbits, round);
148 }
149 }
150
151
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153 int fbits,
154 FPRounding round) {
155 // An input of 0 is a special case because the result is effectively
156 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157 if (src == 0) {
158 return 0.0f;
159 }
160
161 // Calculate the exponent. The highest significant bit will have the value
162 // 2^exponent.
163 const int highest_significant_bit = 63 - CountLeadingZeros(src);
164 const int16_t exponent = highest_significant_bit - fbits;
165
166 return FPRoundToFloat16(0, exponent, src, round);
167 }
168
169
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)170 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
171 dst.ClearForWrite(vform);
172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
173 LoadLane(dst, vform, i, addr);
174 addr += LaneSizeInBytesFromFormat(vform);
175 }
176 }
177
178
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)179 void Simulator::ld1(VectorFormat vform,
180 LogicVRegister dst,
181 int index,
182 uint64_t addr) {
183 LoadLane(dst, vform, index, addr);
184 }
185
186
ld1r(VectorFormat vform,VectorFormat unpack_vform,LogicVRegister dst,uint64_t addr,bool is_signed)187 void Simulator::ld1r(VectorFormat vform,
188 VectorFormat unpack_vform,
189 LogicVRegister dst,
190 uint64_t addr,
191 bool is_signed) {
192 unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
193 dst.ClearForWrite(vform);
194 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
195 if (is_signed) {
196 LoadIntToLane(dst, vform, unpack_size, i, addr);
197 } else {
198 LoadUintToLane(dst, vform, unpack_size, i, addr);
199 }
200 }
201 }
202
203
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)204 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
205 ld1r(vform, vform, dst, addr);
206 }
207
208
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)209 void Simulator::ld2(VectorFormat vform,
210 LogicVRegister dst1,
211 LogicVRegister dst2,
212 uint64_t addr1) {
213 dst1.ClearForWrite(vform);
214 dst2.ClearForWrite(vform);
215 int esize = LaneSizeInBytesFromFormat(vform);
216 uint64_t addr2 = addr1 + esize;
217 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
218 LoadLane(dst1, vform, i, addr1);
219 LoadLane(dst2, vform, i, addr2);
220 addr1 += 2 * esize;
221 addr2 += 2 * esize;
222 }
223 }
224
225
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)226 void Simulator::ld2(VectorFormat vform,
227 LogicVRegister dst1,
228 LogicVRegister dst2,
229 int index,
230 uint64_t addr1) {
231 dst1.ClearForWrite(vform);
232 dst2.ClearForWrite(vform);
233 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
234 LoadLane(dst1, vform, index, addr1);
235 LoadLane(dst2, vform, index, addr2);
236 }
237
238
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)239 void Simulator::ld2r(VectorFormat vform,
240 LogicVRegister dst1,
241 LogicVRegister dst2,
242 uint64_t addr) {
243 dst1.ClearForWrite(vform);
244 dst2.ClearForWrite(vform);
245 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
246 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
247 LoadLane(dst1, vform, i, addr);
248 LoadLane(dst2, vform, i, addr2);
249 }
250 }
251
252
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)253 void Simulator::ld3(VectorFormat vform,
254 LogicVRegister dst1,
255 LogicVRegister dst2,
256 LogicVRegister dst3,
257 uint64_t addr1) {
258 dst1.ClearForWrite(vform);
259 dst2.ClearForWrite(vform);
260 dst3.ClearForWrite(vform);
261 int esize = LaneSizeInBytesFromFormat(vform);
262 uint64_t addr2 = addr1 + esize;
263 uint64_t addr3 = addr2 + esize;
264 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
265 LoadLane(dst1, vform, i, addr1);
266 LoadLane(dst2, vform, i, addr2);
267 LoadLane(dst3, vform, i, addr3);
268 addr1 += 3 * esize;
269 addr2 += 3 * esize;
270 addr3 += 3 * esize;
271 }
272 }
273
274
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)275 void Simulator::ld3(VectorFormat vform,
276 LogicVRegister dst1,
277 LogicVRegister dst2,
278 LogicVRegister dst3,
279 int index,
280 uint64_t addr1) {
281 dst1.ClearForWrite(vform);
282 dst2.ClearForWrite(vform);
283 dst3.ClearForWrite(vform);
284 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
285 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
286 LoadLane(dst1, vform, index, addr1);
287 LoadLane(dst2, vform, index, addr2);
288 LoadLane(dst3, vform, index, addr3);
289 }
290
291
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)292 void Simulator::ld3r(VectorFormat vform,
293 LogicVRegister dst1,
294 LogicVRegister dst2,
295 LogicVRegister dst3,
296 uint64_t addr) {
297 dst1.ClearForWrite(vform);
298 dst2.ClearForWrite(vform);
299 dst3.ClearForWrite(vform);
300 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
301 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
302 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
303 LoadLane(dst1, vform, i, addr);
304 LoadLane(dst2, vform, i, addr2);
305 LoadLane(dst3, vform, i, addr3);
306 }
307 }
308
309
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)310 void Simulator::ld4(VectorFormat vform,
311 LogicVRegister dst1,
312 LogicVRegister dst2,
313 LogicVRegister dst3,
314 LogicVRegister dst4,
315 uint64_t addr1) {
316 dst1.ClearForWrite(vform);
317 dst2.ClearForWrite(vform);
318 dst3.ClearForWrite(vform);
319 dst4.ClearForWrite(vform);
320 int esize = LaneSizeInBytesFromFormat(vform);
321 uint64_t addr2 = addr1 + esize;
322 uint64_t addr3 = addr2 + esize;
323 uint64_t addr4 = addr3 + esize;
324 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
325 LoadLane(dst1, vform, i, addr1);
326 LoadLane(dst2, vform, i, addr2);
327 LoadLane(dst3, vform, i, addr3);
328 LoadLane(dst4, vform, i, addr4);
329 addr1 += 4 * esize;
330 addr2 += 4 * esize;
331 addr3 += 4 * esize;
332 addr4 += 4 * esize;
333 }
334 }
335
336
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)337 void Simulator::ld4(VectorFormat vform,
338 LogicVRegister dst1,
339 LogicVRegister dst2,
340 LogicVRegister dst3,
341 LogicVRegister dst4,
342 int index,
343 uint64_t addr1) {
344 dst1.ClearForWrite(vform);
345 dst2.ClearForWrite(vform);
346 dst3.ClearForWrite(vform);
347 dst4.ClearForWrite(vform);
348 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
349 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
350 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
351 LoadLane(dst1, vform, index, addr1);
352 LoadLane(dst2, vform, index, addr2);
353 LoadLane(dst3, vform, index, addr3);
354 LoadLane(dst4, vform, index, addr4);
355 }
356
357
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)358 void Simulator::ld4r(VectorFormat vform,
359 LogicVRegister dst1,
360 LogicVRegister dst2,
361 LogicVRegister dst3,
362 LogicVRegister dst4,
363 uint64_t addr) {
364 dst1.ClearForWrite(vform);
365 dst2.ClearForWrite(vform);
366 dst3.ClearForWrite(vform);
367 dst4.ClearForWrite(vform);
368 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
369 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
370 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
371 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
372 LoadLane(dst1, vform, i, addr);
373 LoadLane(dst2, vform, i, addr2);
374 LoadLane(dst3, vform, i, addr3);
375 LoadLane(dst4, vform, i, addr4);
376 }
377 }
378
379
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)380 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
381 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
382 StoreLane(src, vform, i, addr);
383 addr += LaneSizeInBytesFromFormat(vform);
384 }
385 }
386
387
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)388 void Simulator::st1(VectorFormat vform,
389 LogicVRegister src,
390 int index,
391 uint64_t addr) {
392 StoreLane(src, vform, index, addr);
393 }
394
395
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,uint64_t addr)396 void Simulator::st2(VectorFormat vform,
397 LogicVRegister src,
398 LogicVRegister src2,
399 uint64_t addr) {
400 int esize = LaneSizeInBytesFromFormat(vform);
401 uint64_t addr2 = addr + esize;
402 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403 StoreLane(src, vform, i, addr);
404 StoreLane(src2, vform, i, addr2);
405 addr += 2 * esize;
406 addr2 += 2 * esize;
407 }
408 }
409
410
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,int index,uint64_t addr)411 void Simulator::st2(VectorFormat vform,
412 LogicVRegister src,
413 LogicVRegister src2,
414 int index,
415 uint64_t addr) {
416 int esize = LaneSizeInBytesFromFormat(vform);
417 StoreLane(src, vform, index, addr);
418 StoreLane(src2, vform, index, addr + 1 * esize);
419 }
420
421
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,uint64_t addr)422 void Simulator::st3(VectorFormat vform,
423 LogicVRegister src,
424 LogicVRegister src2,
425 LogicVRegister src3,
426 uint64_t addr) {
427 int esize = LaneSizeInBytesFromFormat(vform);
428 uint64_t addr2 = addr + esize;
429 uint64_t addr3 = addr2 + esize;
430 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
431 StoreLane(src, vform, i, addr);
432 StoreLane(src2, vform, i, addr2);
433 StoreLane(src3, vform, i, addr3);
434 addr += 3 * esize;
435 addr2 += 3 * esize;
436 addr3 += 3 * esize;
437 }
438 }
439
440
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,int index,uint64_t addr)441 void Simulator::st3(VectorFormat vform,
442 LogicVRegister src,
443 LogicVRegister src2,
444 LogicVRegister src3,
445 int index,
446 uint64_t addr) {
447 int esize = LaneSizeInBytesFromFormat(vform);
448 StoreLane(src, vform, index, addr);
449 StoreLane(src2, vform, index, addr + 1 * esize);
450 StoreLane(src3, vform, index, addr + 2 * esize);
451 }
452
453
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,uint64_t addr)454 void Simulator::st4(VectorFormat vform,
455 LogicVRegister src,
456 LogicVRegister src2,
457 LogicVRegister src3,
458 LogicVRegister src4,
459 uint64_t addr) {
460 int esize = LaneSizeInBytesFromFormat(vform);
461 uint64_t addr2 = addr + esize;
462 uint64_t addr3 = addr2 + esize;
463 uint64_t addr4 = addr3 + esize;
464 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
465 StoreLane(src, vform, i, addr);
466 StoreLane(src2, vform, i, addr2);
467 StoreLane(src3, vform, i, addr3);
468 StoreLane(src4, vform, i, addr4);
469 addr += 4 * esize;
470 addr2 += 4 * esize;
471 addr3 += 4 * esize;
472 addr4 += 4 * esize;
473 }
474 }
475
476
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,int index,uint64_t addr)477 void Simulator::st4(VectorFormat vform,
478 LogicVRegister src,
479 LogicVRegister src2,
480 LogicVRegister src3,
481 LogicVRegister src4,
482 int index,
483 uint64_t addr) {
484 int esize = LaneSizeInBytesFromFormat(vform);
485 StoreLane(src, vform, index, addr);
486 StoreLane(src2, vform, index, addr + 1 * esize);
487 StoreLane(src3, vform, index, addr + 2 * esize);
488 StoreLane(src4, vform, index, addr + 3 * esize);
489 }
490
491
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)492 LogicVRegister Simulator::cmp(VectorFormat vform,
493 LogicVRegister dst,
494 const LogicVRegister& src1,
495 const LogicVRegister& src2,
496 Condition cond) {
497 dst.ClearForWrite(vform);
498 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
499 int64_t sa = src1.Int(vform, i);
500 int64_t sb = src2.Int(vform, i);
501 uint64_t ua = src1.Uint(vform, i);
502 uint64_t ub = src2.Uint(vform, i);
503 bool result = false;
504 switch (cond) {
505 case eq:
506 result = (ua == ub);
507 break;
508 case ge:
509 result = (sa >= sb);
510 break;
511 case gt:
512 result = (sa > sb);
513 break;
514 case hi:
515 result = (ua > ub);
516 break;
517 case hs:
518 result = (ua >= ub);
519 break;
520 case lt:
521 result = (sa < sb);
522 break;
523 case le:
524 result = (sa <= sb);
525 break;
526 default:
527 VIXL_UNREACHABLE();
528 break;
529 }
530 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
531 }
532 return dst;
533 }
534
535
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)536 LogicVRegister Simulator::cmp(VectorFormat vform,
537 LogicVRegister dst,
538 const LogicVRegister& src1,
539 int imm,
540 Condition cond) {
541 SimVRegister temp;
542 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
543 return cmp(vform, dst, src1, imm_reg, cond);
544 }
545
546
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)547 LogicVRegister Simulator::cmptst(VectorFormat vform,
548 LogicVRegister dst,
549 const LogicVRegister& src1,
550 const LogicVRegister& src2) {
551 dst.ClearForWrite(vform);
552 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
553 uint64_t ua = src1.Uint(vform, i);
554 uint64_t ub = src2.Uint(vform, i);
555 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
556 }
557 return dst;
558 }
559
560
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)561 LogicVRegister Simulator::add(VectorFormat vform,
562 LogicVRegister dst,
563 const LogicVRegister& src1,
564 const LogicVRegister& src2) {
565 int lane_size = LaneSizeInBitsFromFormat(vform);
566 dst.ClearForWrite(vform);
567
568 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569 // Test for unsigned saturation.
570 uint64_t ua = src1.UintLeftJustified(vform, i);
571 uint64_t ub = src2.UintLeftJustified(vform, i);
572 uint64_t ur = ua + ub;
573 if (ur < ua) {
574 dst.SetUnsignedSat(i, true);
575 }
576
577 // Test for signed saturation.
578 bool pos_a = (ua >> 63) == 0;
579 bool pos_b = (ub >> 63) == 0;
580 bool pos_r = (ur >> 63) == 0;
581 // If the signs of the operands are the same, but different from the result,
582 // there was an overflow.
583 if ((pos_a == pos_b) && (pos_a != pos_r)) {
584 dst.SetSignedSat(i, pos_a);
585 }
586 dst.SetInt(vform, i, ur >> (64 - lane_size));
587 }
588 return dst;
589 }
590
add_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)591 LogicVRegister Simulator::add_uint(VectorFormat vform,
592 LogicVRegister dst,
593 const LogicVRegister& src1,
594 uint64_t value) {
595 int lane_size = LaneSizeInBitsFromFormat(vform);
596 VIXL_ASSERT(IsUintN(lane_size, value));
597 dst.ClearForWrite(vform);
598 // Left-justify `value`.
599 uint64_t ub = value << (64 - lane_size);
600 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
601 // Test for unsigned saturation.
602 uint64_t ua = src1.UintLeftJustified(vform, i);
603 uint64_t ur = ua + ub;
604 if (ur < ua) {
605 dst.SetUnsignedSat(i, true);
606 }
607
608 // Test for signed saturation.
609 // `value` is always positive, so we have an overflow if the (signed) result
610 // is smaller than the first operand.
611 if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
612 dst.SetSignedSat(i, true);
613 }
614
615 dst.SetInt(vform, i, ur >> (64 - lane_size));
616 }
617 return dst;
618 }
619
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)620 LogicVRegister Simulator::addp(VectorFormat vform,
621 LogicVRegister dst,
622 const LogicVRegister& src1,
623 const LogicVRegister& src2) {
624 SimVRegister temp1, temp2;
625 uzp1(vform, temp1, src1, src2);
626 uzp2(vform, temp2, src1, src2);
627 add(vform, dst, temp1, temp2);
628 if (IsSVEFormat(vform)) {
629 interleave_top_bottom(vform, dst, dst);
630 }
631 return dst;
632 }
633
sdiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)634 LogicVRegister Simulator::sdiv(VectorFormat vform,
635 LogicVRegister dst,
636 const LogicVRegister& src1,
637 const LogicVRegister& src2) {
638 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
639
640 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
641 int64_t val1 = src1.Int(vform, i);
642 int64_t val2 = src2.Int(vform, i);
643 int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
644 int64_t quotient = 0;
645 if ((val1 == min_int) && (val2 == -1)) {
646 quotient = min_int;
647 } else if (val2 != 0) {
648 quotient = val1 / val2;
649 }
650 dst.SetInt(vform, i, quotient);
651 }
652
653 return dst;
654 }
655
udiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)656 LogicVRegister Simulator::udiv(VectorFormat vform,
657 LogicVRegister dst,
658 const LogicVRegister& src1,
659 const LogicVRegister& src2) {
660 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
661
662 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
663 uint64_t val1 = src1.Uint(vform, i);
664 uint64_t val2 = src2.Uint(vform, i);
665 uint64_t quotient = 0;
666 if (val2 != 0) {
667 quotient = val1 / val2;
668 }
669 dst.SetUint(vform, i, quotient);
670 }
671
672 return dst;
673 }
674
675
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)676 LogicVRegister Simulator::mla(VectorFormat vform,
677 LogicVRegister dst,
678 const LogicVRegister& srca,
679 const LogicVRegister& src1,
680 const LogicVRegister& src2) {
681 SimVRegister temp;
682 mul(vform, temp, src1, src2);
683 add(vform, dst, srca, temp);
684 return dst;
685 }
686
687
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)688 LogicVRegister Simulator::mls(VectorFormat vform,
689 LogicVRegister dst,
690 const LogicVRegister& srca,
691 const LogicVRegister& src1,
692 const LogicVRegister& src2) {
693 SimVRegister temp;
694 mul(vform, temp, src1, src2);
695 sub(vform, dst, srca, temp);
696 return dst;
697 }
698
699
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)700 LogicVRegister Simulator::mul(VectorFormat vform,
701 LogicVRegister dst,
702 const LogicVRegister& src1,
703 const LogicVRegister& src2) {
704 dst.ClearForWrite(vform);
705
706 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
707 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
708 }
709 return dst;
710 }
711
712
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)713 LogicVRegister Simulator::mul(VectorFormat vform,
714 LogicVRegister dst,
715 const LogicVRegister& src1,
716 const LogicVRegister& src2,
717 int index) {
718 SimVRegister temp;
719 VectorFormat indexform = VectorFormatFillQ(vform);
720 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
721 }
722
723
smulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)724 LogicVRegister Simulator::smulh(VectorFormat vform,
725 LogicVRegister dst,
726 const LogicVRegister& src1,
727 const LogicVRegister& src2) {
728 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
729 int64_t dst_val = 0xbadbeef;
730 int64_t val1 = src1.Int(vform, i);
731 int64_t val2 = src2.Int(vform, i);
732 switch (LaneSizeInBitsFromFormat(vform)) {
733 case 8:
734 dst_val = internal::MultiplyHigh<8>(val1, val2);
735 break;
736 case 16:
737 dst_val = internal::MultiplyHigh<16>(val1, val2);
738 break;
739 case 32:
740 dst_val = internal::MultiplyHigh<32>(val1, val2);
741 break;
742 case 64:
743 dst_val = internal::MultiplyHigh<64>(val1, val2);
744 break;
745 default:
746 VIXL_UNREACHABLE();
747 break;
748 }
749 dst.SetInt(vform, i, dst_val);
750 }
751 return dst;
752 }
753
754
umulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)755 LogicVRegister Simulator::umulh(VectorFormat vform,
756 LogicVRegister dst,
757 const LogicVRegister& src1,
758 const LogicVRegister& src2) {
759 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
760 uint64_t dst_val = 0xbadbeef;
761 uint64_t val1 = src1.Uint(vform, i);
762 uint64_t val2 = src2.Uint(vform, i);
763 switch (LaneSizeInBitsFromFormat(vform)) {
764 case 8:
765 dst_val = internal::MultiplyHigh<8>(val1, val2);
766 break;
767 case 16:
768 dst_val = internal::MultiplyHigh<16>(val1, val2);
769 break;
770 case 32:
771 dst_val = internal::MultiplyHigh<32>(val1, val2);
772 break;
773 case 64:
774 dst_val = internal::MultiplyHigh<64>(val1, val2);
775 break;
776 default:
777 VIXL_UNREACHABLE();
778 break;
779 }
780 dst.SetUint(vform, i, dst_val);
781 }
782 return dst;
783 }
784
785
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)786 LogicVRegister Simulator::mla(VectorFormat vform,
787 LogicVRegister dst,
788 const LogicVRegister& src1,
789 const LogicVRegister& src2,
790 int index) {
791 SimVRegister temp;
792 VectorFormat indexform = VectorFormatFillQ(vform);
793 return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
794 }
795
796
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)797 LogicVRegister Simulator::mls(VectorFormat vform,
798 LogicVRegister dst,
799 const LogicVRegister& src1,
800 const LogicVRegister& src2,
801 int index) {
802 SimVRegister temp;
803 VectorFormat indexform = VectorFormatFillQ(vform);
804 return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
805 }
806
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807 LogicVRegister Simulator::sqdmull(VectorFormat vform,
808 LogicVRegister dst,
809 const LogicVRegister& src1,
810 const LogicVRegister& src2,
811 int index) {
812 SimVRegister temp;
813 VectorFormat indexform =
814 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
815 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
816 }
817
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)818 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
819 LogicVRegister dst,
820 const LogicVRegister& src1,
821 const LogicVRegister& src2,
822 int index) {
823 SimVRegister temp;
824 VectorFormat indexform =
825 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
826 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
827 }
828
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)829 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
830 LogicVRegister dst,
831 const LogicVRegister& src1,
832 const LogicVRegister& src2,
833 int index) {
834 SimVRegister temp;
835 VectorFormat indexform =
836 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
837 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
838 }
839
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)840 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
841 LogicVRegister dst,
842 const LogicVRegister& src1,
843 const LogicVRegister& src2,
844 int index) {
845 SimVRegister temp;
846 VectorFormat indexform = VectorFormatFillQ(vform);
847 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
848 }
849
850
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)851 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
852 LogicVRegister dst,
853 const LogicVRegister& src1,
854 const LogicVRegister& src2,
855 int index) {
856 SimVRegister temp;
857 VectorFormat indexform = VectorFormatFillQ(vform);
858 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
859 }
860
861
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)862 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
863 LogicVRegister dst,
864 const LogicVRegister& src1,
865 const LogicVRegister& src2,
866 int index) {
867 SimVRegister temp;
868 VectorFormat indexform = VectorFormatFillQ(vform);
869 return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
870 }
871
872
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)873 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
874 LogicVRegister dst,
875 const LogicVRegister& src1,
876 const LogicVRegister& src2,
877 int index) {
878 SimVRegister temp;
879 VectorFormat indexform = VectorFormatFillQ(vform);
880 return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
881 }
882
883
PolynomialMult(uint64_t op1,uint64_t op2,int lane_size_in_bits) const884 uint64_t Simulator::PolynomialMult(uint64_t op1,
885 uint64_t op2,
886 int lane_size_in_bits) const {
887 VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize);
888 VIXL_ASSERT(IsUintN(lane_size_in_bits, op1));
889 VIXL_ASSERT(IsUintN(lane_size_in_bits, op2));
890 uint64_t result = 0;
891 for (int i = 0; i < lane_size_in_bits; ++i) {
892 if ((op1 >> i) & 1) {
893 result = result ^ (op2 << i);
894 }
895 }
896 return result;
897 }
898
899
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)900 LogicVRegister Simulator::pmul(VectorFormat vform,
901 LogicVRegister dst,
902 const LogicVRegister& src1,
903 const LogicVRegister& src2) {
904 dst.ClearForWrite(vform);
905 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
906 dst.SetUint(vform,
907 i,
908 PolynomialMult(src1.Uint(vform, i),
909 src2.Uint(vform, i),
910 LaneSizeInBitsFromFormat(vform)));
911 }
912 return dst;
913 }
914
915
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)916 LogicVRegister Simulator::pmull(VectorFormat vform,
917 LogicVRegister dst,
918 const LogicVRegister& src1,
919 const LogicVRegister& src2) {
920 dst.ClearForWrite(vform);
921
922 VectorFormat vform_src = VectorFormatHalfWidth(vform);
923 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
924 dst.SetUint(vform,
925 i,
926 PolynomialMult(src1.Uint(vform_src, i),
927 src2.Uint(vform_src, i),
928 LaneSizeInBitsFromFormat(vform_src)));
929 }
930
931 return dst;
932 }
933
934
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)935 LogicVRegister Simulator::pmull2(VectorFormat vform,
936 LogicVRegister dst,
937 const LogicVRegister& src1,
938 const LogicVRegister& src2) {
939 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
940 dst.ClearForWrite(vform);
941 int lane_count = LaneCountFromFormat(vform);
942 for (int i = 0; i < lane_count; i++) {
943 dst.SetUint(vform,
944 i,
945 PolynomialMult(src1.Uint(vform_src, lane_count + i),
946 src2.Uint(vform_src, lane_count + i),
947 LaneSizeInBitsFromFormat(vform_src)));
948 }
949 return dst;
950 }
951
952
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)953 LogicVRegister Simulator::sub(VectorFormat vform,
954 LogicVRegister dst,
955 const LogicVRegister& src1,
956 const LogicVRegister& src2) {
957 int lane_size = LaneSizeInBitsFromFormat(vform);
958 dst.ClearForWrite(vform);
959 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
960 // Test for unsigned saturation.
961 uint64_t ua = src1.UintLeftJustified(vform, i);
962 uint64_t ub = src2.UintLeftJustified(vform, i);
963 uint64_t ur = ua - ub;
964 if (ub > ua) {
965 dst.SetUnsignedSat(i, false);
966 }
967
968 // Test for signed saturation.
969 bool pos_a = (ua >> 63) == 0;
970 bool pos_b = (ub >> 63) == 0;
971 bool pos_r = (ur >> 63) == 0;
972 // If the signs of the operands are different, and the sign of the first
973 // operand doesn't match the result, there was an overflow.
974 if ((pos_a != pos_b) && (pos_a != pos_r)) {
975 dst.SetSignedSat(i, pos_a);
976 }
977
978 dst.SetInt(vform, i, ur >> (64 - lane_size));
979 }
980 return dst;
981 }
982
sub_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)983 LogicVRegister Simulator::sub_uint(VectorFormat vform,
984 LogicVRegister dst,
985 const LogicVRegister& src1,
986 uint64_t value) {
987 int lane_size = LaneSizeInBitsFromFormat(vform);
988 VIXL_ASSERT(IsUintN(lane_size, value));
989 dst.ClearForWrite(vform);
990 // Left-justify `value`.
991 uint64_t ub = value << (64 - lane_size);
992 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
993 // Test for unsigned saturation.
994 uint64_t ua = src1.UintLeftJustified(vform, i);
995 uint64_t ur = ua - ub;
996 if (ub > ua) {
997 dst.SetUnsignedSat(i, false);
998 }
999
1000 // Test for signed saturation.
1001 // `value` is always positive, so we have an overflow if the (signed) result
1002 // is greater than the first operand.
1003 if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1004 dst.SetSignedSat(i, false);
1005 }
1006
1007 dst.SetInt(vform, i, ur >> (64 - lane_size));
1008 }
1009 return dst;
1010 }
1011
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1012 LogicVRegister Simulator::and_(VectorFormat vform,
1013 LogicVRegister dst,
1014 const LogicVRegister& src1,
1015 const LogicVRegister& src2) {
1016 dst.ClearForWrite(vform);
1017 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1018 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1019 }
1020 return dst;
1021 }
1022
1023
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1024 LogicVRegister Simulator::orr(VectorFormat vform,
1025 LogicVRegister dst,
1026 const LogicVRegister& src1,
1027 const LogicVRegister& src2) {
1028 dst.ClearForWrite(vform);
1029 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1030 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1031 }
1032 return dst;
1033 }
1034
1035
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1036 LogicVRegister Simulator::orn(VectorFormat vform,
1037 LogicVRegister dst,
1038 const LogicVRegister& src1,
1039 const LogicVRegister& src2) {
1040 dst.ClearForWrite(vform);
1041 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1042 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1043 }
1044 return dst;
1045 }
1046
1047
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1048 LogicVRegister Simulator::eor(VectorFormat vform,
1049 LogicVRegister dst,
1050 const LogicVRegister& src1,
1051 const LogicVRegister& src2) {
1052 dst.ClearForWrite(vform);
1053 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1054 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1055 }
1056 return dst;
1057 }
1058
1059
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1060 LogicVRegister Simulator::bic(VectorFormat vform,
1061 LogicVRegister dst,
1062 const LogicVRegister& src1,
1063 const LogicVRegister& src2) {
1064 dst.ClearForWrite(vform);
1065 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1066 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1067 }
1068 return dst;
1069 }
1070
1071
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1072 LogicVRegister Simulator::bic(VectorFormat vform,
1073 LogicVRegister dst,
1074 const LogicVRegister& src,
1075 uint64_t imm) {
1076 uint64_t result[16];
1077 int lane_count = LaneCountFromFormat(vform);
1078 for (int i = 0; i < lane_count; ++i) {
1079 result[i] = src.Uint(vform, i) & ~imm;
1080 }
1081 dst.ClearForWrite(vform);
1082 for (int i = 0; i < lane_count; ++i) {
1083 dst.SetUint(vform, i, result[i]);
1084 }
1085 return dst;
1086 }
1087
1088
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1089 LogicVRegister Simulator::bif(VectorFormat vform,
1090 LogicVRegister dst,
1091 const LogicVRegister& src1,
1092 const LogicVRegister& src2) {
1093 dst.ClearForWrite(vform);
1094 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1095 uint64_t operand1 = dst.Uint(vform, i);
1096 uint64_t operand2 = ~src2.Uint(vform, i);
1097 uint64_t operand3 = src1.Uint(vform, i);
1098 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1099 dst.SetUint(vform, i, result);
1100 }
1101 return dst;
1102 }
1103
1104
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1105 LogicVRegister Simulator::bit(VectorFormat vform,
1106 LogicVRegister dst,
1107 const LogicVRegister& src1,
1108 const LogicVRegister& src2) {
1109 dst.ClearForWrite(vform);
1110 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1111 uint64_t operand1 = dst.Uint(vform, i);
1112 uint64_t operand2 = src2.Uint(vform, i);
1113 uint64_t operand3 = src1.Uint(vform, i);
1114 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1115 dst.SetUint(vform, i, result);
1116 }
1117 return dst;
1118 }
1119
1120
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src_mask,const LogicVRegister & src1,const LogicVRegister & src2)1121 LogicVRegister Simulator::bsl(VectorFormat vform,
1122 LogicVRegister dst,
1123 const LogicVRegister& src_mask,
1124 const LogicVRegister& src1,
1125 const LogicVRegister& src2) {
1126 dst.ClearForWrite(vform);
1127 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1128 uint64_t operand1 = src2.Uint(vform, i);
1129 uint64_t operand2 = src_mask.Uint(vform, i);
1130 uint64_t operand3 = src1.Uint(vform, i);
1131 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1132 dst.SetUint(vform, i, result);
1133 }
1134 return dst;
1135 }
1136
1137
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1138 LogicVRegister Simulator::sminmax(VectorFormat vform,
1139 LogicVRegister dst,
1140 const LogicVRegister& src1,
1141 const LogicVRegister& src2,
1142 bool max) {
1143 dst.ClearForWrite(vform);
1144 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1145 int64_t src1_val = src1.Int(vform, i);
1146 int64_t src2_val = src2.Int(vform, i);
1147 int64_t dst_val;
1148 if (max) {
1149 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1150 } else {
1151 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1152 }
1153 dst.SetInt(vform, i, dst_val);
1154 }
1155 return dst;
1156 }
1157
1158
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1159 LogicVRegister Simulator::smax(VectorFormat vform,
1160 LogicVRegister dst,
1161 const LogicVRegister& src1,
1162 const LogicVRegister& src2) {
1163 return sminmax(vform, dst, src1, src2, true);
1164 }
1165
1166
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1167 LogicVRegister Simulator::smin(VectorFormat vform,
1168 LogicVRegister dst,
1169 const LogicVRegister& src1,
1170 const LogicVRegister& src2) {
1171 return sminmax(vform, dst, src1, src2, false);
1172 }
1173
1174
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1175 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1176 LogicVRegister dst,
1177 const LogicVRegister& src1,
1178 const LogicVRegister& src2,
1179 bool max) {
1180 unsigned lanes = LaneCountFromFormat(vform);
1181 int64_t result[kZRegMaxSizeInBytes];
1182 const LogicVRegister* src = &src1;
1183 for (unsigned j = 0; j < 2; j++) {
1184 for (unsigned i = 0; i < lanes; i += 2) {
1185 int64_t first_val = src->Int(vform, i);
1186 int64_t second_val = src->Int(vform, i + 1);
1187 int64_t dst_val;
1188 if (max) {
1189 dst_val = (first_val > second_val) ? first_val : second_val;
1190 } else {
1191 dst_val = (first_val < second_val) ? first_val : second_val;
1192 }
1193 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1194 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1195 }
1196 src = &src2;
1197 }
1198 dst.SetIntArray(vform, result);
1199 if (IsSVEFormat(vform)) {
1200 interleave_top_bottom(vform, dst, dst);
1201 }
1202 return dst;
1203 }
1204
1205
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1206 LogicVRegister Simulator::smaxp(VectorFormat vform,
1207 LogicVRegister dst,
1208 const LogicVRegister& src1,
1209 const LogicVRegister& src2) {
1210 return sminmaxp(vform, dst, src1, src2, true);
1211 }
1212
1213
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1214 LogicVRegister Simulator::sminp(VectorFormat vform,
1215 LogicVRegister dst,
1216 const LogicVRegister& src1,
1217 const LogicVRegister& src2) {
1218 return sminmaxp(vform, dst, src1, src2, false);
1219 }
1220
1221
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1222 LogicVRegister Simulator::addp(VectorFormat vform,
1223 LogicVRegister dst,
1224 const LogicVRegister& src) {
1225 VIXL_ASSERT(vform == kFormatD);
1226
1227 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1228 dst.ClearForWrite(vform);
1229 dst.SetUint(vform, 0, dst_val);
1230 return dst;
1231 }
1232
1233
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1234 LogicVRegister Simulator::addv(VectorFormat vform,
1235 LogicVRegister dst,
1236 const LogicVRegister& src) {
1237 VectorFormat vform_dst =
1238 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1239
1240
1241 int64_t dst_val = 0;
1242 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1243 dst_val += src.Int(vform, i);
1244 }
1245
1246 dst.ClearForWrite(vform_dst);
1247 dst.SetInt(vform_dst, 0, dst_val);
1248 return dst;
1249 }
1250
1251
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1252 LogicVRegister Simulator::saddlv(VectorFormat vform,
1253 LogicVRegister dst,
1254 const LogicVRegister& src) {
1255 VectorFormat vform_dst =
1256 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1257
1258 int64_t dst_val = 0;
1259 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1260 dst_val += src.Int(vform, i);
1261 }
1262
1263 dst.ClearForWrite(vform_dst);
1264 dst.SetInt(vform_dst, 0, dst_val);
1265 return dst;
1266 }
1267
1268
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1269 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1270 LogicVRegister dst,
1271 const LogicVRegister& src) {
1272 VectorFormat vform_dst =
1273 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1274
1275 uint64_t dst_val = 0;
1276 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1277 dst_val += src.Uint(vform, i);
1278 }
1279
1280 dst.ClearForWrite(vform_dst);
1281 dst.SetUint(vform_dst, 0, dst_val);
1282 return dst;
1283 }
1284
1285
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1286 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1287 LogicVRegister dst,
1288 const LogicPRegister& pg,
1289 const LogicVRegister& src,
1290 bool max) {
1291 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1292 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1293 if (!pg.IsActive(vform, i)) continue;
1294
1295 int64_t src_val = src.Int(vform, i);
1296 if (max) {
1297 dst_val = (src_val > dst_val) ? src_val : dst_val;
1298 } else {
1299 dst_val = (src_val < dst_val) ? src_val : dst_val;
1300 }
1301 }
1302 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1303 dst.SetInt(vform, 0, dst_val);
1304 return dst;
1305 }
1306
1307
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1308 LogicVRegister Simulator::smaxv(VectorFormat vform,
1309 LogicVRegister dst,
1310 const LogicVRegister& src) {
1311 sminmaxv(vform, dst, GetPTrue(), src, true);
1312 return dst;
1313 }
1314
1315
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1316 LogicVRegister Simulator::sminv(VectorFormat vform,
1317 LogicVRegister dst,
1318 const LogicVRegister& src) {
1319 sminmaxv(vform, dst, GetPTrue(), src, false);
1320 return dst;
1321 }
1322
1323
smaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1324 LogicVRegister Simulator::smaxv(VectorFormat vform,
1325 LogicVRegister dst,
1326 const LogicPRegister& pg,
1327 const LogicVRegister& src) {
1328 VIXL_ASSERT(IsSVEFormat(vform));
1329 sminmaxv(vform, dst, pg, src, true);
1330 return dst;
1331 }
1332
1333
sminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1334 LogicVRegister Simulator::sminv(VectorFormat vform,
1335 LogicVRegister dst,
1336 const LogicPRegister& pg,
1337 const LogicVRegister& src) {
1338 VIXL_ASSERT(IsSVEFormat(vform));
1339 sminmaxv(vform, dst, pg, src, false);
1340 return dst;
1341 }
1342
1343
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1344 LogicVRegister Simulator::uminmax(VectorFormat vform,
1345 LogicVRegister dst,
1346 const LogicVRegister& src1,
1347 const LogicVRegister& src2,
1348 bool max) {
1349 dst.ClearForWrite(vform);
1350 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1351 uint64_t src1_val = src1.Uint(vform, i);
1352 uint64_t src2_val = src2.Uint(vform, i);
1353 uint64_t dst_val;
1354 if (max) {
1355 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1356 } else {
1357 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1358 }
1359 dst.SetUint(vform, i, dst_val);
1360 }
1361 return dst;
1362 }
1363
1364
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1365 LogicVRegister Simulator::umax(VectorFormat vform,
1366 LogicVRegister dst,
1367 const LogicVRegister& src1,
1368 const LogicVRegister& src2) {
1369 return uminmax(vform, dst, src1, src2, true);
1370 }
1371
1372
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1373 LogicVRegister Simulator::umin(VectorFormat vform,
1374 LogicVRegister dst,
1375 const LogicVRegister& src1,
1376 const LogicVRegister& src2) {
1377 return uminmax(vform, dst, src1, src2, false);
1378 }
1379
1380
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1381 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1382 LogicVRegister dst,
1383 const LogicVRegister& src1,
1384 const LogicVRegister& src2,
1385 bool max) {
1386 unsigned lanes = LaneCountFromFormat(vform);
1387 uint64_t result[kZRegMaxSizeInBytes];
1388 const LogicVRegister* src = &src1;
1389 for (unsigned j = 0; j < 2; j++) {
1390 for (unsigned i = 0; i < lanes; i += 2) {
1391 uint64_t first_val = src->Uint(vform, i);
1392 uint64_t second_val = src->Uint(vform, i + 1);
1393 uint64_t dst_val;
1394 if (max) {
1395 dst_val = (first_val > second_val) ? first_val : second_val;
1396 } else {
1397 dst_val = (first_val < second_val) ? first_val : second_val;
1398 }
1399 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1400 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1401 }
1402 src = &src2;
1403 }
1404 dst.SetUintArray(vform, result);
1405 if (IsSVEFormat(vform)) {
1406 interleave_top_bottom(vform, dst, dst);
1407 }
1408 return dst;
1409 }
1410
1411
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1412 LogicVRegister Simulator::umaxp(VectorFormat vform,
1413 LogicVRegister dst,
1414 const LogicVRegister& src1,
1415 const LogicVRegister& src2) {
1416 return uminmaxp(vform, dst, src1, src2, true);
1417 }
1418
1419
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1420 LogicVRegister Simulator::uminp(VectorFormat vform,
1421 LogicVRegister dst,
1422 const LogicVRegister& src1,
1423 const LogicVRegister& src2) {
1424 return uminmaxp(vform, dst, src1, src2, false);
1425 }
1426
1427
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1428 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1429 LogicVRegister dst,
1430 const LogicPRegister& pg,
1431 const LogicVRegister& src,
1432 bool max) {
1433 uint64_t dst_val = max ? 0 : UINT64_MAX;
1434 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1435 if (!pg.IsActive(vform, i)) continue;
1436
1437 uint64_t src_val = src.Uint(vform, i);
1438 if (max) {
1439 dst_val = (src_val > dst_val) ? src_val : dst_val;
1440 } else {
1441 dst_val = (src_val < dst_val) ? src_val : dst_val;
1442 }
1443 }
1444 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1445 dst.SetUint(vform, 0, dst_val);
1446 return dst;
1447 }
1448
1449
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1450 LogicVRegister Simulator::umaxv(VectorFormat vform,
1451 LogicVRegister dst,
1452 const LogicVRegister& src) {
1453 uminmaxv(vform, dst, GetPTrue(), src, true);
1454 return dst;
1455 }
1456
1457
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1458 LogicVRegister Simulator::uminv(VectorFormat vform,
1459 LogicVRegister dst,
1460 const LogicVRegister& src) {
1461 uminmaxv(vform, dst, GetPTrue(), src, false);
1462 return dst;
1463 }
1464
1465
umaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1466 LogicVRegister Simulator::umaxv(VectorFormat vform,
1467 LogicVRegister dst,
1468 const LogicPRegister& pg,
1469 const LogicVRegister& src) {
1470 VIXL_ASSERT(IsSVEFormat(vform));
1471 uminmaxv(vform, dst, pg, src, true);
1472 return dst;
1473 }
1474
1475
uminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1476 LogicVRegister Simulator::uminv(VectorFormat vform,
1477 LogicVRegister dst,
1478 const LogicPRegister& pg,
1479 const LogicVRegister& src) {
1480 VIXL_ASSERT(IsSVEFormat(vform));
1481 uminmaxv(vform, dst, pg, src, false);
1482 return dst;
1483 }
1484
1485
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1486 LogicVRegister Simulator::shl(VectorFormat vform,
1487 LogicVRegister dst,
1488 const LogicVRegister& src,
1489 int shift) {
1490 VIXL_ASSERT(shift >= 0);
1491 SimVRegister temp;
1492 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1493 return ushl(vform, dst, src, shiftreg);
1494 }
1495
1496
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1497 LogicVRegister Simulator::sshll(VectorFormat vform,
1498 LogicVRegister dst,
1499 const LogicVRegister& src,
1500 int shift) {
1501 VIXL_ASSERT(shift >= 0);
1502 SimVRegister temp1, temp2;
1503 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1504 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1505 return sshl(vform, dst, extendedreg, shiftreg);
1506 }
1507
1508
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1509 LogicVRegister Simulator::sshll2(VectorFormat vform,
1510 LogicVRegister dst,
1511 const LogicVRegister& src,
1512 int shift) {
1513 VIXL_ASSERT(shift >= 0);
1514 SimVRegister temp1, temp2;
1515 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1516 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1517 return sshl(vform, dst, extendedreg, shiftreg);
1518 }
1519
1520
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1521 LogicVRegister Simulator::shll(VectorFormat vform,
1522 LogicVRegister dst,
1523 const LogicVRegister& src) {
1524 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1525 return sshll(vform, dst, src, shift);
1526 }
1527
1528
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1529 LogicVRegister Simulator::shll2(VectorFormat vform,
1530 LogicVRegister dst,
1531 const LogicVRegister& src) {
1532 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1533 return sshll2(vform, dst, src, shift);
1534 }
1535
1536
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1537 LogicVRegister Simulator::ushll(VectorFormat vform,
1538 LogicVRegister dst,
1539 const LogicVRegister& src,
1540 int shift) {
1541 VIXL_ASSERT(shift >= 0);
1542 SimVRegister temp1, temp2;
1543 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1544 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1545 return ushl(vform, dst, extendedreg, shiftreg);
1546 }
1547
1548
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1549 LogicVRegister Simulator::ushll2(VectorFormat vform,
1550 LogicVRegister dst,
1551 const LogicVRegister& src,
1552 int shift) {
1553 VIXL_ASSERT(shift >= 0);
1554 SimVRegister temp1, temp2;
1555 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1556 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1557 return ushl(vform, dst, extendedreg, shiftreg);
1558 }
1559
clast(VectorFormat vform,const LogicPRegister & pg,const LogicVRegister & src,int offset_from_last_active)1560 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1561 const LogicPRegister& pg,
1562 const LogicVRegister& src,
1563 int offset_from_last_active) {
1564 // Untested for any other values.
1565 VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1566
1567 int last_active = GetLastActive(vform, pg);
1568 int lane_count = LaneCountFromFormat(vform);
1569 int index =
1570 ((last_active + offset_from_last_active) + lane_count) % lane_count;
1571 return std::make_pair(last_active >= 0, src.Uint(vform, index));
1572 }
1573
compact(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1574 LogicVRegister Simulator::compact(VectorFormat vform,
1575 LogicVRegister dst,
1576 const LogicPRegister& pg,
1577 const LogicVRegister& src) {
1578 int j = 0;
1579 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1580 if (pg.IsActive(vform, i)) {
1581 dst.SetUint(vform, j++, src.Uint(vform, i));
1582 }
1583 }
1584 for (; j < LaneCountFromFormat(vform); j++) {
1585 dst.SetUint(vform, j, 0);
1586 }
1587 return dst;
1588 }
1589
splice(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1590 LogicVRegister Simulator::splice(VectorFormat vform,
1591 LogicVRegister dst,
1592 const LogicPRegister& pg,
1593 const LogicVRegister& src1,
1594 const LogicVRegister& src2) {
1595 int lane_count = LaneCountFromFormat(vform);
1596 int first_active = GetFirstActive(vform, pg);
1597 int last_active = GetLastActive(vform, pg);
1598 int dst_idx = 0;
1599 uint64_t result[kZRegMaxSizeInBytes];
1600
1601 if (first_active >= 0) {
1602 VIXL_ASSERT(last_active >= first_active);
1603 VIXL_ASSERT(last_active < lane_count);
1604 for (int i = first_active; i <= last_active; i++) {
1605 result[dst_idx++] = src1.Uint(vform, i);
1606 }
1607 }
1608
1609 VIXL_ASSERT(dst_idx <= lane_count);
1610 for (int i = dst_idx; i < lane_count; i++) {
1611 result[i] = src2.Uint(vform, i - dst_idx);
1612 }
1613
1614 dst.SetUintArray(vform, result);
1615
1616 return dst;
1617 }
1618
sel(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1619 LogicVRegister Simulator::sel(VectorFormat vform,
1620 LogicVRegister dst,
1621 const SimPRegister& pg,
1622 const LogicVRegister& src1,
1623 const LogicVRegister& src2) {
1624 int p_reg_bits_per_lane =
1625 LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1626 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1627 uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1628 ? src1.Uint(vform, lane)
1629 : src2.Uint(vform, lane);
1630 dst.SetUint(vform, lane, lane_value);
1631 }
1632 return dst;
1633 }
1634
1635
sel(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src1,const LogicPRegister & src2)1636 LogicPRegister Simulator::sel(LogicPRegister dst,
1637 const LogicPRegister& pg,
1638 const LogicPRegister& src1,
1639 const LogicPRegister& src2) {
1640 for (int i = 0; i < dst.GetChunkCount(); i++) {
1641 LogicPRegister::ChunkType mask = pg.GetChunk(i);
1642 LogicPRegister::ChunkType result =
1643 (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1644 dst.SetChunk(i, result);
1645 }
1646 return dst;
1647 }
1648
1649
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1650 LogicVRegister Simulator::sli(VectorFormat vform,
1651 LogicVRegister dst,
1652 const LogicVRegister& src,
1653 int shift) {
1654 dst.ClearForWrite(vform);
1655 int lane_count = LaneCountFromFormat(vform);
1656 for (int i = 0; i < lane_count; i++) {
1657 uint64_t src_lane = src.Uint(vform, i);
1658 uint64_t dst_lane = dst.Uint(vform, i);
1659 uint64_t shifted = src_lane << shift;
1660 uint64_t mask = MaxUintFromFormat(vform) << shift;
1661 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1662 }
1663 return dst;
1664 }
1665
1666
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1667 LogicVRegister Simulator::sqshl(VectorFormat vform,
1668 LogicVRegister dst,
1669 const LogicVRegister& src,
1670 int shift) {
1671 VIXL_ASSERT(shift >= 0);
1672 SimVRegister temp;
1673 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1674 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1675 }
1676
1677
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1678 LogicVRegister Simulator::uqshl(VectorFormat vform,
1679 LogicVRegister dst,
1680 const LogicVRegister& src,
1681 int shift) {
1682 VIXL_ASSERT(shift >= 0);
1683 SimVRegister temp;
1684 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1685 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1686 }
1687
1688
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1689 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1690 LogicVRegister dst,
1691 const LogicVRegister& src,
1692 int shift) {
1693 VIXL_ASSERT(shift >= 0);
1694 SimVRegister temp;
1695 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1696 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1697 }
1698
1699
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1700 LogicVRegister Simulator::sri(VectorFormat vform,
1701 LogicVRegister dst,
1702 const LogicVRegister& src,
1703 int shift) {
1704 dst.ClearForWrite(vform);
1705 int lane_count = LaneCountFromFormat(vform);
1706 VIXL_ASSERT((shift > 0) &&
1707 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1708 for (int i = 0; i < lane_count; i++) {
1709 uint64_t src_lane = src.Uint(vform, i);
1710 uint64_t dst_lane = dst.Uint(vform, i);
1711 uint64_t shifted;
1712 uint64_t mask;
1713 if (shift == 64) {
1714 shifted = 0;
1715 mask = 0;
1716 } else {
1717 shifted = src_lane >> shift;
1718 mask = MaxUintFromFormat(vform) >> shift;
1719 }
1720 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1721 }
1722 return dst;
1723 }
1724
1725
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1726 LogicVRegister Simulator::ushr(VectorFormat vform,
1727 LogicVRegister dst,
1728 const LogicVRegister& src,
1729 int shift) {
1730 VIXL_ASSERT(shift >= 0);
1731 SimVRegister temp;
1732 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1733 return ushl(vform, dst, src, shiftreg);
1734 }
1735
1736
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1737 LogicVRegister Simulator::sshr(VectorFormat vform,
1738 LogicVRegister dst,
1739 const LogicVRegister& src,
1740 int shift) {
1741 VIXL_ASSERT(shift >= 0);
1742 SimVRegister temp;
1743 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1744 return sshl(vform, dst, src, shiftreg);
1745 }
1746
1747
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1748 LogicVRegister Simulator::ssra(VectorFormat vform,
1749 LogicVRegister dst,
1750 const LogicVRegister& src,
1751 int shift) {
1752 SimVRegister temp;
1753 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1754 return add(vform, dst, dst, shifted_reg);
1755 }
1756
1757
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1758 LogicVRegister Simulator::usra(VectorFormat vform,
1759 LogicVRegister dst,
1760 const LogicVRegister& src,
1761 int shift) {
1762 SimVRegister temp;
1763 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1764 return add(vform, dst, dst, shifted_reg);
1765 }
1766
1767
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1768 LogicVRegister Simulator::srsra(VectorFormat vform,
1769 LogicVRegister dst,
1770 const LogicVRegister& src,
1771 int shift) {
1772 SimVRegister temp;
1773 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1774 return add(vform, dst, dst, shifted_reg);
1775 }
1776
1777
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1778 LogicVRegister Simulator::ursra(VectorFormat vform,
1779 LogicVRegister dst,
1780 const LogicVRegister& src,
1781 int shift) {
1782 SimVRegister temp;
1783 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1784 return add(vform, dst, dst, shifted_reg);
1785 }
1786
1787
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1788 LogicVRegister Simulator::cls(VectorFormat vform,
1789 LogicVRegister dst,
1790 const LogicVRegister& src) {
1791 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1792 int lane_count = LaneCountFromFormat(vform);
1793
1794 // Ensure that we can store one result per lane.
1795 int result[kZRegMaxSizeInBytes];
1796
1797 for (int i = 0; i < lane_count; i++) {
1798 result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
1799 }
1800
1801 dst.ClearForWrite(vform);
1802 for (int i = 0; i < lane_count; ++i) {
1803 dst.SetUint(vform, i, result[i]);
1804 }
1805 return dst;
1806 }
1807
1808
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1809 LogicVRegister Simulator::clz(VectorFormat vform,
1810 LogicVRegister dst,
1811 const LogicVRegister& src) {
1812 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1813 int lane_count = LaneCountFromFormat(vform);
1814
1815 // Ensure that we can store one result per lane.
1816 int result[kZRegMaxSizeInBytes];
1817
1818 for (int i = 0; i < lane_count; i++) {
1819 result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
1820 }
1821
1822 dst.ClearForWrite(vform);
1823 for (int i = 0; i < lane_count; ++i) {
1824 dst.SetUint(vform, i, result[i]);
1825 }
1826 return dst;
1827 }
1828
1829
cnot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1830 LogicVRegister Simulator::cnot(VectorFormat vform,
1831 LogicVRegister dst,
1832 const LogicVRegister& src) {
1833 dst.ClearForWrite(vform);
1834 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1835 uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
1836 dst.SetUint(vform, i, value);
1837 }
1838 return dst;
1839 }
1840
1841
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1842 LogicVRegister Simulator::cnt(VectorFormat vform,
1843 LogicVRegister dst,
1844 const LogicVRegister& src) {
1845 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1846 int lane_count = LaneCountFromFormat(vform);
1847
1848 // Ensure that we can store one result per lane.
1849 int result[kZRegMaxSizeInBytes];
1850
1851 for (int i = 0; i < lane_count; i++) {
1852 result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
1853 }
1854
1855 dst.ClearForWrite(vform);
1856 for (int i = 0; i < lane_count; ++i) {
1857 dst.SetUint(vform, i, result[i]);
1858 }
1859 return dst;
1860 }
1861
CalculateSignedShiftDistance(int64_t shift_val,int esize,bool shift_in_ls_byte)1862 static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1863 int esize,
1864 bool shift_in_ls_byte) {
1865 if (shift_in_ls_byte) {
1866 // Neon uses the least-significant byte of the lane as the shift distance.
1867 shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1868 } else {
1869 // SVE uses a saturated shift distance in the range
1870 // -(esize + 1) ... (esize + 1).
1871 if (shift_val > (esize + 1)) shift_val = esize + 1;
1872 if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1873 }
1874 return shift_val;
1875 }
1876
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1877 LogicVRegister Simulator::sshl(VectorFormat vform,
1878 LogicVRegister dst,
1879 const LogicVRegister& src1,
1880 const LogicVRegister& src2,
1881 bool shift_in_ls_byte) {
1882 dst.ClearForWrite(vform);
1883 int esize = LaneSizeInBitsFromFormat(vform);
1884 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1885 int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1886 esize,
1887 shift_in_ls_byte);
1888
1889 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1890
1891 // Set signed saturation state.
1892 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1893 dst.SetSignedSat(i, lj_src_val >= 0);
1894 }
1895
1896 // Set unsigned saturation state.
1897 if (lj_src_val < 0) {
1898 dst.SetUnsignedSat(i, false);
1899 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1900 (lj_src_val != 0)) {
1901 dst.SetUnsignedSat(i, true);
1902 }
1903
1904 int64_t src_val = src1.Int(vform, i);
1905 bool src_is_negative = src_val < 0;
1906 if (shift_val > 63) {
1907 dst.SetInt(vform, i, 0);
1908 } else if (shift_val < -63) {
1909 dst.SetRounding(i, src_is_negative);
1910 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1911 } else {
1912 // Use unsigned types for shifts, as behaviour is undefined for signed
1913 // lhs.
1914 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1915
1916 if (shift_val < 0) {
1917 // Convert to right shift.
1918 shift_val = -shift_val;
1919
1920 // Set rounding state by testing most-significant bit shifted out.
1921 // Rounding only needed on right shifts.
1922 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1923 dst.SetRounding(i, true);
1924 }
1925
1926 usrc_val >>= shift_val;
1927
1928 if (src_is_negative) {
1929 // Simulate sign-extension.
1930 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1931 }
1932 } else {
1933 usrc_val <<= shift_val;
1934 }
1935 dst.SetUint(vform, i, usrc_val);
1936 }
1937 }
1938 return dst;
1939 }
1940
1941
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1942 LogicVRegister Simulator::ushl(VectorFormat vform,
1943 LogicVRegister dst,
1944 const LogicVRegister& src1,
1945 const LogicVRegister& src2,
1946 bool shift_in_ls_byte) {
1947 dst.ClearForWrite(vform);
1948 int esize = LaneSizeInBitsFromFormat(vform);
1949 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1950 int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1951 esize,
1952 shift_in_ls_byte);
1953
1954 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1955
1956 // Set saturation state.
1957 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1958 dst.SetUnsignedSat(i, true);
1959 }
1960
1961 uint64_t src_val = src1.Uint(vform, i);
1962 if ((shift_val > 63) || (shift_val < -64)) {
1963 dst.SetUint(vform, i, 0);
1964 } else {
1965 if (shift_val < 0) {
1966 // Set rounding state. Rounding only needed on right shifts.
1967 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1968 dst.SetRounding(i, true);
1969 }
1970
1971 if (shift_val == -64) {
1972 src_val = 0;
1973 } else {
1974 src_val >>= -shift_val;
1975 }
1976 } else {
1977 src_val <<= shift_val;
1978 }
1979 dst.SetUint(vform, i, src_val);
1980 }
1981 }
1982 return dst;
1983 }
1984
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1985 LogicVRegister Simulator::sshr(VectorFormat vform,
1986 LogicVRegister dst,
1987 const LogicVRegister& src1,
1988 const LogicVRegister& src2) {
1989 SimVRegister temp;
1990 // Saturate to sidestep the min-int problem.
1991 neg(vform, temp, src2).SignedSaturate(vform);
1992 sshl(vform, dst, src1, temp, false);
1993 return dst;
1994 }
1995
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1996 LogicVRegister Simulator::ushr(VectorFormat vform,
1997 LogicVRegister dst,
1998 const LogicVRegister& src1,
1999 const LogicVRegister& src2) {
2000 SimVRegister temp;
2001 // Saturate to sidestep the min-int problem.
2002 neg(vform, temp, src2).SignedSaturate(vform);
2003 ushl(vform, dst, src1, temp, false);
2004 return dst;
2005 }
2006
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2007 LogicVRegister Simulator::neg(VectorFormat vform,
2008 LogicVRegister dst,
2009 const LogicVRegister& src) {
2010 dst.ClearForWrite(vform);
2011 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2012 // Test for signed saturation.
2013 int64_t sa = src.Int(vform, i);
2014 if (sa == MinIntFromFormat(vform)) {
2015 dst.SetSignedSat(i, true);
2016 }
2017 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2018 }
2019 return dst;
2020 }
2021
2022
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2023 LogicVRegister Simulator::suqadd(VectorFormat vform,
2024 LogicVRegister dst,
2025 const LogicVRegister& src1,
2026 const LogicVRegister& src2) {
2027 dst.ClearForWrite(vform);
2028 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2029 int64_t sa = src1.IntLeftJustified(vform, i);
2030 uint64_t ub = src2.UintLeftJustified(vform, i);
2031 uint64_t ur = sa + ub;
2032
2033 int64_t sr;
2034 memcpy(&sr, &ur, sizeof(sr));
2035 if (sr < sa) { // Test for signed positive saturation.
2036 dst.SetInt(vform, i, MaxIntFromFormat(vform));
2037 } else {
2038 dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
2039 }
2040 }
2041 return dst;
2042 }
2043
2044
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2045 LogicVRegister Simulator::usqadd(VectorFormat vform,
2046 LogicVRegister dst,
2047 const LogicVRegister& src1,
2048 const LogicVRegister& src2) {
2049 dst.ClearForWrite(vform);
2050 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2051 uint64_t ua = src1.UintLeftJustified(vform, i);
2052 int64_t sb = src2.IntLeftJustified(vform, i);
2053 uint64_t ur = ua + sb;
2054
2055 if ((sb > 0) && (ur <= ua)) {
2056 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2057 } else if ((sb < 0) && (ur >= ua)) {
2058 dst.SetUint(vform, i, 0); // Negative saturation.
2059 } else {
2060 dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
2061 }
2062 }
2063 return dst;
2064 }
2065
2066
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2067 LogicVRegister Simulator::abs(VectorFormat vform,
2068 LogicVRegister dst,
2069 const LogicVRegister& src) {
2070 dst.ClearForWrite(vform);
2071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2072 // Test for signed saturation.
2073 int64_t sa = src.Int(vform, i);
2074 if (sa == MinIntFromFormat(vform)) {
2075 dst.SetSignedSat(i, true);
2076 }
2077 if (sa < 0) {
2078 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2079 } else {
2080 dst.SetInt(vform, i, sa);
2081 }
2082 }
2083 return dst;
2084 }
2085
2086
andv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2087 LogicVRegister Simulator::andv(VectorFormat vform,
2088 LogicVRegister dst,
2089 const LogicPRegister& pg,
2090 const LogicVRegister& src) {
2091 VIXL_ASSERT(IsSVEFormat(vform));
2092 uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2093 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2094 if (!pg.IsActive(vform, i)) continue;
2095
2096 result &= src.Uint(vform, i);
2097 }
2098 VectorFormat vform_dst =
2099 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2100 dst.ClearForWrite(vform_dst);
2101 dst.SetUint(vform_dst, 0, result);
2102 return dst;
2103 }
2104
2105
eorv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2106 LogicVRegister Simulator::eorv(VectorFormat vform,
2107 LogicVRegister dst,
2108 const LogicPRegister& pg,
2109 const LogicVRegister& src) {
2110 VIXL_ASSERT(IsSVEFormat(vform));
2111 uint64_t result = 0;
2112 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2113 if (!pg.IsActive(vform, i)) continue;
2114
2115 result ^= src.Uint(vform, i);
2116 }
2117 VectorFormat vform_dst =
2118 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2119 dst.ClearForWrite(vform_dst);
2120 dst.SetUint(vform_dst, 0, result);
2121 return dst;
2122 }
2123
2124
orv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2125 LogicVRegister Simulator::orv(VectorFormat vform,
2126 LogicVRegister dst,
2127 const LogicPRegister& pg,
2128 const LogicVRegister& src) {
2129 VIXL_ASSERT(IsSVEFormat(vform));
2130 uint64_t result = 0;
2131 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2132 if (!pg.IsActive(vform, i)) continue;
2133
2134 result |= src.Uint(vform, i);
2135 }
2136 VectorFormat vform_dst =
2137 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2138 dst.ClearForWrite(vform_dst);
2139 dst.SetUint(vform_dst, 0, result);
2140 return dst;
2141 }
2142
2143
saddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2144 LogicVRegister Simulator::saddv(VectorFormat vform,
2145 LogicVRegister dst,
2146 const LogicPRegister& pg,
2147 const LogicVRegister& src) {
2148 VIXL_ASSERT(IsSVEFormat(vform));
2149 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2150 int64_t result = 0;
2151 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2152 if (!pg.IsActive(vform, i)) continue;
2153
2154 // The destination register always has D-lane sizes and the source register
2155 // always has S-lanes or smaller, so signed integer overflow -- undefined
2156 // behaviour -- can't occur.
2157 result += src.Int(vform, i);
2158 }
2159
2160 dst.ClearForWrite(kFormatD);
2161 dst.SetInt(kFormatD, 0, result);
2162 return dst;
2163 }
2164
2165
uaddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2166 LogicVRegister Simulator::uaddv(VectorFormat vform,
2167 LogicVRegister dst,
2168 const LogicPRegister& pg,
2169 const LogicVRegister& src) {
2170 VIXL_ASSERT(IsSVEFormat(vform));
2171 uint64_t result = 0;
2172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2173 if (!pg.IsActive(vform, i)) continue;
2174
2175 result += src.Uint(vform, i);
2176 }
2177
2178 dst.ClearForWrite(kFormatD);
2179 dst.SetUint(kFormatD, 0, result);
2180 return dst;
2181 }
2182
2183
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dst_is_signed,const LogicVRegister & src,bool src_is_signed)2184 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2185 LogicVRegister dst,
2186 bool dst_is_signed,
2187 const LogicVRegister& src,
2188 bool src_is_signed) {
2189 bool upperhalf = false;
2190 VectorFormat srcform = dstform;
2191 if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
2192 (dstform == kFormat4S)) {
2193 upperhalf = true;
2194 srcform = VectorFormatHalfLanes(srcform);
2195 }
2196 srcform = VectorFormatDoubleWidth(srcform);
2197
2198 LogicVRegister src_copy = src;
2199
2200 int offset;
2201 if (upperhalf) {
2202 offset = LaneCountFromFormat(dstform) / 2;
2203 } else {
2204 offset = 0;
2205 dst.ClearForWrite(dstform);
2206 }
2207
2208 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2209 int64_t ssrc = src_copy.Int(srcform, i);
2210 uint64_t usrc = src_copy.Uint(srcform, i);
2211
2212 // Test for signed saturation
2213 if (ssrc > MaxIntFromFormat(dstform)) {
2214 dst.SetSignedSat(offset + i, true);
2215 } else if (ssrc < MinIntFromFormat(dstform)) {
2216 dst.SetSignedSat(offset + i, false);
2217 }
2218
2219 // Test for unsigned saturation
2220 if (src_is_signed) {
2221 if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2222 dst.SetUnsignedSat(offset + i, true);
2223 } else if (ssrc < 0) {
2224 dst.SetUnsignedSat(offset + i, false);
2225 }
2226 } else {
2227 if (usrc > MaxUintFromFormat(dstform)) {
2228 dst.SetUnsignedSat(offset + i, true);
2229 }
2230 }
2231
2232 int64_t result;
2233 if (src_is_signed) {
2234 result = ssrc & MaxUintFromFormat(dstform);
2235 } else {
2236 result = usrc & MaxUintFromFormat(dstform);
2237 }
2238
2239 if (dst_is_signed) {
2240 dst.SetInt(dstform, offset + i, result);
2241 } else {
2242 dst.SetUint(dstform, offset + i, result);
2243 }
2244 }
2245 return dst;
2246 }
2247
2248
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2249 LogicVRegister Simulator::xtn(VectorFormat vform,
2250 LogicVRegister dst,
2251 const LogicVRegister& src) {
2252 return extractnarrow(vform, dst, true, src, true);
2253 }
2254
2255
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2256 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2257 LogicVRegister dst,
2258 const LogicVRegister& src) {
2259 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2260 }
2261
2262
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2263 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2264 LogicVRegister dst,
2265 const LogicVRegister& src) {
2266 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2267 }
2268
2269
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2270 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2271 LogicVRegister dst,
2272 const LogicVRegister& src) {
2273 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2274 }
2275
2276
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)2277 LogicVRegister Simulator::absdiff(VectorFormat vform,
2278 LogicVRegister dst,
2279 const LogicVRegister& src1,
2280 const LogicVRegister& src2,
2281 bool is_signed) {
2282 dst.ClearForWrite(vform);
2283 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2284 bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2285 : (src1.Uint(vform, i) > src2.Uint(vform, i));
2286 // Always calculate the answer using unsigned arithmetic, to avoid
2287 // implemenation-defined signed overflow.
2288 if (src1_gt_src2) {
2289 dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2290 } else {
2291 dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2292 }
2293 }
2294 return dst;
2295 }
2296
2297
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2298 LogicVRegister Simulator::saba(VectorFormat vform,
2299 LogicVRegister dst,
2300 const LogicVRegister& src1,
2301 const LogicVRegister& src2) {
2302 SimVRegister temp;
2303 dst.ClearForWrite(vform);
2304 absdiff(vform, temp, src1, src2, true);
2305 add(vform, dst, dst, temp);
2306 return dst;
2307 }
2308
2309
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2310 LogicVRegister Simulator::uaba(VectorFormat vform,
2311 LogicVRegister dst,
2312 const LogicVRegister& src1,
2313 const LogicVRegister& src2) {
2314 SimVRegister temp;
2315 dst.ClearForWrite(vform);
2316 absdiff(vform, temp, src1, src2, false);
2317 add(vform, dst, dst, temp);
2318 return dst;
2319 }
2320
2321
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2322 LogicVRegister Simulator::not_(VectorFormat vform,
2323 LogicVRegister dst,
2324 const LogicVRegister& src) {
2325 dst.ClearForWrite(vform);
2326 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2327 dst.SetUint(vform, i, ~src.Uint(vform, i));
2328 }
2329 return dst;
2330 }
2331
2332
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2333 LogicVRegister Simulator::rbit(VectorFormat vform,
2334 LogicVRegister dst,
2335 const LogicVRegister& src) {
2336 uint64_t result[kZRegMaxSizeInBytes];
2337 int lane_count = LaneCountFromFormat(vform);
2338 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2339 uint64_t reversed_value;
2340 uint64_t value;
2341 for (int i = 0; i < lane_count; i++) {
2342 value = src.Uint(vform, i);
2343 reversed_value = 0;
2344 for (int j = 0; j < lane_size_in_bits; j++) {
2345 reversed_value = (reversed_value << 1) | (value & 1);
2346 value >>= 1;
2347 }
2348 result[i] = reversed_value;
2349 }
2350
2351 dst.ClearForWrite(vform);
2352 for (int i = 0; i < lane_count; ++i) {
2353 dst.SetUint(vform, i, result[i]);
2354 }
2355 return dst;
2356 }
2357
2358
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2359 LogicVRegister Simulator::rev(VectorFormat vform,
2360 LogicVRegister dst,
2361 const LogicVRegister& src) {
2362 VIXL_ASSERT(IsSVEFormat(vform));
2363 int lane_count = LaneCountFromFormat(vform);
2364 for (int i = 0; i < lane_count / 2; i++) {
2365 uint64_t t = src.Uint(vform, i);
2366 dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2367 dst.SetUint(vform, lane_count - i - 1, t);
2368 }
2369 return dst;
2370 }
2371
2372
rev_byte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rev_size)2373 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2374 LogicVRegister dst,
2375 const LogicVRegister& src,
2376 int rev_size) {
2377 uint64_t result[kZRegMaxSizeInBytes] = {};
2378 int lane_count = LaneCountFromFormat(vform);
2379 int lane_size = LaneSizeInBytesFromFormat(vform);
2380 int lanes_per_loop = rev_size / lane_size;
2381 for (int i = 0; i < lane_count; i += lanes_per_loop) {
2382 for (int j = 0; j < lanes_per_loop; j++) {
2383 result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2384 }
2385 }
2386 dst.ClearForWrite(vform);
2387 for (int i = 0; i < lane_count; ++i) {
2388 dst.SetUint(vform, i, result[i]);
2389 }
2390 return dst;
2391 }
2392
2393
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2394 LogicVRegister Simulator::rev16(VectorFormat vform,
2395 LogicVRegister dst,
2396 const LogicVRegister& src) {
2397 return rev_byte(vform, dst, src, 2);
2398 }
2399
2400
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2401 LogicVRegister Simulator::rev32(VectorFormat vform,
2402 LogicVRegister dst,
2403 const LogicVRegister& src) {
2404 return rev_byte(vform, dst, src, 4);
2405 }
2406
2407
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2408 LogicVRegister Simulator::rev64(VectorFormat vform,
2409 LogicVRegister dst,
2410 const LogicVRegister& src) {
2411 return rev_byte(vform, dst, src, 8);
2412 }
2413
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2414 LogicVRegister Simulator::addlp(VectorFormat vform,
2415 LogicVRegister dst,
2416 const LogicVRegister& src,
2417 bool is_signed,
2418 bool do_accumulate) {
2419 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2420 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
2421
2422 uint64_t result[kZRegMaxSizeInBytes];
2423 int lane_count = LaneCountFromFormat(vform);
2424 for (int i = 0; i < lane_count; i++) {
2425 if (is_signed) {
2426 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2427 src.Int(vformsrc, 2 * i + 1));
2428 } else {
2429 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2430 }
2431 }
2432
2433 dst.ClearForWrite(vform);
2434 for (int i = 0; i < lane_count; ++i) {
2435 if (do_accumulate) {
2436 result[i] += dst.Uint(vform, i);
2437 }
2438 dst.SetUint(vform, i, result[i]);
2439 }
2440
2441 return dst;
2442 }
2443
2444
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2445 LogicVRegister Simulator::saddlp(VectorFormat vform,
2446 LogicVRegister dst,
2447 const LogicVRegister& src) {
2448 return addlp(vform, dst, src, true, false);
2449 }
2450
2451
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2452 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2453 LogicVRegister dst,
2454 const LogicVRegister& src) {
2455 return addlp(vform, dst, src, false, false);
2456 }
2457
2458
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2459 LogicVRegister Simulator::sadalp(VectorFormat vform,
2460 LogicVRegister dst,
2461 const LogicVRegister& src) {
2462 return addlp(vform, dst, src, true, true);
2463 }
2464
2465
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2466 LogicVRegister Simulator::uadalp(VectorFormat vform,
2467 LogicVRegister dst,
2468 const LogicVRegister& src) {
2469 return addlp(vform, dst, src, false, true);
2470 }
2471
ror(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rotation)2472 LogicVRegister Simulator::ror(VectorFormat vform,
2473 LogicVRegister dst,
2474 const LogicVRegister& src,
2475 int rotation) {
2476 int width = LaneSizeInBitsFromFormat(vform);
2477 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2478 uint64_t value = src.Uint(vform, i);
2479 dst.SetUint(vform, i, RotateRight(value, rotation, width));
2480 }
2481 return dst;
2482 }
2483
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2484 LogicVRegister Simulator::ext(VectorFormat vform,
2485 LogicVRegister dst,
2486 const LogicVRegister& src1,
2487 const LogicVRegister& src2,
2488 int index) {
2489 uint8_t result[kZRegMaxSizeInBytes] = {};
2490 int lane_count = LaneCountFromFormat(vform);
2491 for (int i = 0; i < lane_count - index; ++i) {
2492 result[i] = src1.Uint(vform, i + index);
2493 }
2494 for (int i = 0; i < index; ++i) {
2495 result[lane_count - index + i] = src2.Uint(vform, i);
2496 }
2497 dst.ClearForWrite(vform);
2498 for (int i = 0; i < lane_count; ++i) {
2499 dst.SetUint(vform, i, result[i]);
2500 }
2501 return dst;
2502 }
2503
rotate_elements_right(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int index)2504 LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
2505 LogicVRegister dst,
2506 const LogicVRegister& src,
2507 int index) {
2508 if (index < 0) index += LaneCountFromFormat(vform);
2509 VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
2510 index *= LaneSizeInBytesFromFormat(vform);
2511 return ext(kFormatVnB, dst, src, src, index);
2512 }
2513
2514
2515 template <typename T>
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2516 LogicVRegister Simulator::fadda(VectorFormat vform,
2517 LogicVRegister acc,
2518 const LogicPRegister& pg,
2519 const LogicVRegister& src) {
2520 T result = acc.Float<T>(0);
2521 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2522 if (!pg.IsActive(vform, i)) continue;
2523
2524 result = FPAdd(result, src.Float<T>(i));
2525 }
2526 VectorFormat vform_dst =
2527 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2528 acc.ClearForWrite(vform_dst);
2529 acc.SetFloat(0, result);
2530 return acc;
2531 }
2532
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2533 LogicVRegister Simulator::fadda(VectorFormat vform,
2534 LogicVRegister acc,
2535 const LogicPRegister& pg,
2536 const LogicVRegister& src) {
2537 switch (LaneSizeInBitsFromFormat(vform)) {
2538 case kHRegSize:
2539 fadda<SimFloat16>(vform, acc, pg, src);
2540 break;
2541 case kSRegSize:
2542 fadda<float>(vform, acc, pg, src);
2543 break;
2544 case kDRegSize:
2545 fadda<double>(vform, acc, pg, src);
2546 break;
2547 default:
2548 VIXL_UNREACHABLE();
2549 }
2550 return acc;
2551 }
2552
2553 template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2554 LogicVRegister Simulator::fcadd(VectorFormat vform,
2555 LogicVRegister dst, // d
2556 const LogicVRegister& src1, // n
2557 const LogicVRegister& src2, // m
2558 int rot) {
2559 int elements = LaneCountFromFormat(vform);
2560
2561 T element1, element3;
2562 rot = (rot == 1) ? 270 : 90;
2563
2564 // Loop example:
2565 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2566 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2567
2568 for (int e = 0; e <= (elements / 2) - 1; e++) {
2569 switch (rot) {
2570 case 90:
2571 element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2572 element3 = src2.Float<T>(e * 2);
2573 break;
2574 case 270:
2575 element1 = src2.Float<T>(e * 2 + 1);
2576 element3 = FPNeg(src2.Float<T>(e * 2));
2577 break;
2578 default:
2579 VIXL_UNREACHABLE();
2580 return dst; // prevents "element(n) may be unintialized" errors
2581 }
2582 dst.ClearForWrite(vform);
2583 dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2584 dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2585 }
2586 return dst;
2587 }
2588
2589
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2590 LogicVRegister Simulator::fcadd(VectorFormat vform,
2591 LogicVRegister dst, // d
2592 const LogicVRegister& src1, // n
2593 const LogicVRegister& src2, // m
2594 int rot) {
2595 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2596 fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2597 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2598 fcadd<float>(vform, dst, src1, src2, rot);
2599 } else {
2600 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2601 fcadd<double>(vform, dst, src1, src2, rot);
2602 }
2603 return dst;
2604 }
2605
2606 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int index,int rot)2607 LogicVRegister Simulator::fcmla(VectorFormat vform,
2608 LogicVRegister dst,
2609 const LogicVRegister& src1,
2610 const LogicVRegister& src2,
2611 const LogicVRegister& acc,
2612 int index,
2613 int rot) {
2614 int elements = LaneCountFromFormat(vform);
2615
2616 T element1, element2, element3, element4;
2617 rot *= 90;
2618
2619 // Loop example:
2620 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2621 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2622
2623 for (int e = 0; e <= (elements / 2) - 1; e++) {
2624 // Index == -1 indicates a vector/vector rather than vector/indexed-element
2625 // operation.
2626 int f = (index < 0) ? e : index;
2627
2628 switch (rot) {
2629 case 0:
2630 element1 = src2.Float<T>(f * 2);
2631 element2 = src1.Float<T>(e * 2);
2632 element3 = src2.Float<T>(f * 2 + 1);
2633 element4 = src1.Float<T>(e * 2);
2634 break;
2635 case 90:
2636 element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2637 element2 = src1.Float<T>(e * 2 + 1);
2638 element3 = src2.Float<T>(f * 2);
2639 element4 = src1.Float<T>(e * 2 + 1);
2640 break;
2641 case 180:
2642 element1 = FPNeg(src2.Float<T>(f * 2));
2643 element2 = src1.Float<T>(e * 2);
2644 element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2645 element4 = src1.Float<T>(e * 2);
2646 break;
2647 case 270:
2648 element1 = src2.Float<T>(f * 2 + 1);
2649 element2 = src1.Float<T>(e * 2 + 1);
2650 element3 = FPNeg(src2.Float<T>(f * 2));
2651 element4 = src1.Float<T>(e * 2 + 1);
2652 break;
2653 default:
2654 VIXL_UNREACHABLE();
2655 return dst; // prevents "element(n) may be unintialized" errors
2656 }
2657 dst.ClearForWrite(vform);
2658 dst.SetFloat<T>(vform,
2659 e * 2,
2660 FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2661 dst.SetFloat<T>(vform,
2662 e * 2 + 1,
2663 FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2664 }
2665 return dst;
2666 }
2667
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int rot)2668 LogicVRegister Simulator::fcmla(VectorFormat vform,
2669 LogicVRegister dst,
2670 const LogicVRegister& src1,
2671 const LogicVRegister& src2,
2672 const LogicVRegister& acc,
2673 int rot) {
2674 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2675 fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2676 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2677 fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2678 } else {
2679 fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2680 }
2681 return dst;
2682 }
2683
2684
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2685 LogicVRegister Simulator::fcmla(VectorFormat vform,
2686 LogicVRegister dst, // d
2687 const LogicVRegister& src1, // n
2688 const LogicVRegister& src2, // m
2689 int index,
2690 int rot) {
2691 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2692 VIXL_UNIMPLEMENTED();
2693 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2694 fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2695 } else {
2696 fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2697 }
2698 return dst;
2699 }
2700
cadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot,bool saturate)2701 LogicVRegister Simulator::cadd(VectorFormat vform,
2702 LogicVRegister dst,
2703 const LogicVRegister& src1,
2704 const LogicVRegister& src2,
2705 int rot,
2706 bool saturate) {
2707 SimVRegister src1_r, src1_i;
2708 SimVRegister src2_r, src2_i;
2709 SimVRegister zero;
2710 zero.Clear();
2711 uzp1(vform, src1_r, src1, zero);
2712 uzp2(vform, src1_i, src1, zero);
2713 uzp1(vform, src2_r, src2, zero);
2714 uzp2(vform, src2_i, src2, zero);
2715
2716 if (rot == 90) {
2717 if (saturate) {
2718 sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2719 add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2720 } else {
2721 sub(vform, src1_r, src1_r, src2_i);
2722 add(vform, src1_i, src1_i, src2_r);
2723 }
2724 } else {
2725 VIXL_ASSERT(rot == 270);
2726 if (saturate) {
2727 add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2728 sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2729 } else {
2730 add(vform, src1_r, src1_r, src2_i);
2731 sub(vform, src1_i, src1_i, src2_r);
2732 }
2733 }
2734
2735 zip1(vform, dst, src1_r, src1_i);
2736 return dst;
2737 }
2738
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2739 LogicVRegister Simulator::cmla(VectorFormat vform,
2740 LogicVRegister dst,
2741 const LogicVRegister& srca,
2742 const LogicVRegister& src1,
2743 const LogicVRegister& src2,
2744 int rot) {
2745 SimVRegister src1_a;
2746 SimVRegister src2_a, src2_b;
2747 SimVRegister srca_i, srca_r;
2748 SimVRegister zero, temp;
2749 zero.Clear();
2750
2751 if ((rot == 0) || (rot == 180)) {
2752 uzp1(vform, src1_a, src1, zero);
2753 uzp1(vform, src2_a, src2, zero);
2754 uzp2(vform, src2_b, src2, zero);
2755 } else {
2756 uzp2(vform, src1_a, src1, zero);
2757 uzp2(vform, src2_a, src2, zero);
2758 uzp1(vform, src2_b, src2, zero);
2759 }
2760
2761 uzp1(vform, srca_r, srca, zero);
2762 uzp2(vform, srca_i, srca, zero);
2763
2764 bool sub_r = (rot == 90) || (rot == 180);
2765 bool sub_i = (rot == 180) || (rot == 270);
2766
2767 mul(vform, temp, src1_a, src2_a);
2768 if (sub_r) {
2769 sub(vform, srca_r, srca_r, temp);
2770 } else {
2771 add(vform, srca_r, srca_r, temp);
2772 }
2773
2774 mul(vform, temp, src1_a, src2_b);
2775 if (sub_i) {
2776 sub(vform, srca_i, srca_i, temp);
2777 } else {
2778 add(vform, srca_i, srca_i, temp);
2779 }
2780
2781 zip1(vform, dst, srca_r, srca_i);
2782 return dst;
2783 }
2784
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2785 LogicVRegister Simulator::cmla(VectorFormat vform,
2786 LogicVRegister dst,
2787 const LogicVRegister& srca,
2788 const LogicVRegister& src1,
2789 const LogicVRegister& src2,
2790 int index,
2791 int rot) {
2792 SimVRegister temp;
2793 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
2794 return cmla(vform, dst, srca, src1, temp, rot);
2795 }
2796
bgrp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool do_bext)2797 LogicVRegister Simulator::bgrp(VectorFormat vform,
2798 LogicVRegister dst,
2799 const LogicVRegister& src1,
2800 const LogicVRegister& src2,
2801 bool do_bext) {
2802 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2803 uint64_t value = src1.Uint(vform, i);
2804 uint64_t mask = src2.Uint(vform, i);
2805 int high_pos = 0;
2806 int low_pos = 0;
2807 uint64_t result_high = 0;
2808 uint64_t result_low = 0;
2809 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2810 if ((mask & 1) == 0) {
2811 result_high |= (value & 1) << high_pos;
2812 high_pos++;
2813 } else {
2814 result_low |= (value & 1) << low_pos;
2815 low_pos++;
2816 }
2817 mask >>= 1;
2818 value >>= 1;
2819 }
2820
2821 if (!do_bext) {
2822 result_low |= result_high << low_pos;
2823 }
2824
2825 dst.SetUint(vform, i, result_low);
2826 }
2827 return dst;
2828 }
2829
bdep(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2830 LogicVRegister Simulator::bdep(VectorFormat vform,
2831 LogicVRegister dst,
2832 const LogicVRegister& src1,
2833 const LogicVRegister& src2) {
2834 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2835 uint64_t value = src1.Uint(vform, i);
2836 uint64_t mask = src2.Uint(vform, i);
2837 uint64_t result = 0;
2838 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2839 if ((mask & 1) == 1) {
2840 result |= (value & 1) << j;
2841 value >>= 1;
2842 }
2843 mask >>= 1;
2844 }
2845 dst.SetUint(vform, i, result);
2846 }
2847 return dst;
2848 }
2849
histogram(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2,bool do_segmented)2850 LogicVRegister Simulator::histogram(VectorFormat vform,
2851 LogicVRegister dst,
2852 const LogicPRegister& pg,
2853 const LogicVRegister& src1,
2854 const LogicVRegister& src2,
2855 bool do_segmented) {
2856 int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
2857 uint64_t result[kZRegMaxSizeInBytes];
2858
2859 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2860 uint64_t count = 0;
2861 uint64_t value = src1.Uint(vform, i);
2862
2863 int segment = do_segmented ? (i / elements_per_segment) : 0;
2864 int segment_offset = segment * elements_per_segment;
2865 int hist_limit = do_segmented ? elements_per_segment : (i + 1);
2866 for (int j = 0; j < hist_limit; j++) {
2867 if (pg.IsActive(vform, j) &&
2868 (value == src2.Uint(vform, j + segment_offset))) {
2869 count++;
2870 }
2871 }
2872 result[i] = count;
2873 }
2874 dst.SetUintArray(vform, result);
2875 return dst;
2876 }
2877
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2878 LogicVRegister Simulator::dup_element(VectorFormat vform,
2879 LogicVRegister dst,
2880 const LogicVRegister& src,
2881 int src_index) {
2882 if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
2883 // When duplicating an element larger than 64 bits, split the element into
2884 // 64-bit parts, and duplicate the parts across the destination.
2885 uint64_t d[4];
2886 int count = (vform == kFormatVnQ) ? 2 : 4;
2887 for (int i = 0; i < count; i++) {
2888 d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
2889 }
2890 dst.Clear();
2891 for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
2892 dst.SetUint(kFormatVnD, i, d[i % count]);
2893 }
2894 } else {
2895 int lane_count = LaneCountFromFormat(vform);
2896 uint64_t value = src.Uint(vform, src_index);
2897 dst.ClearForWrite(vform);
2898 for (int i = 0; i < lane_count; ++i) {
2899 dst.SetUint(vform, i, value);
2900 }
2901 }
2902 return dst;
2903 }
2904
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2905 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2906 LogicVRegister dst,
2907 const LogicVRegister& src,
2908 int src_index) {
2909 // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2910 // whereas in NEON, the size of segment is equal to the size of register
2911 // itself.
2912 int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2913 VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2914 int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2915
2916 VIXL_ASSERT(src_index >= 0);
2917 VIXL_ASSERT(src_index < lanes_per_segment);
2918
2919 dst.ClearForWrite(vform);
2920 for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2921 uint64_t value = src.Uint(vform, j + src_index);
2922 for (int i = 0; i < lanes_per_segment; i++) {
2923 dst.SetUint(vform, j + i, value);
2924 }
2925 }
2926 return dst;
2927 }
2928
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const std::pair<int,int> & src_and_index)2929 LogicVRegister Simulator::dup_elements_to_segments(
2930 VectorFormat vform,
2931 LogicVRegister dst,
2932 const std::pair<int, int>& src_and_index) {
2933 return dup_elements_to_segments(vform,
2934 dst,
2935 ReadVRegister(src_and_index.first),
2936 src_and_index.second);
2937 }
2938
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2939 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2940 LogicVRegister dst,
2941 uint64_t imm) {
2942 int lane_count = LaneCountFromFormat(vform);
2943 uint64_t value = imm & MaxUintFromFormat(vform);
2944 dst.ClearForWrite(vform);
2945 for (int i = 0; i < lane_count; ++i) {
2946 dst.SetUint(vform, i, value);
2947 }
2948 return dst;
2949 }
2950
2951
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2952 LogicVRegister Simulator::ins_element(VectorFormat vform,
2953 LogicVRegister dst,
2954 int dst_index,
2955 const LogicVRegister& src,
2956 int src_index) {
2957 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2958 return dst;
2959 }
2960
2961
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2962 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2963 LogicVRegister dst,
2964 int dst_index,
2965 uint64_t imm) {
2966 uint64_t value = imm & MaxUintFromFormat(vform);
2967 dst.SetUint(vform, dst_index, value);
2968 return dst;
2969 }
2970
2971
index(VectorFormat vform,LogicVRegister dst,uint64_t start,uint64_t step)2972 LogicVRegister Simulator::index(VectorFormat vform,
2973 LogicVRegister dst,
2974 uint64_t start,
2975 uint64_t step) {
2976 VIXL_ASSERT(IsSVEFormat(vform));
2977 uint64_t value = start;
2978 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2979 dst.SetUint(vform, i, value);
2980 value += step;
2981 }
2982 return dst;
2983 }
2984
2985
insr(VectorFormat vform,LogicVRegister dst,uint64_t imm)2986 LogicVRegister Simulator::insr(VectorFormat vform,
2987 LogicVRegister dst,
2988 uint64_t imm) {
2989 VIXL_ASSERT(IsSVEFormat(vform));
2990 for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
2991 dst.SetUint(vform, i, dst.Uint(vform, i - 1));
2992 }
2993 dst.SetUint(vform, 0, imm);
2994 return dst;
2995 }
2996
2997
mov(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2998 LogicVRegister Simulator::mov(VectorFormat vform,
2999 LogicVRegister dst,
3000 const LogicVRegister& src) {
3001 dst.ClearForWrite(vform);
3002 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
3003 dst.SetUint(vform, lane, src.Uint(vform, lane));
3004 }
3005 return dst;
3006 }
3007
3008
mov(LogicPRegister dst,const LogicPRegister & src)3009 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
3010 // Avoid a copy if the registers already alias.
3011 if (dst.Aliases(src)) return dst;
3012
3013 for (int i = 0; i < dst.GetChunkCount(); i++) {
3014 dst.SetChunk(i, src.GetChunk(i));
3015 }
3016 return dst;
3017 }
3018
3019
mov_merging(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3020 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3021 LogicVRegister dst,
3022 const SimPRegister& pg,
3023 const LogicVRegister& src) {
3024 return sel(vform, dst, pg, src, dst);
3025 }
3026
mov_zeroing(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3027 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3028 LogicVRegister dst,
3029 const SimPRegister& pg,
3030 const LogicVRegister& src) {
3031 SimVRegister zero;
3032 dup_immediate(vform, zero, 0);
3033 return sel(vform, dst, pg, src, zero);
3034 }
3035
mov_alternating(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int start_at)3036 LogicVRegister Simulator::mov_alternating(VectorFormat vform,
3037 LogicVRegister dst,
3038 const LogicVRegister& src,
3039 int start_at) {
3040 VIXL_ASSERT((start_at == 0) || (start_at == 1));
3041 for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
3042 dst.SetUint(vform, i, src.Uint(vform, i));
3043 }
3044 return dst;
3045 }
3046
mov_merging(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3047 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3048 const LogicPRegister& pg,
3049 const LogicPRegister& src) {
3050 return sel(dst, pg, src, dst);
3051 }
3052
mov_zeroing(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3053 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3054 const LogicPRegister& pg,
3055 const LogicPRegister& src) {
3056 SimPRegister all_false;
3057 return sel(dst, pg, src, pfalse(all_false));
3058 }
3059
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)3060 LogicVRegister Simulator::movi(VectorFormat vform,
3061 LogicVRegister dst,
3062 uint64_t imm) {
3063 int lane_count = LaneCountFromFormat(vform);
3064 dst.ClearForWrite(vform);
3065 for (int i = 0; i < lane_count; ++i) {
3066 dst.SetUint(vform, i, imm);
3067 }
3068 return dst;
3069 }
3070
3071
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)3072 LogicVRegister Simulator::mvni(VectorFormat vform,
3073 LogicVRegister dst,
3074 uint64_t imm) {
3075 int lane_count = LaneCountFromFormat(vform);
3076 dst.ClearForWrite(vform);
3077 for (int i = 0; i < lane_count; ++i) {
3078 dst.SetUint(vform, i, ~imm);
3079 }
3080 return dst;
3081 }
3082
3083
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)3084 LogicVRegister Simulator::orr(VectorFormat vform,
3085 LogicVRegister dst,
3086 const LogicVRegister& src,
3087 uint64_t imm) {
3088 uint64_t result[16];
3089 int lane_count = LaneCountFromFormat(vform);
3090 for (int i = 0; i < lane_count; ++i) {
3091 result[i] = src.Uint(vform, i) | imm;
3092 }
3093 dst.ClearForWrite(vform);
3094 for (int i = 0; i < lane_count; ++i) {
3095 dst.SetUint(vform, i, result[i]);
3096 }
3097 return dst;
3098 }
3099
3100
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3101 LogicVRegister Simulator::uxtl(VectorFormat vform,
3102 LogicVRegister dst,
3103 const LogicVRegister& src,
3104 bool is_2) {
3105 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3106 int lane_count = LaneCountFromFormat(vform);
3107 int src_offset = is_2 ? lane_count : 0;
3108
3109 dst.ClearForWrite(vform);
3110 for (int i = 0; i < lane_count; i++) {
3111 dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
3112 }
3113 return dst;
3114 }
3115
3116
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3117 LogicVRegister Simulator::sxtl(VectorFormat vform,
3118 LogicVRegister dst,
3119 const LogicVRegister& src,
3120 bool is_2) {
3121 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3122 int lane_count = LaneCountFromFormat(vform);
3123 int src_offset = is_2 ? lane_count : 0;
3124
3125 dst.ClearForWrite(vform);
3126 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3127 dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
3128 }
3129 return dst;
3130 }
3131
3132
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3133 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3134 LogicVRegister dst,
3135 const LogicVRegister& src) {
3136 return uxtl(vform, dst, src, /* is_2 = */ true);
3137 }
3138
3139
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3140 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3141 LogicVRegister dst,
3142 const LogicVRegister& src) {
3143 return sxtl(vform, dst, src, /* is_2 = */ true);
3144 }
3145
3146
uxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3147 LogicVRegister Simulator::uxt(VectorFormat vform,
3148 LogicVRegister dst,
3149 const LogicVRegister& src,
3150 unsigned from_size_in_bits) {
3151 int lane_count = LaneCountFromFormat(vform);
3152 uint64_t mask = GetUintMask(from_size_in_bits);
3153
3154 dst.ClearForWrite(vform);
3155 for (int i = 0; i < lane_count; i++) {
3156 dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3157 }
3158 return dst;
3159 }
3160
3161
sxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3162 LogicVRegister Simulator::sxt(VectorFormat vform,
3163 LogicVRegister dst,
3164 const LogicVRegister& src,
3165 unsigned from_size_in_bits) {
3166 int lane_count = LaneCountFromFormat(vform);
3167
3168 dst.ClearForWrite(vform);
3169 for (int i = 0; i < lane_count; i++) {
3170 uint64_t value =
3171 ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3172 dst.SetInt(vform, i, value);
3173 }
3174 return dst;
3175 }
3176
3177
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3178 LogicVRegister Simulator::shrn(VectorFormat vform,
3179 LogicVRegister dst,
3180 const LogicVRegister& src,
3181 int shift) {
3182 SimVRegister temp;
3183 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3184 VectorFormat vform_dst = vform;
3185 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3186 return extractnarrow(vform_dst, dst, false, shifted_src, false);
3187 }
3188
3189
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3190 LogicVRegister Simulator::shrn2(VectorFormat vform,
3191 LogicVRegister dst,
3192 const LogicVRegister& src,
3193 int shift) {
3194 SimVRegister temp;
3195 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3196 VectorFormat vformdst = vform;
3197 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3198 return extractnarrow(vformdst, dst, false, shifted_src, false);
3199 }
3200
3201
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3202 LogicVRegister Simulator::rshrn(VectorFormat vform,
3203 LogicVRegister dst,
3204 const LogicVRegister& src,
3205 int shift) {
3206 SimVRegister temp;
3207 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3208 VectorFormat vformdst = vform;
3209 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3210 return extractnarrow(vformdst, dst, false, shifted_src, false);
3211 }
3212
3213
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3214 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3215 LogicVRegister dst,
3216 const LogicVRegister& src,
3217 int shift) {
3218 SimVRegister temp;
3219 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3220 VectorFormat vformdst = vform;
3221 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3222 return extractnarrow(vformdst, dst, false, shifted_src, false);
3223 }
3224
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)3225 LogicVRegister Simulator::Table(VectorFormat vform,
3226 LogicVRegister dst,
3227 const LogicVRegister& ind,
3228 bool zero_out_of_bounds,
3229 const LogicVRegister* tab1,
3230 const LogicVRegister* tab2,
3231 const LogicVRegister* tab3,
3232 const LogicVRegister* tab4) {
3233 VIXL_ASSERT(tab1 != NULL);
3234 int lane_count = LaneCountFromFormat(vform);
3235 VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
3236 uint64_t table[kZRegMaxSizeInBytes * 2];
3237 uint64_t result[kZRegMaxSizeInBytes];
3238
3239 // For Neon, the table source registers are always 16B, and Neon allows only
3240 // 8B or 16B vform for the destination, so infer the table format from the
3241 // destination.
3242 VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
3243
3244 uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
3245 if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
3246 if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
3247 if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
3248
3249 for (int i = 0; i < lane_count; i++) {
3250 uint64_t index = ind.Uint(vform, i);
3251 result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
3252 if (index < tab_size) result[i] = table[index];
3253 }
3254 dst.SetUintArray(vform, result);
3255 return dst;
3256 }
3257
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3258 LogicVRegister Simulator::tbl(VectorFormat vform,
3259 LogicVRegister dst,
3260 const LogicVRegister& tab,
3261 const LogicVRegister& ind) {
3262 return Table(vform, dst, ind, true, &tab);
3263 }
3264
3265
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3266 LogicVRegister Simulator::tbl(VectorFormat vform,
3267 LogicVRegister dst,
3268 const LogicVRegister& tab,
3269 const LogicVRegister& tab2,
3270 const LogicVRegister& ind) {
3271 return Table(vform, dst, ind, true, &tab, &tab2);
3272 }
3273
3274
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3275 LogicVRegister Simulator::tbl(VectorFormat vform,
3276 LogicVRegister dst,
3277 const LogicVRegister& tab,
3278 const LogicVRegister& tab2,
3279 const LogicVRegister& tab3,
3280 const LogicVRegister& ind) {
3281 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3282 }
3283
3284
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3285 LogicVRegister Simulator::tbl(VectorFormat vform,
3286 LogicVRegister dst,
3287 const LogicVRegister& tab,
3288 const LogicVRegister& tab2,
3289 const LogicVRegister& tab3,
3290 const LogicVRegister& tab4,
3291 const LogicVRegister& ind) {
3292 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3293 }
3294
3295
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3296 LogicVRegister Simulator::tbx(VectorFormat vform,
3297 LogicVRegister dst,
3298 const LogicVRegister& tab,
3299 const LogicVRegister& ind) {
3300 return Table(vform, dst, ind, false, &tab);
3301 }
3302
3303
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3304 LogicVRegister Simulator::tbx(VectorFormat vform,
3305 LogicVRegister dst,
3306 const LogicVRegister& tab,
3307 const LogicVRegister& tab2,
3308 const LogicVRegister& ind) {
3309 return Table(vform, dst, ind, false, &tab, &tab2);
3310 }
3311
3312
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3313 LogicVRegister Simulator::tbx(VectorFormat vform,
3314 LogicVRegister dst,
3315 const LogicVRegister& tab,
3316 const LogicVRegister& tab2,
3317 const LogicVRegister& tab3,
3318 const LogicVRegister& ind) {
3319 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3320 }
3321
3322
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3323 LogicVRegister Simulator::tbx(VectorFormat vform,
3324 LogicVRegister dst,
3325 const LogicVRegister& tab,
3326 const LogicVRegister& tab2,
3327 const LogicVRegister& tab3,
3328 const LogicVRegister& tab4,
3329 const LogicVRegister& ind) {
3330 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3331 }
3332
3333
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3334 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3335 LogicVRegister dst,
3336 const LogicVRegister& src,
3337 int shift) {
3338 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3339 }
3340
3341
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3342 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3343 LogicVRegister dst,
3344 const LogicVRegister& src,
3345 int shift) {
3346 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3347 }
3348
3349
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3350 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3351 LogicVRegister dst,
3352 const LogicVRegister& src,
3353 int shift) {
3354 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3355 }
3356
3357
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3358 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3359 LogicVRegister dst,
3360 const LogicVRegister& src,
3361 int shift) {
3362 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3363 }
3364
3365
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3366 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3367 LogicVRegister dst,
3368 const LogicVRegister& src,
3369 int shift) {
3370 SimVRegister temp;
3371 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3372 VectorFormat vformdst = vform;
3373 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3374 return sqxtn(vformdst, dst, shifted_src);
3375 }
3376
3377
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3378 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3379 LogicVRegister dst,
3380 const LogicVRegister& src,
3381 int shift) {
3382 SimVRegister temp;
3383 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3384 VectorFormat vformdst = vform;
3385 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3386 return sqxtn(vformdst, dst, shifted_src);
3387 }
3388
3389
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3390 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3391 LogicVRegister dst,
3392 const LogicVRegister& src,
3393 int shift) {
3394 SimVRegister temp;
3395 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3396 VectorFormat vformdst = vform;
3397 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3398 return sqxtn(vformdst, dst, shifted_src);
3399 }
3400
3401
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3402 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3403 LogicVRegister dst,
3404 const LogicVRegister& src,
3405 int shift) {
3406 SimVRegister temp;
3407 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3408 VectorFormat vformdst = vform;
3409 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3410 return sqxtn(vformdst, dst, shifted_src);
3411 }
3412
3413
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3414 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3415 LogicVRegister dst,
3416 const LogicVRegister& src,
3417 int shift) {
3418 SimVRegister temp;
3419 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3420 VectorFormat vformdst = vform;
3421 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3422 return sqxtun(vformdst, dst, shifted_src);
3423 }
3424
3425
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3426 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3427 LogicVRegister dst,
3428 const LogicVRegister& src,
3429 int shift) {
3430 SimVRegister temp;
3431 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3432 VectorFormat vformdst = vform;
3433 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3434 return sqxtun(vformdst, dst, shifted_src);
3435 }
3436
3437
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3438 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3439 LogicVRegister dst,
3440 const LogicVRegister& src,
3441 int shift) {
3442 SimVRegister temp;
3443 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3444 VectorFormat vformdst = vform;
3445 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3446 return sqxtun(vformdst, dst, shifted_src);
3447 }
3448
3449
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3450 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3451 LogicVRegister dst,
3452 const LogicVRegister& src,
3453 int shift) {
3454 SimVRegister temp;
3455 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3456 VectorFormat vformdst = vform;
3457 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3458 return sqxtun(vformdst, dst, shifted_src);
3459 }
3460
3461
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3462 LogicVRegister Simulator::uaddl(VectorFormat vform,
3463 LogicVRegister dst,
3464 const LogicVRegister& src1,
3465 const LogicVRegister& src2) {
3466 SimVRegister temp1, temp2;
3467 uxtl(vform, temp1, src1);
3468 uxtl(vform, temp2, src2);
3469 add(vform, dst, temp1, temp2);
3470 return dst;
3471 }
3472
3473
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3474 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3475 LogicVRegister dst,
3476 const LogicVRegister& src1,
3477 const LogicVRegister& src2) {
3478 SimVRegister temp1, temp2;
3479 uxtl2(vform, temp1, src1);
3480 uxtl2(vform, temp2, src2);
3481 add(vform, dst, temp1, temp2);
3482 return dst;
3483 }
3484
3485
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3486 LogicVRegister Simulator::uaddw(VectorFormat vform,
3487 LogicVRegister dst,
3488 const LogicVRegister& src1,
3489 const LogicVRegister& src2) {
3490 SimVRegister temp;
3491 uxtl(vform, temp, src2);
3492 add(vform, dst, src1, temp);
3493 return dst;
3494 }
3495
3496
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3497 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3498 LogicVRegister dst,
3499 const LogicVRegister& src1,
3500 const LogicVRegister& src2) {
3501 SimVRegister temp;
3502 uxtl2(vform, temp, src2);
3503 add(vform, dst, src1, temp);
3504 return dst;
3505 }
3506
3507
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3508 LogicVRegister Simulator::saddl(VectorFormat vform,
3509 LogicVRegister dst,
3510 const LogicVRegister& src1,
3511 const LogicVRegister& src2) {
3512 SimVRegister temp1, temp2;
3513 sxtl(vform, temp1, src1);
3514 sxtl(vform, temp2, src2);
3515 add(vform, dst, temp1, temp2);
3516 return dst;
3517 }
3518
3519
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3520 LogicVRegister Simulator::saddl2(VectorFormat vform,
3521 LogicVRegister dst,
3522 const LogicVRegister& src1,
3523 const LogicVRegister& src2) {
3524 SimVRegister temp1, temp2;
3525 sxtl2(vform, temp1, src1);
3526 sxtl2(vform, temp2, src2);
3527 add(vform, dst, temp1, temp2);
3528 return dst;
3529 }
3530
3531
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3532 LogicVRegister Simulator::saddw(VectorFormat vform,
3533 LogicVRegister dst,
3534 const LogicVRegister& src1,
3535 const LogicVRegister& src2) {
3536 SimVRegister temp;
3537 sxtl(vform, temp, src2);
3538 add(vform, dst, src1, temp);
3539 return dst;
3540 }
3541
3542
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3543 LogicVRegister Simulator::saddw2(VectorFormat vform,
3544 LogicVRegister dst,
3545 const LogicVRegister& src1,
3546 const LogicVRegister& src2) {
3547 SimVRegister temp;
3548 sxtl2(vform, temp, src2);
3549 add(vform, dst, src1, temp);
3550 return dst;
3551 }
3552
3553
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3554 LogicVRegister Simulator::usubl(VectorFormat vform,
3555 LogicVRegister dst,
3556 const LogicVRegister& src1,
3557 const LogicVRegister& src2) {
3558 SimVRegister temp1, temp2;
3559 uxtl(vform, temp1, src1);
3560 uxtl(vform, temp2, src2);
3561 sub(vform, dst, temp1, temp2);
3562 return dst;
3563 }
3564
3565
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3566 LogicVRegister Simulator::usubl2(VectorFormat vform,
3567 LogicVRegister dst,
3568 const LogicVRegister& src1,
3569 const LogicVRegister& src2) {
3570 SimVRegister temp1, temp2;
3571 uxtl2(vform, temp1, src1);
3572 uxtl2(vform, temp2, src2);
3573 sub(vform, dst, temp1, temp2);
3574 return dst;
3575 }
3576
3577
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3578 LogicVRegister Simulator::usubw(VectorFormat vform,
3579 LogicVRegister dst,
3580 const LogicVRegister& src1,
3581 const LogicVRegister& src2) {
3582 SimVRegister temp;
3583 uxtl(vform, temp, src2);
3584 sub(vform, dst, src1, temp);
3585 return dst;
3586 }
3587
3588
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3589 LogicVRegister Simulator::usubw2(VectorFormat vform,
3590 LogicVRegister dst,
3591 const LogicVRegister& src1,
3592 const LogicVRegister& src2) {
3593 SimVRegister temp;
3594 uxtl2(vform, temp, src2);
3595 sub(vform, dst, src1, temp);
3596 return dst;
3597 }
3598
3599
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3600 LogicVRegister Simulator::ssubl(VectorFormat vform,
3601 LogicVRegister dst,
3602 const LogicVRegister& src1,
3603 const LogicVRegister& src2) {
3604 SimVRegister temp1, temp2;
3605 sxtl(vform, temp1, src1);
3606 sxtl(vform, temp2, src2);
3607 sub(vform, dst, temp1, temp2);
3608 return dst;
3609 }
3610
3611
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3612 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3613 LogicVRegister dst,
3614 const LogicVRegister& src1,
3615 const LogicVRegister& src2) {
3616 SimVRegister temp1, temp2;
3617 sxtl2(vform, temp1, src1);
3618 sxtl2(vform, temp2, src2);
3619 sub(vform, dst, temp1, temp2);
3620 return dst;
3621 }
3622
3623
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3624 LogicVRegister Simulator::ssubw(VectorFormat vform,
3625 LogicVRegister dst,
3626 const LogicVRegister& src1,
3627 const LogicVRegister& src2) {
3628 SimVRegister temp;
3629 sxtl(vform, temp, src2);
3630 sub(vform, dst, src1, temp);
3631 return dst;
3632 }
3633
3634
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3635 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3636 LogicVRegister dst,
3637 const LogicVRegister& src1,
3638 const LogicVRegister& src2) {
3639 SimVRegister temp;
3640 sxtl2(vform, temp, src2);
3641 sub(vform, dst, src1, temp);
3642 return dst;
3643 }
3644
3645
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3646 LogicVRegister Simulator::uabal(VectorFormat vform,
3647 LogicVRegister dst,
3648 const LogicVRegister& src1,
3649 const LogicVRegister& src2) {
3650 SimVRegister temp1, temp2;
3651 uxtl(vform, temp1, src1);
3652 uxtl(vform, temp2, src2);
3653 uaba(vform, dst, temp1, temp2);
3654 return dst;
3655 }
3656
3657
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3658 LogicVRegister Simulator::uabal2(VectorFormat vform,
3659 LogicVRegister dst,
3660 const LogicVRegister& src1,
3661 const LogicVRegister& src2) {
3662 SimVRegister temp1, temp2;
3663 uxtl2(vform, temp1, src1);
3664 uxtl2(vform, temp2, src2);
3665 uaba(vform, dst, temp1, temp2);
3666 return dst;
3667 }
3668
3669
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3670 LogicVRegister Simulator::sabal(VectorFormat vform,
3671 LogicVRegister dst,
3672 const LogicVRegister& src1,
3673 const LogicVRegister& src2) {
3674 SimVRegister temp1, temp2;
3675 sxtl(vform, temp1, src1);
3676 sxtl(vform, temp2, src2);
3677 saba(vform, dst, temp1, temp2);
3678 return dst;
3679 }
3680
3681
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3682 LogicVRegister Simulator::sabal2(VectorFormat vform,
3683 LogicVRegister dst,
3684 const LogicVRegister& src1,
3685 const LogicVRegister& src2) {
3686 SimVRegister temp1, temp2;
3687 sxtl2(vform, temp1, src1);
3688 sxtl2(vform, temp2, src2);
3689 saba(vform, dst, temp1, temp2);
3690 return dst;
3691 }
3692
3693
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3694 LogicVRegister Simulator::uabdl(VectorFormat vform,
3695 LogicVRegister dst,
3696 const LogicVRegister& src1,
3697 const LogicVRegister& src2) {
3698 SimVRegister temp1, temp2;
3699 uxtl(vform, temp1, src1);
3700 uxtl(vform, temp2, src2);
3701 absdiff(vform, dst, temp1, temp2, false);
3702 return dst;
3703 }
3704
3705
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3706 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3707 LogicVRegister dst,
3708 const LogicVRegister& src1,
3709 const LogicVRegister& src2) {
3710 SimVRegister temp1, temp2;
3711 uxtl2(vform, temp1, src1);
3712 uxtl2(vform, temp2, src2);
3713 absdiff(vform, dst, temp1, temp2, false);
3714 return dst;
3715 }
3716
3717
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3718 LogicVRegister Simulator::sabdl(VectorFormat vform,
3719 LogicVRegister dst,
3720 const LogicVRegister& src1,
3721 const LogicVRegister& src2) {
3722 SimVRegister temp1, temp2;
3723 sxtl(vform, temp1, src1);
3724 sxtl(vform, temp2, src2);
3725 absdiff(vform, dst, temp1, temp2, true);
3726 return dst;
3727 }
3728
3729
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3730 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3731 LogicVRegister dst,
3732 const LogicVRegister& src1,
3733 const LogicVRegister& src2) {
3734 SimVRegister temp1, temp2;
3735 sxtl2(vform, temp1, src1);
3736 sxtl2(vform, temp2, src2);
3737 absdiff(vform, dst, temp1, temp2, true);
3738 return dst;
3739 }
3740
3741
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3742 LogicVRegister Simulator::umull(VectorFormat vform,
3743 LogicVRegister dst,
3744 const LogicVRegister& src1,
3745 const LogicVRegister& src2,
3746 bool is_2) {
3747 SimVRegister temp1, temp2;
3748 uxtl(vform, temp1, src1, is_2);
3749 uxtl(vform, temp2, src2, is_2);
3750 mul(vform, dst, temp1, temp2);
3751 return dst;
3752 }
3753
3754
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3755 LogicVRegister Simulator::umull2(VectorFormat vform,
3756 LogicVRegister dst,
3757 const LogicVRegister& src1,
3758 const LogicVRegister& src2) {
3759 return umull(vform, dst, src1, src2, /* is_2 = */ true);
3760 }
3761
3762
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3763 LogicVRegister Simulator::smull(VectorFormat vform,
3764 LogicVRegister dst,
3765 const LogicVRegister& src1,
3766 const LogicVRegister& src2,
3767 bool is_2) {
3768 SimVRegister temp1, temp2;
3769 sxtl(vform, temp1, src1, is_2);
3770 sxtl(vform, temp2, src2, is_2);
3771 mul(vform, dst, temp1, temp2);
3772 return dst;
3773 }
3774
3775
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3776 LogicVRegister Simulator::smull2(VectorFormat vform,
3777 LogicVRegister dst,
3778 const LogicVRegister& src1,
3779 const LogicVRegister& src2) {
3780 return smull(vform, dst, src1, src2, /* is_2 = */ true);
3781 }
3782
3783
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3784 LogicVRegister Simulator::umlsl(VectorFormat vform,
3785 LogicVRegister dst,
3786 const LogicVRegister& src1,
3787 const LogicVRegister& src2,
3788 bool is_2) {
3789 SimVRegister temp1, temp2;
3790 uxtl(vform, temp1, src1, is_2);
3791 uxtl(vform, temp2, src2, is_2);
3792 mls(vform, dst, dst, temp1, temp2);
3793 return dst;
3794 }
3795
3796
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3797 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3798 LogicVRegister dst,
3799 const LogicVRegister& src1,
3800 const LogicVRegister& src2) {
3801 return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
3802 }
3803
3804
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3805 LogicVRegister Simulator::smlsl(VectorFormat vform,
3806 LogicVRegister dst,
3807 const LogicVRegister& src1,
3808 const LogicVRegister& src2,
3809 bool is_2) {
3810 SimVRegister temp1, temp2;
3811 sxtl(vform, temp1, src1, is_2);
3812 sxtl(vform, temp2, src2, is_2);
3813 mls(vform, dst, dst, temp1, temp2);
3814 return dst;
3815 }
3816
3817
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3818 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3819 LogicVRegister dst,
3820 const LogicVRegister& src1,
3821 const LogicVRegister& src2) {
3822 return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
3823 }
3824
3825
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3826 LogicVRegister Simulator::umlal(VectorFormat vform,
3827 LogicVRegister dst,
3828 const LogicVRegister& src1,
3829 const LogicVRegister& src2,
3830 bool is_2) {
3831 SimVRegister temp1, temp2;
3832 uxtl(vform, temp1, src1, is_2);
3833 uxtl(vform, temp2, src2, is_2);
3834 mla(vform, dst, dst, temp1, temp2);
3835 return dst;
3836 }
3837
3838
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3839 LogicVRegister Simulator::umlal2(VectorFormat vform,
3840 LogicVRegister dst,
3841 const LogicVRegister& src1,
3842 const LogicVRegister& src2) {
3843 return umlal(vform, dst, src1, src2, /* is_2 = */ true);
3844 }
3845
3846
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3847 LogicVRegister Simulator::smlal(VectorFormat vform,
3848 LogicVRegister dst,
3849 const LogicVRegister& src1,
3850 const LogicVRegister& src2,
3851 bool is_2) {
3852 SimVRegister temp1, temp2;
3853 sxtl(vform, temp1, src1, is_2);
3854 sxtl(vform, temp2, src2, is_2);
3855 mla(vform, dst, dst, temp1, temp2);
3856 return dst;
3857 }
3858
3859
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3860 LogicVRegister Simulator::smlal2(VectorFormat vform,
3861 LogicVRegister dst,
3862 const LogicVRegister& src1,
3863 const LogicVRegister& src2) {
3864 return smlal(vform, dst, src1, src2, /* is_2 = */ true);
3865 }
3866
3867
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3868 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3869 LogicVRegister dst,
3870 const LogicVRegister& src1,
3871 const LogicVRegister& src2,
3872 bool is_2) {
3873 SimVRegister temp;
3874 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3875 return add(vform, dst, dst, product).SignedSaturate(vform);
3876 }
3877
3878
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3879 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3880 LogicVRegister dst,
3881 const LogicVRegister& src1,
3882 const LogicVRegister& src2) {
3883 return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
3884 }
3885
3886
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3887 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3888 LogicVRegister dst,
3889 const LogicVRegister& src1,
3890 const LogicVRegister& src2,
3891 bool is_2) {
3892 SimVRegister temp;
3893 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3894 return sub(vform, dst, dst, product).SignedSaturate(vform);
3895 }
3896
3897
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3898 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3899 LogicVRegister dst,
3900 const LogicVRegister& src1,
3901 const LogicVRegister& src2) {
3902 return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
3903 }
3904
3905
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3906 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3907 LogicVRegister dst,
3908 const LogicVRegister& src1,
3909 const LogicVRegister& src2,
3910 bool is_2) {
3911 SimVRegister temp;
3912 LogicVRegister product = smull(vform, temp, src1, src2, is_2);
3913 return add(vform, dst, product, product).SignedSaturate(vform);
3914 }
3915
3916
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3917 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3918 LogicVRegister dst,
3919 const LogicVRegister& src1,
3920 const LogicVRegister& src2) {
3921 return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
3922 }
3923
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3924 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3925 LogicVRegister dst,
3926 const LogicVRegister& src1,
3927 const LogicVRegister& src2,
3928 bool round) {
3929 int esize = LaneSizeInBitsFromFormat(vform);
3930
3931 SimVRegister temp_lo, temp_hi;
3932
3933 // Compute low and high multiplication results.
3934 mul(vform, temp_lo, src1, src2);
3935 smulh(vform, temp_hi, src1, src2);
3936
3937 // Double by shifting high half, and adding in most-significant bit of low
3938 // half.
3939 shl(vform, temp_hi, temp_hi, 1);
3940 usra(vform, temp_hi, temp_lo, esize - 1);
3941
3942 if (round) {
3943 // Add the second (due to doubling) most-significant bit of the low half
3944 // into the result.
3945 shl(vform, temp_lo, temp_lo, 1);
3946 usra(vform, temp_hi, temp_lo, esize - 1);
3947 }
3948
3949 SimPRegister not_sat;
3950 LogicPRegister ptemp(not_sat);
3951 dst.ClearForWrite(vform);
3952 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3953 // Saturation only occurs when src1 = src2 = minimum representable value.
3954 // Check this as a special case.
3955 ptemp.SetActive(vform, i, true);
3956 if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
3957 (src2.Int(vform, i) == MinIntFromFormat(vform))) {
3958 ptemp.SetActive(vform, i, false);
3959 }
3960 dst.SetInt(vform, i, MaxIntFromFormat(vform));
3961 }
3962
3963 mov_merging(vform, dst, not_sat, temp_hi);
3964 return dst;
3965 }
3966
3967
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_src1_signed,bool is_src2_signed)3968 LogicVRegister Simulator::dot(VectorFormat vform,
3969 LogicVRegister dst,
3970 const LogicVRegister& src1,
3971 const LogicVRegister& src2,
3972 bool is_src1_signed,
3973 bool is_src2_signed) {
3974 VectorFormat quarter_vform =
3975 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3976
3977 dst.ClearForWrite(vform);
3978 for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3979 uint64_t result = 0;
3980 int64_t element1, element2;
3981 for (int i = 0; i < 4; i++) {
3982 int index = 4 * e + i;
3983 if (is_src1_signed) {
3984 element1 = src1.Int(quarter_vform, index);
3985 } else {
3986 element1 = src1.Uint(quarter_vform, index);
3987 }
3988 if (is_src2_signed) {
3989 element2 = src2.Int(quarter_vform, index);
3990 } else {
3991 element2 = src2.Uint(quarter_vform, index);
3992 }
3993 result += element1 * element2;
3994 }
3995 dst.SetUint(vform, e, result + dst.Uint(vform, e));
3996 }
3997 return dst;
3998 }
3999
4000
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4001 LogicVRegister Simulator::sdot(VectorFormat vform,
4002 LogicVRegister dst,
4003 const LogicVRegister& src1,
4004 const LogicVRegister& src2) {
4005 return dot(vform, dst, src1, src2, true, true);
4006 }
4007
4008
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4009 LogicVRegister Simulator::udot(VectorFormat vform,
4010 LogicVRegister dst,
4011 const LogicVRegister& src1,
4012 const LogicVRegister& src2) {
4013 return dot(vform, dst, src1, src2, false, false);
4014 }
4015
usdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4016 LogicVRegister Simulator::usdot(VectorFormat vform,
4017 LogicVRegister dst,
4018 const LogicVRegister& src1,
4019 const LogicVRegister& src2) {
4020 return dot(vform, dst, src1, src2, false, true);
4021 }
4022
cdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & acc,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4023 LogicVRegister Simulator::cdot(VectorFormat vform,
4024 LogicVRegister dst,
4025 const LogicVRegister& acc,
4026 const LogicVRegister& src1,
4027 const LogicVRegister& src2,
4028 int rot) {
4029 VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
4030 VectorFormat quarter_vform =
4031 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4032
4033 int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
4034 int sel_b = 1 - sel_a;
4035 int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
4036
4037 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4038 int64_t result = acc.Int(vform, i);
4039 for (int j = 0; j < 2; j++) {
4040 int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
4041 int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
4042 int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
4043 int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
4044 result += (r1 * r2) + (sub_i * i1 * i2);
4045 }
4046 dst.SetInt(vform, i, result);
4047 }
4048 return dst;
4049 }
4050
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4051 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4052 LogicVRegister dst,
4053 const LogicVRegister& srca,
4054 const LogicVRegister& src1,
4055 const LogicVRegister& src2,
4056 int rot) {
4057 SimVRegister src1_a, src1_b;
4058 SimVRegister src2_a, src2_b;
4059 SimVRegister srca_i, srca_r;
4060 SimVRegister zero, temp;
4061 zero.Clear();
4062
4063 if ((rot == 0) || (rot == 180)) {
4064 uzp1(vform, src1_a, src1, zero);
4065 uzp1(vform, src2_a, src2, zero);
4066 uzp2(vform, src2_b, src2, zero);
4067 } else {
4068 uzp2(vform, src1_a, src1, zero);
4069 uzp2(vform, src2_a, src2, zero);
4070 uzp1(vform, src2_b, src2, zero);
4071 }
4072
4073 uzp1(vform, srca_r, srca, zero);
4074 uzp2(vform, srca_i, srca, zero);
4075
4076 bool sub_r = (rot == 90) || (rot == 180);
4077 bool sub_i = (rot == 180) || (rot == 270);
4078
4079 const bool round = true;
4080 sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
4081 sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
4082 zip1(vform, dst, srca_r, srca_i);
4083 return dst;
4084 }
4085
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)4086 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4087 LogicVRegister dst,
4088 const LogicVRegister& srca,
4089 const LogicVRegister& src1,
4090 const LogicVRegister& src2,
4091 int index,
4092 int rot) {
4093 SimVRegister temp;
4094 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
4095 return sqrdcmlah(vform, dst, srca, src1, temp, rot);
4096 }
4097
sqrdmlash_d(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4098 LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
4099 LogicVRegister dst,
4100 const LogicVRegister& src1,
4101 const LogicVRegister& src2,
4102 bool round,
4103 bool sub_op) {
4104 // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
4105 // To avoid this, we use:
4106 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4107 // which is same as:
4108 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4109
4110 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4111 int esize = kDRegSize;
4112 vixl_uint128_t round_const, accum;
4113 round_const.first = 0;
4114 if (round) {
4115 round_const.second = UINT64_C(1) << (esize - 2);
4116 } else {
4117 round_const.second = 0;
4118 }
4119
4120 dst.ClearForWrite(vform);
4121 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4122 // Shift the whole value left by `esize - 1` bits.
4123 accum.first = dst.Int(vform, i) >> 1;
4124 accum.second = dst.Int(vform, i) << (esize - 1);
4125
4126 vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));
4127
4128 if (sub_op) {
4129 product = Neg128(product);
4130 }
4131 accum = Add128(accum, product);
4132
4133 // Perform rounding.
4134 accum = Add128(accum, round_const);
4135
4136 // Arithmetic shift the whole value right by `esize - 1` bits.
4137 accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
4138 accum.first = -(accum.first >> (esize - 1));
4139
4140 // Perform saturation.
4141 bool is_pos = (accum.first == 0) ? true : false;
4142 if (is_pos &&
4143 (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
4144 accum.second = MaxIntFromFormat(vform);
4145 } else if (!is_pos && (accum.second <
4146 static_cast<uint64_t>(MinIntFromFormat(vform)))) {
4147 accum.second = MinIntFromFormat(vform);
4148 }
4149
4150 dst.SetInt(vform, i, accum.second);
4151 }
4152
4153 return dst;
4154 }
4155
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4156 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4157 LogicVRegister dst,
4158 const LogicVRegister& src1,
4159 const LogicVRegister& src2,
4160 bool round,
4161 bool sub_op) {
4162 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4163 // To avoid this, we use:
4164 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4165 // which is same as:
4166 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4167
4168 if (vform == kFormatVnD) {
4169 return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
4170 }
4171
4172 int esize = LaneSizeInBitsFromFormat(vform);
4173 int round_const = round ? (1 << (esize - 2)) : 0;
4174 int64_t accum;
4175
4176 dst.ClearForWrite(vform);
4177 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4178 accum = dst.Int(vform, i) << (esize - 1);
4179 if (sub_op) {
4180 accum -= src1.Int(vform, i) * src2.Int(vform, i);
4181 } else {
4182 accum += src1.Int(vform, i) * src2.Int(vform, i);
4183 }
4184 accum += round_const;
4185 accum = accum >> (esize - 1);
4186
4187 if (accum > MaxIntFromFormat(vform)) {
4188 accum = MaxIntFromFormat(vform);
4189 } else if (accum < MinIntFromFormat(vform)) {
4190 accum = MinIntFromFormat(vform);
4191 }
4192 dst.SetInt(vform, i, accum);
4193 }
4194 return dst;
4195 }
4196
4197
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4198 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4199 LogicVRegister dst,
4200 const LogicVRegister& src1,
4201 const LogicVRegister& src2,
4202 bool round) {
4203 return sqrdmlash(vform, dst, src1, src2, round, false);
4204 }
4205
4206
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4207 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4208 LogicVRegister dst,
4209 const LogicVRegister& src1,
4210 const LogicVRegister& src2,
4211 bool round) {
4212 return sqrdmlash(vform, dst, src1, src2, round, true);
4213 }
4214
4215
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4216 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4217 LogicVRegister dst,
4218 const LogicVRegister& src1,
4219 const LogicVRegister& src2) {
4220 return sqrdmulh(vform, dst, src1, src2, false);
4221 }
4222
4223
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4224 LogicVRegister Simulator::addhn(VectorFormat vform,
4225 LogicVRegister dst,
4226 const LogicVRegister& src1,
4227 const LogicVRegister& src2) {
4228 SimVRegister temp;
4229 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4230 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4231 return dst;
4232 }
4233
4234
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4235 LogicVRegister Simulator::addhn2(VectorFormat vform,
4236 LogicVRegister dst,
4237 const LogicVRegister& src1,
4238 const LogicVRegister& src2) {
4239 SimVRegister temp;
4240 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4241 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4242 return dst;
4243 }
4244
4245
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4246 LogicVRegister Simulator::raddhn(VectorFormat vform,
4247 LogicVRegister dst,
4248 const LogicVRegister& src1,
4249 const LogicVRegister& src2) {
4250 SimVRegister temp;
4251 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4252 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4253 return dst;
4254 }
4255
4256
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4257 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4258 LogicVRegister dst,
4259 const LogicVRegister& src1,
4260 const LogicVRegister& src2) {
4261 SimVRegister temp;
4262 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4263 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4264 return dst;
4265 }
4266
4267
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4268 LogicVRegister Simulator::subhn(VectorFormat vform,
4269 LogicVRegister dst,
4270 const LogicVRegister& src1,
4271 const LogicVRegister& src2) {
4272 SimVRegister temp;
4273 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4274 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4275 return dst;
4276 }
4277
4278
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4279 LogicVRegister Simulator::subhn2(VectorFormat vform,
4280 LogicVRegister dst,
4281 const LogicVRegister& src1,
4282 const LogicVRegister& src2) {
4283 SimVRegister temp;
4284 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4285 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4286 return dst;
4287 }
4288
4289
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4290 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4291 LogicVRegister dst,
4292 const LogicVRegister& src1,
4293 const LogicVRegister& src2) {
4294 SimVRegister temp;
4295 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4296 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4297 return dst;
4298 }
4299
4300
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4301 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4302 LogicVRegister dst,
4303 const LogicVRegister& src1,
4304 const LogicVRegister& src2) {
4305 SimVRegister temp;
4306 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4307 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4308 return dst;
4309 }
4310
4311
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4312 LogicVRegister Simulator::trn1(VectorFormat vform,
4313 LogicVRegister dst,
4314 const LogicVRegister& src1,
4315 const LogicVRegister& src2) {
4316 uint64_t result[kZRegMaxSizeInBytes] = {};
4317 int lane_count = LaneCountFromFormat(vform);
4318 int pairs = lane_count / 2;
4319 for (int i = 0; i < pairs; ++i) {
4320 result[2 * i] = src1.Uint(vform, 2 * i);
4321 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4322 }
4323
4324 dst.ClearForWrite(vform);
4325 for (int i = 0; i < lane_count; ++i) {
4326 dst.SetUint(vform, i, result[i]);
4327 }
4328 return dst;
4329 }
4330
4331
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4332 LogicVRegister Simulator::trn2(VectorFormat vform,
4333 LogicVRegister dst,
4334 const LogicVRegister& src1,
4335 const LogicVRegister& src2) {
4336 uint64_t result[kZRegMaxSizeInBytes] = {};
4337 int lane_count = LaneCountFromFormat(vform);
4338 int pairs = lane_count / 2;
4339 for (int i = 0; i < pairs; ++i) {
4340 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4341 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4342 }
4343
4344 dst.ClearForWrite(vform);
4345 for (int i = 0; i < lane_count; ++i) {
4346 dst.SetUint(vform, i, result[i]);
4347 }
4348 return dst;
4349 }
4350
4351
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4352 LogicVRegister Simulator::zip1(VectorFormat vform,
4353 LogicVRegister dst,
4354 const LogicVRegister& src1,
4355 const LogicVRegister& src2) {
4356 uint64_t result[kZRegMaxSizeInBytes] = {};
4357 int lane_count = LaneCountFromFormat(vform);
4358 int pairs = lane_count / 2;
4359 for (int i = 0; i < pairs; ++i) {
4360 result[2 * i] = src1.Uint(vform, i);
4361 result[(2 * i) + 1] = src2.Uint(vform, i);
4362 }
4363
4364 dst.ClearForWrite(vform);
4365 for (int i = 0; i < lane_count; ++i) {
4366 dst.SetUint(vform, i, result[i]);
4367 }
4368 return dst;
4369 }
4370
4371
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4372 LogicVRegister Simulator::zip2(VectorFormat vform,
4373 LogicVRegister dst,
4374 const LogicVRegister& src1,
4375 const LogicVRegister& src2) {
4376 uint64_t result[kZRegMaxSizeInBytes] = {};
4377 int lane_count = LaneCountFromFormat(vform);
4378 int pairs = lane_count / 2;
4379 for (int i = 0; i < pairs; ++i) {
4380 result[2 * i] = src1.Uint(vform, pairs + i);
4381 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4382 }
4383
4384 dst.ClearForWrite(vform);
4385 for (int i = 0; i < lane_count; ++i) {
4386 dst.SetUint(vform, i, result[i]);
4387 }
4388 return dst;
4389 }
4390
4391
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4392 LogicVRegister Simulator::uzp1(VectorFormat vform,
4393 LogicVRegister dst,
4394 const LogicVRegister& src1,
4395 const LogicVRegister& src2) {
4396 uint64_t result[kZRegMaxSizeInBytes * 2];
4397 int lane_count = LaneCountFromFormat(vform);
4398 for (int i = 0; i < lane_count; ++i) {
4399 result[i] = src1.Uint(vform, i);
4400 result[lane_count + i] = src2.Uint(vform, i);
4401 }
4402
4403 dst.ClearForWrite(vform);
4404 for (int i = 0; i < lane_count; ++i) {
4405 dst.SetUint(vform, i, result[2 * i]);
4406 }
4407 return dst;
4408 }
4409
4410
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4411 LogicVRegister Simulator::uzp2(VectorFormat vform,
4412 LogicVRegister dst,
4413 const LogicVRegister& src1,
4414 const LogicVRegister& src2) {
4415 uint64_t result[kZRegMaxSizeInBytes * 2];
4416 int lane_count = LaneCountFromFormat(vform);
4417 for (int i = 0; i < lane_count; ++i) {
4418 result[i] = src1.Uint(vform, i);
4419 result[lane_count + i] = src2.Uint(vform, i);
4420 }
4421
4422 dst.ClearForWrite(vform);
4423 for (int i = 0; i < lane_count; ++i) {
4424 dst.SetUint(vform, i, result[(2 * i) + 1]);
4425 }
4426 return dst;
4427 }
4428
interleave_top_bottom(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4429 LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
4430 LogicVRegister dst,
4431 const LogicVRegister& src) {
4432 // Interleave the top and bottom half of a vector, ie. for a vector:
4433 //
4434 // [ ... | F | D | B | ... | E | C | A ]
4435 //
4436 // where B is the first element in the top half of the vector, produce a
4437 // result vector:
4438 //
4439 // [ ... | ... | F | E | D | C | B | A ]
4440
4441 uint64_t result[kZRegMaxSizeInBytes] = {};
4442 int lane_count = LaneCountFromFormat(vform);
4443 for (int i = 0; i < lane_count; i += 2) {
4444 result[i] = src.Uint(vform, i / 2);
4445 result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
4446 }
4447 dst.SetUintArray(vform, result);
4448 return dst;
4449 }
4450
4451 template <typename T>
FPNeg(T op)4452 T Simulator::FPNeg(T op) {
4453 return -op;
4454 }
4455
4456 template <typename T>
FPAdd(T op1,T op2)4457 T Simulator::FPAdd(T op1, T op2) {
4458 T result = FPProcessNaNs(op1, op2);
4459 if (IsNaN(result)) {
4460 return result;
4461 }
4462
4463 if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4464 // inf + -inf returns the default NaN.
4465 FPProcessException();
4466 return FPDefaultNaN<T>();
4467 } else {
4468 // Other cases should be handled by standard arithmetic.
4469 return op1 + op2;
4470 }
4471 }
4472
4473
4474 template <typename T>
FPSub(T op1,T op2)4475 T Simulator::FPSub(T op1, T op2) {
4476 // NaNs should be handled elsewhere.
4477 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4478
4479 if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4480 // inf - inf returns the default NaN.
4481 FPProcessException();
4482 return FPDefaultNaN<T>();
4483 } else {
4484 // Other cases should be handled by standard arithmetic.
4485 return op1 - op2;
4486 }
4487 }
4488
4489 template <typename T>
FPMulNaNs(T op1,T op2)4490 T Simulator::FPMulNaNs(T op1, T op2) {
4491 T result = FPProcessNaNs(op1, op2);
4492 return IsNaN(result) ? result : FPMul(op1, op2);
4493 }
4494
4495 template <typename T>
FPMul(T op1,T op2)4496 T Simulator::FPMul(T op1, T op2) {
4497 // NaNs should be handled elsewhere.
4498 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4499
4500 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4501 // inf * 0.0 returns the default NaN.
4502 FPProcessException();
4503 return FPDefaultNaN<T>();
4504 } else {
4505 // Other cases should be handled by standard arithmetic.
4506 return op1 * op2;
4507 }
4508 }
4509
4510
4511 template <typename T>
FPMulx(T op1,T op2)4512 T Simulator::FPMulx(T op1, T op2) {
4513 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4514 // inf * 0.0 returns +/-2.0.
4515 T two = 2.0;
4516 return copysign(1.0, op1) * copysign(1.0, op2) * two;
4517 }
4518 return FPMul(op1, op2);
4519 }
4520
4521
4522 template <typename T>
FPMulAdd(T a,T op1,T op2)4523 T Simulator::FPMulAdd(T a, T op1, T op2) {
4524 T result = FPProcessNaNs3(a, op1, op2);
4525
4526 T sign_a = copysign(1.0, a);
4527 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4528 bool isinf_prod = IsInf(op1) || IsInf(op2);
4529 bool operation_generates_nan =
4530 (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0
4531 (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf
4532 (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
4533
4534 if (IsNaN(result)) {
4535 // Generated NaNs override quiet NaNs propagated from a.
4536 if (operation_generates_nan && IsQuietNaN(a)) {
4537 FPProcessException();
4538 return FPDefaultNaN<T>();
4539 } else {
4540 return result;
4541 }
4542 }
4543
4544 // If the operation would produce a NaN, return the default NaN.
4545 if (operation_generates_nan) {
4546 FPProcessException();
4547 return FPDefaultNaN<T>();
4548 }
4549
4550 // Work around broken fma implementations for exact zero results: The sign of
4551 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4552 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4553 return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4554 }
4555
4556 result = FusedMultiplyAdd(op1, op2, a);
4557 VIXL_ASSERT(!IsNaN(result));
4558
4559 // Work around broken fma implementations for rounded zero results: If a is
4560 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4561 if ((a == 0.0) && (result == 0.0)) {
4562 return copysign(0.0, sign_prod);
4563 }
4564
4565 return result;
4566 }
4567
4568 template float Simulator::FPMulAdd(float a, float op1, float op2);
4569
4570 template double Simulator::FPMulAdd(double a, double op1, double op2);
4571
4572 template <typename T>
FPDiv(T op1,T op2)4573 T Simulator::FPDiv(T op1, T op2) {
4574 // NaNs should be handled elsewhere.
4575 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4576
4577 if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4578 // inf / inf and 0.0 / 0.0 return the default NaN.
4579 FPProcessException();
4580 return FPDefaultNaN<T>();
4581 } else {
4582 if (op2 == 0.0) {
4583 FPProcessException();
4584 if (!IsNaN(op1)) {
4585 double op1_sign = copysign(1.0, op1);
4586 double op2_sign = copysign(1.0, op2);
4587 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4588 }
4589 }
4590
4591 // Other cases should be handled by standard arithmetic.
4592 return op1 / op2;
4593 }
4594 }
4595
4596
4597 template <typename T>
FPSqrt(T op)4598 T Simulator::FPSqrt(T op) {
4599 if (IsNaN(op)) {
4600 return FPProcessNaN(op);
4601 } else if (op < T(0.0)) {
4602 FPProcessException();
4603 return FPDefaultNaN<T>();
4604 } else {
4605 return sqrt(op);
4606 }
4607 }
4608
4609
4610 template <typename T>
FPMax(T a,T b)4611 T Simulator::FPMax(T a, T b) {
4612 T result = FPProcessNaNs(a, b);
4613 if (IsNaN(result)) return result;
4614
4615 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4616 // a and b are zero, and the sign differs: return +0.0.
4617 return 0.0;
4618 } else {
4619 return (a > b) ? a : b;
4620 }
4621 }
4622
4623
4624 template <typename T>
FPMaxNM(T a,T b)4625 T Simulator::FPMaxNM(T a, T b) {
4626 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4627 a = kFP64NegativeInfinity;
4628 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4629 b = kFP64NegativeInfinity;
4630 }
4631
4632 T result = FPProcessNaNs(a, b);
4633 return IsNaN(result) ? result : FPMax(a, b);
4634 }
4635
4636
4637 template <typename T>
FPMin(T a,T b)4638 T Simulator::FPMin(T a, T b) {
4639 T result = FPProcessNaNs(a, b);
4640 if (IsNaN(result)) return result;
4641
4642 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4643 // a and b are zero, and the sign differs: return -0.0.
4644 return -0.0;
4645 } else {
4646 return (a < b) ? a : b;
4647 }
4648 }
4649
4650
4651 template <typename T>
FPMinNM(T a,T b)4652 T Simulator::FPMinNM(T a, T b) {
4653 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4654 a = kFP64PositiveInfinity;
4655 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4656 b = kFP64PositiveInfinity;
4657 }
4658
4659 T result = FPProcessNaNs(a, b);
4660 return IsNaN(result) ? result : FPMin(a, b);
4661 }
4662
4663
4664 template <typename T>
FPRecipStepFused(T op1,T op2)4665 T Simulator::FPRecipStepFused(T op1, T op2) {
4666 const T two = 2.0;
4667 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4668 return two;
4669 } else if (IsInf(op1) || IsInf(op2)) {
4670 // Return +inf if signs match, otherwise -inf.
4671 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4672 : kFP64NegativeInfinity;
4673 } else {
4674 return FusedMultiplyAdd(op1, op2, two);
4675 }
4676 }
4677
// Returns true if `value` is a normal floating-point number, i.e. it is not
// zero, subnormal, infinite or NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
4682
4683 template <>
IsNormal(SimFloat16 value)4684 bool IsNormal(SimFloat16 value) {
4685 uint16_t rawbits = Float16ToRawbits(value);
4686 uint16_t exp_mask = 0x7c00;
4687 // Check that the exponent is neither all zeroes or all ones.
4688 return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4689 }
4690
4691
4692 template <typename T>
FPRSqrtStepFused(T op1,T op2)4693 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4694 const T one_point_five = 1.5;
4695 const T two = 2.0;
4696
4697 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4698 return one_point_five;
4699 } else if (IsInf(op1) || IsInf(op2)) {
4700 // Return +inf if signs match, otherwise -inf.
4701 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4702 : kFP64NegativeInfinity;
4703 } else {
4704 // The multiply-add-halve operation must be fully fused, so avoid interim
4705 // rounding by checking which operand can be losslessly divided by two
4706 // before doing the multiply-add.
4707 if (IsNormal(op1 / two)) {
4708 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4709 } else if (IsNormal(op2 / two)) {
4710 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4711 } else {
4712 // Neither operand is normal after halving: the result is dominated by
4713 // the addition term, so just return that.
4714 return one_point_five;
4715 }
4716 }
4717 }
4718
FPToFixedJS(double value)4719 int32_t Simulator::FPToFixedJS(double value) {
4720 // The Z-flag is set when the conversion from double precision floating-point
4721 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4722 // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4723 // Z-flag is unset.
4724 int Z = 1;
4725 int32_t result;
4726
4727 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4728 (value == kFP64NegativeInfinity)) {
4729 // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4730 // unset the Z-flag.
4731 result = 0.0;
4732 if ((value != 0.0) || std::signbit(value)) {
4733 Z = 0;
4734 }
4735 } else if (std::isnan(value)) {
4736 // NaN values unset the Z-flag and set the result to 0.
4737 FPProcessNaN(value);
4738 result = 0;
4739 Z = 0;
4740 } else {
4741 // All other values are converted to an integer representation, rounded
4742 // toward zero.
4743 double int_result = std::floor(value);
4744 double error = value - int_result;
4745
4746 if ((error != 0.0) && (int_result < 0.0)) {
4747 int_result++;
4748 }
4749
4750 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4751 // write a one-liner with std::round, but the behaviour on ties is incorrect
4752 // for our purposes.
4753 double mod_const = static_cast<double>(UINT64_C(1) << 32);
4754 double mod_error =
4755 (int_result / mod_const) - std::floor(int_result / mod_const);
4756 double constrained;
4757 if (mod_error == 0.5) {
4758 constrained = INT32_MIN;
4759 } else {
4760 constrained = int_result - mod_const * round(int_result / mod_const);
4761 }
4762
4763 VIXL_ASSERT(std::floor(constrained) == constrained);
4764 VIXL_ASSERT(constrained >= INT32_MIN);
4765 VIXL_ASSERT(constrained <= INT32_MAX);
4766
4767 // Take the bottom 32 bits of the result as a 32-bit integer.
4768 result = static_cast<int32_t>(constrained);
4769
4770 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4771 (error != 0.0)) {
4772 // If the integer result is out of range or the conversion isn't exact,
4773 // take exception and unset the Z-flag.
4774 FPProcessException();
4775 Z = 0;
4776 }
4777 }
4778
4779 ReadNzcv().SetN(0);
4780 ReadNzcv().SetZ(Z);
4781 ReadNzcv().SetC(0);
4782 ReadNzcv().SetV(0);
4783
4784 return result;
4785 }
4786
FPRoundIntCommon(double value,FPRounding round_mode)4787 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4788 VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4789 (value != kFP64NegativeInfinity));
4790 VIXL_ASSERT(!IsNaN(value));
4791
4792 double int_result = std::floor(value);
4793 double error = value - int_result;
4794 switch (round_mode) {
4795 case FPTieAway: {
4796 // Take care of correctly handling the range ]-0.5, -0.0], which must
4797 // yield -0.0.
4798 if ((-0.5 < value) && (value < 0.0)) {
4799 int_result = -0.0;
4800
4801 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4802 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4803 // result is positive, round up.
4804 int_result++;
4805 }
4806 break;
4807 }
4808 case FPTieEven: {
4809 // Take care of correctly handling the range [-0.5, -0.0], which must
4810 // yield -0.0.
4811 if ((-0.5 <= value) && (value < 0.0)) {
4812 int_result = -0.0;
4813
4814 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4815 // result is odd, round up.
4816 } else if ((error > 0.5) ||
4817 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4818 int_result++;
4819 }
4820 break;
4821 }
4822 case FPZero: {
4823 // If value>0 then we take floor(value)
4824 // otherwise, ceil(value).
4825 if (value < 0) {
4826 int_result = ceil(value);
4827 }
4828 break;
4829 }
4830 case FPNegativeInfinity: {
4831 // We always use floor(value).
4832 break;
4833 }
4834 case FPPositiveInfinity: {
4835 // Take care of correctly handling the range ]-1.0, -0.0], which must
4836 // yield -0.0.
4837 if ((-1.0 < value) && (value < 0.0)) {
4838 int_result = -0.0;
4839
4840 // If the error is non-zero, round up.
4841 } else if (error > 0.0) {
4842 int_result++;
4843 }
4844 break;
4845 }
4846 default:
4847 VIXL_UNIMPLEMENTED();
4848 }
4849 return int_result;
4850 }
4851
FPRoundInt(double value,FPRounding round_mode)4852 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4853 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4854 (value == kFP64NegativeInfinity)) {
4855 return value;
4856 } else if (IsNaN(value)) {
4857 return FPProcessNaN(value);
4858 }
4859 return FPRoundIntCommon(value, round_mode);
4860 }
4861
FPRoundInt(double value,FPRounding round_mode,FrintMode frint_mode)4862 double Simulator::FPRoundInt(double value,
4863 FPRounding round_mode,
4864 FrintMode frint_mode) {
4865 if (frint_mode == kFrintToInteger) {
4866 return FPRoundInt(value, round_mode);
4867 }
4868
4869 VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4870
4871 if (value == 0.0) {
4872 return value;
4873 }
4874
4875 if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4876 IsNaN(value)) {
4877 if (frint_mode == kFrintToInt32) {
4878 return INT32_MIN;
4879 } else {
4880 return INT64_MIN;
4881 }
4882 }
4883
4884 double result = FPRoundIntCommon(value, round_mode);
4885
4886 // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4887 // representable as a double, and is rounded to (INT64_MAX + 1) when
4888 // converted. To avoid this, we compare `result >= int64_max_plus_one`
4889 // instead; this is safe because `result` is known to be integral, and
4890 // `int64_max_plus_one` is exactly representable as a double.
4891 constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4892 VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4893 int64_max_plus_one)) == int64_max_plus_one);
4894
4895 if (frint_mode == kFrintToInt32) {
4896 if ((result > INT32_MAX) || (result < INT32_MIN)) {
4897 return INT32_MIN;
4898 }
4899 } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4900 return INT64_MIN;
4901 }
4902
4903 return result;
4904 }
4905
FPToInt16(double value,FPRounding rmode)4906 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4907 value = FPRoundInt(value, rmode);
4908 if (value >= kHMaxInt) {
4909 return kHMaxInt;
4910 } else if (value < kHMinInt) {
4911 return kHMinInt;
4912 }
4913 return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4914 }
4915
4916
FPToInt32(double value,FPRounding rmode)4917 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4918 value = FPRoundInt(value, rmode);
4919 if (value >= kWMaxInt) {
4920 return kWMaxInt;
4921 } else if (value < kWMinInt) {
4922 return kWMinInt;
4923 }
4924 return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4925 }
4926
4927
FPToInt64(double value,FPRounding rmode)4928 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4929 value = FPRoundInt(value, rmode);
4930 // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4931 // as a result of kMaxInt not being representable as a double.
4932 if (value >= 9223372036854775808.) {
4933 return kXMaxInt;
4934 } else if (value < kXMinInt) {
4935 return kXMinInt;
4936 }
4937 return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4938 }
4939
4940
FPToUInt16(double value,FPRounding rmode)4941 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4942 value = FPRoundInt(value, rmode);
4943 if (value >= kHMaxUInt) {
4944 return kHMaxUInt;
4945 } else if (value < 0.0) {
4946 return 0;
4947 }
4948 return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4949 }
4950
4951
FPToUInt32(double value,FPRounding rmode)4952 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4953 value = FPRoundInt(value, rmode);
4954 if (value >= kWMaxUInt) {
4955 return kWMaxUInt;
4956 } else if (value < 0.0) {
4957 return 0;
4958 }
4959 return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4960 }
4961
4962
FPToUInt64(double value,FPRounding rmode)4963 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4964 value = FPRoundInt(value, rmode);
4965 // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
4966 // as a result of kMaxUInt not being representable as a double.
4967 if (value >= 18446744073709551616.) {
4968 return kXMaxUInt;
4969 } else if (value < 0.0) {
4970 return 0;
4971 }
4972 return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4973 }
4974
4975
// For every (FN, OP, PROCNAN) entry of NEON_FP3SAME_LIST this defines:
//  - a lane-typed template FN<T> that applies OP to each pair of lanes. When
//    PROCNAN is true the operands first go through FPProcessNaNs and OP is
//    only applied if that did not produce a NaN;
//  - a non-template FN that picks the lane type (SimFloat16, float or double)
//    from the lane size of `vform`.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                  \
  template <typename T>                                            \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    dst.ClearForWrite(vform);                                      \
    for (int i = 0; i < LaneCountFromFormat(vform); i++) {         \
      T op1 = src1.Float<T>(i);                                    \
      T op2 = src2.Float<T>(i);                                    \
      T result;                                                    \
      bool apply_op = true;                                        \
      if (PROCNAN) {                                               \
        result = FPProcessNaNs(op1, op2);                          \
        apply_op = !IsNaN(result);                                 \
      }                                                            \
      if (apply_op) {                                              \
        result = OP(op1, op2);                                     \
      }                                                            \
      dst.SetFloat(vform, i, result);                              \
    }                                                              \
    return dst;                                                    \
  }                                                                \
                                                                   \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    const auto lane_size_in_bits = LaneSizeInBitsFromFormat(vform); \
    if (lane_size_in_bits == kHRegSize) {                          \
      FN<SimFloat16>(vform, dst, src1, src2);                      \
    } else if (lane_size_in_bits == kSRegSize) {                   \
      FN<float>(vform, dst, src1, src2);                           \
    } else {                                                       \
      VIXL_ASSERT(lane_size_in_bits == kDRegSize);                 \
      FN<double>(vform, dst, src1, src2);                          \
    }                                                              \
    return dst;                                                    \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
5016
5017
5018 LogicVRegister Simulator::fnmul(VectorFormat vform,
5019 LogicVRegister dst,
5020 const LogicVRegister& src1,
5021 const LogicVRegister& src2) {
5022 SimVRegister temp;
5023 LogicVRegister product = fmul(vform, temp, src1, src2);
5024 return fneg(vform, dst, product);
5025 }
5026
5027
5028 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5029 LogicVRegister Simulator::frecps(VectorFormat vform,
5030 LogicVRegister dst,
5031 const LogicVRegister& src1,
5032 const LogicVRegister& src2) {
5033 dst.ClearForWrite(vform);
5034 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5035 T op1 = -src1.Float<T>(i);
5036 T op2 = src2.Float<T>(i);
5037 T result = FPProcessNaNs(op1, op2);
5038 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5039 }
5040 return dst;
5041 }
5042
5043
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5044 LogicVRegister Simulator::frecps(VectorFormat vform,
5045 LogicVRegister dst,
5046 const LogicVRegister& src1,
5047 const LogicVRegister& src2) {
5048 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5049 frecps<SimFloat16>(vform, dst, src1, src2);
5050 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5051 frecps<float>(vform, dst, src1, src2);
5052 } else {
5053 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5054 frecps<double>(vform, dst, src1, src2);
5055 }
5056 return dst;
5057 }
5058
5059
5060 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5061 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5062 LogicVRegister dst,
5063 const LogicVRegister& src1,
5064 const LogicVRegister& src2) {
5065 dst.ClearForWrite(vform);
5066 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5067 T op1 = -src1.Float<T>(i);
5068 T op2 = src2.Float<T>(i);
5069 T result = FPProcessNaNs(op1, op2);
5070 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5071 }
5072 return dst;
5073 }
5074
5075
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5076 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5077 LogicVRegister dst,
5078 const LogicVRegister& src1,
5079 const LogicVRegister& src2) {
5080 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5081 frsqrts<SimFloat16>(vform, dst, src1, src2);
5082 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5083 frsqrts<float>(vform, dst, src1, src2);
5084 } else {
5085 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5086 frsqrts<double>(vform, dst, src1, src2);
5087 }
5088 return dst;
5089 }
5090
5091
5092 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5093 LogicVRegister Simulator::fcmp(VectorFormat vform,
5094 LogicVRegister dst,
5095 const LogicVRegister& src1,
5096 const LogicVRegister& src2,
5097 Condition cond) {
5098 dst.ClearForWrite(vform);
5099 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5100 bool result = false;
5101 T op1 = src1.Float<T>(i);
5102 T op2 = src2.Float<T>(i);
5103 bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5104
5105 switch (cond) {
5106 case eq:
5107 result = (op1 == op2);
5108 break;
5109 case ge:
5110 result = (op1 >= op2);
5111 break;
5112 case gt:
5113 result = (op1 > op2);
5114 break;
5115 case le:
5116 result = (op1 <= op2);
5117 break;
5118 case lt:
5119 result = (op1 < op2);
5120 break;
5121 case ne:
5122 result = (op1 != op2);
5123 break;
5124 case uo:
5125 result = unordered;
5126 break;
5127 default:
5128 // Other conditions are defined in terms of those above.
5129 VIXL_UNREACHABLE();
5130 break;
5131 }
5132
5133 if (result && unordered) {
5134 // Only `uo` and `ne` can be true for unordered comparisons.
5135 VIXL_ASSERT((cond == uo) || (cond == ne));
5136 }
5137
5138 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5139 }
5140 return dst;
5141 }
5142
5143
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5144 LogicVRegister Simulator::fcmp(VectorFormat vform,
5145 LogicVRegister dst,
5146 const LogicVRegister& src1,
5147 const LogicVRegister& src2,
5148 Condition cond) {
5149 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5150 fcmp<SimFloat16>(vform, dst, src1, src2, cond);
5151 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5152 fcmp<float>(vform, dst, src1, src2, cond);
5153 } else {
5154 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5155 fcmp<double>(vform, dst, src1, src2, cond);
5156 }
5157 return dst;
5158 }
5159
5160
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)5161 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
5162 LogicVRegister dst,
5163 const LogicVRegister& src,
5164 Condition cond) {
5165 SimVRegister temp;
5166 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5167 LogicVRegister zero_reg =
5168 dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
5169 fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
5170 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5171 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5172 fcmp<float>(vform, dst, src, zero_reg, cond);
5173 } else {
5174 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5175 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5176 fcmp<double>(vform, dst, src, zero_reg, cond);
5177 }
5178 return dst;
5179 }
5180
5181
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5182 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5183 LogicVRegister dst,
5184 const LogicVRegister& src1,
5185 const LogicVRegister& src2,
5186 Condition cond) {
5187 SimVRegister temp1, temp2;
5188 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5189 LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5190 LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5191 fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5192 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5193 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5194 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5195 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5196 } else {
5197 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5198 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5199 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5200 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5201 }
5202 return dst;
5203 }
5204
5205
5206 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5207 LogicVRegister Simulator::fmla(VectorFormat vform,
5208 LogicVRegister dst,
5209 const LogicVRegister& srca,
5210 const LogicVRegister& src1,
5211 const LogicVRegister& src2) {
5212 dst.ClearForWrite(vform);
5213 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5214 T op1 = src1.Float<T>(i);
5215 T op2 = src2.Float<T>(i);
5216 T acc = srca.Float<T>(i);
5217 T result = FPMulAdd(acc, op1, op2);
5218 dst.SetFloat(vform, i, result);
5219 }
5220 return dst;
5221 }
5222
5223
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5224 LogicVRegister Simulator::fmla(VectorFormat vform,
5225 LogicVRegister dst,
5226 const LogicVRegister& srca,
5227 const LogicVRegister& src1,
5228 const LogicVRegister& src2) {
5229 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5230 fmla<SimFloat16>(vform, dst, srca, src1, src2);
5231 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5232 fmla<float>(vform, dst, srca, src1, src2);
5233 } else {
5234 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5235 fmla<double>(vform, dst, srca, src1, src2);
5236 }
5237 return dst;
5238 }
5239
5240
5241 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5242 LogicVRegister Simulator::fmls(VectorFormat vform,
5243 LogicVRegister dst,
5244 const LogicVRegister& srca,
5245 const LogicVRegister& src1,
5246 const LogicVRegister& src2) {
5247 dst.ClearForWrite(vform);
5248 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5249 T op1 = -src1.Float<T>(i);
5250 T op2 = src2.Float<T>(i);
5251 T acc = srca.Float<T>(i);
5252 T result = FPMulAdd(acc, op1, op2);
5253 dst.SetFloat(i, result);
5254 }
5255 return dst;
5256 }
5257
5258
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5259 LogicVRegister Simulator::fmls(VectorFormat vform,
5260 LogicVRegister dst,
5261 const LogicVRegister& srca,
5262 const LogicVRegister& src1,
5263 const LogicVRegister& src2) {
5264 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5265 fmls<SimFloat16>(vform, dst, srca, src1, src2);
5266 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5267 fmls<float>(vform, dst, srca, src1, src2);
5268 } else {
5269 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5270 fmls<double>(vform, dst, srca, src1, src2);
5271 }
5272 return dst;
5273 }
5274
5275
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5276 LogicVRegister Simulator::fmlal(VectorFormat vform,
5277 LogicVRegister dst,
5278 const LogicVRegister& src1,
5279 const LogicVRegister& src2) {
5280 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5281 dst.ClearForWrite(vform);
5282 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5283 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5284 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5285 float acc = dst.Float<float>(i);
5286 float result = FPMulAdd(acc, op1, op2);
5287 dst.SetFloat(i, result);
5288 }
5289 return dst;
5290 }
5291
5292
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5293 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5294 LogicVRegister dst,
5295 const LogicVRegister& src1,
5296 const LogicVRegister& src2) {
5297 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5298 dst.ClearForWrite(vform);
5299 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5300 int src = i + LaneCountFromFormat(vform);
5301 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5302 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5303 float acc = dst.Float<float>(i);
5304 float result = FPMulAdd(acc, op1, op2);
5305 dst.SetFloat(i, result);
5306 }
5307 return dst;
5308 }
5309
5310
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5311 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5312 LogicVRegister dst,
5313 const LogicVRegister& src1,
5314 const LogicVRegister& src2) {
5315 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5316 dst.ClearForWrite(vform);
5317 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5318 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5319 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5320 float acc = dst.Float<float>(i);
5321 float result = FPMulAdd(acc, op1, op2);
5322 dst.SetFloat(i, result);
5323 }
5324 return dst;
5325 }
5326
5327
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5328 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5329 LogicVRegister dst,
5330 const LogicVRegister& src1,
5331 const LogicVRegister& src2) {
5332 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5333 dst.ClearForWrite(vform);
5334 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5335 int src = i + LaneCountFromFormat(vform);
5336 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5337 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5338 float acc = dst.Float<float>(i);
5339 float result = FPMulAdd(acc, op1, op2);
5340 dst.SetFloat(i, result);
5341 }
5342 return dst;
5343 }
5344
5345
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5346 LogicVRegister Simulator::fmlal(VectorFormat vform,
5347 LogicVRegister dst,
5348 const LogicVRegister& src1,
5349 const LogicVRegister& src2,
5350 int index) {
5351 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5352 dst.ClearForWrite(vform);
5353 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5354 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5355 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5356 float acc = dst.Float<float>(i);
5357 float result = FPMulAdd(acc, op1, op2);
5358 dst.SetFloat(i, result);
5359 }
5360 return dst;
5361 }
5362
5363
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5364 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5365 LogicVRegister dst,
5366 const LogicVRegister& src1,
5367 const LogicVRegister& src2,
5368 int index) {
5369 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5370 dst.ClearForWrite(vform);
5371 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5372 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5373 int src = i + LaneCountFromFormat(vform);
5374 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5375 float acc = dst.Float<float>(i);
5376 float result = FPMulAdd(acc, op1, op2);
5377 dst.SetFloat(i, result);
5378 }
5379 return dst;
5380 }
5381
5382
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5383 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5384 LogicVRegister dst,
5385 const LogicVRegister& src1,
5386 const LogicVRegister& src2,
5387 int index) {
5388 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5389 dst.ClearForWrite(vform);
5390 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5391 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5392 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5393 float acc = dst.Float<float>(i);
5394 float result = FPMulAdd(acc, op1, op2);
5395 dst.SetFloat(i, result);
5396 }
5397 return dst;
5398 }
5399
5400
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5401 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5402 LogicVRegister dst,
5403 const LogicVRegister& src1,
5404 const LogicVRegister& src2,
5405 int index) {
5406 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5407 dst.ClearForWrite(vform);
5408 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5409 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5410 int src = i + LaneCountFromFormat(vform);
5411 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5412 float acc = dst.Float<float>(i);
5413 float result = FPMulAdd(acc, op1, op2);
5414 dst.SetFloat(i, result);
5415 }
5416 return dst;
5417 }
5418
5419
5420 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5421 LogicVRegister Simulator::fneg(VectorFormat vform,
5422 LogicVRegister dst,
5423 const LogicVRegister& src) {
5424 dst.ClearForWrite(vform);
5425 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5426 T op = src.Float<T>(i);
5427 op = -op;
5428 dst.SetFloat(i, op);
5429 }
5430 return dst;
5431 }
5432
5433
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5434 LogicVRegister Simulator::fneg(VectorFormat vform,
5435 LogicVRegister dst,
5436 const LogicVRegister& src) {
5437 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5438 fneg<SimFloat16>(vform, dst, src);
5439 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5440 fneg<float>(vform, dst, src);
5441 } else {
5442 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5443 fneg<double>(vform, dst, src);
5444 }
5445 return dst;
5446 }
5447
5448
5449 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5450 LogicVRegister Simulator::fabs_(VectorFormat vform,
5451 LogicVRegister dst,
5452 const LogicVRegister& src) {
5453 dst.ClearForWrite(vform);
5454 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5455 T op = src.Float<T>(i);
5456 if (copysign(1.0, op) < 0.0) {
5457 op = -op;
5458 }
5459 dst.SetFloat(i, op);
5460 }
5461 return dst;
5462 }
5463
5464
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5465 LogicVRegister Simulator::fabs_(VectorFormat vform,
5466 LogicVRegister dst,
5467 const LogicVRegister& src) {
5468 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5469 fabs_<SimFloat16>(vform, dst, src);
5470 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5471 fabs_<float>(vform, dst, src);
5472 } else {
5473 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5474 fabs_<double>(vform, dst, src);
5475 }
5476 return dst;
5477 }
5478
5479
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5480 LogicVRegister Simulator::fabd(VectorFormat vform,
5481 LogicVRegister dst,
5482 const LogicVRegister& src1,
5483 const LogicVRegister& src2) {
5484 SimVRegister temp;
5485 fsub(vform, temp, src1, src2);
5486 fabs_(vform, dst, temp);
5487 return dst;
5488 }
5489
5490
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5491 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5492 LogicVRegister dst,
5493 const LogicVRegister& src) {
5494 dst.ClearForWrite(vform);
5495 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5496 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5497 SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5498 dst.SetFloat(i, result);
5499 }
5500 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5501 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5502 float result = FPSqrt(src.Float<float>(i));
5503 dst.SetFloat(i, result);
5504 }
5505 } else {
5506 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5507 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5508 double result = FPSqrt(src.Float<double>(i));
5509 dst.SetFloat(i, result);
5510 }
5511 }
5512 return dst;
5513 }
5514
5515
// Generates, for every (FNP, FN, OP) entry of NEON_FPPAIRWISE_LIST, the two
// Simulator overloads of the pairwise operation FNP:
//
//  - FNP(vform, dst, src1, src2): builds two temporaries from the sources with
//    uzp1 and uzp2, then applies the element-wise operation FN to those
//    temporaries. For SVE formats the result is additionally passed through
//    interleave_top_bottom.
//
//  - FNP(vform, dst, src): scalar form (vform is kFormatH, kFormatS or
//    kFormatD). Applies the scalar operation OP to lanes 0 and 1 of `src` and
//    stores the result in lane 0 of `dst`. The half-precision path goes
//    through raw bits (Uint/SetUint) rather than Float<SimFloat16>. Note that
//    ClearForWrite is called after the result lane has been written.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                    \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src1,                    \
                                const LogicVRegister& src2) {                  \
    SimVRegister temp1, temp2;                                                 \
    uzp1(vform, temp1, src1, src2);                                            \
    uzp2(vform, temp2, src1, src2);                                            \
    FN(vform, dst, temp1, temp2);                                              \
    if (IsSVEFormat(vform)) {                                                  \
      interleave_top_bottom(vform, dst, dst);                                  \
    }                                                                          \
    return dst;                                                                \
  }                                                                            \
                                                                               \
  LogicVRegister Simulator::FNP(VectorFormat vform,                            \
                                LogicVRegister dst,                            \
                                const LogicVRegister& src) {                   \
    if (vform == kFormatH) {                                                   \
      SimFloat16 result(OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))),   \
                           SimFloat16(RawbitsToFloat16(src.Uint(vform, 1))))); \
      dst.SetUint(vform, 0, Float16ToRawbits(result));                         \
    } else if (vform == kFormatS) {                                            \
      float result = OP(src.Float<float>(0), src.Float<float>(1));             \
      dst.SetFloat(0, result);                                                 \
    } else {                                                                   \
      VIXL_ASSERT(vform == kFormatD);                                          \
      double result = OP(src.Float<double>(0), src.Float<double>(1));          \
      dst.SetFloat(0, result);                                                 \
    }                                                                          \
    dst.ClearForWrite(vform);                                                  \
    return dst;                                                                \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5551
5552 template <typename T>
5553 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5554 LogicVRegister dst,
5555 const LogicVRegister& src,
5556 typename TFPPairOp<T>::type fn,
5557 uint64_t inactive_value) {
5558 int lane_count = LaneCountFromFormat(vform);
5559 T result[kZRegMaxSizeInBytes / sizeof(T)];
5560 // Copy the source vector into a working array. Initialise the unused elements
5561 // at the end of the array to the same value that a false predicate would set.
5562 for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5563 result[i] = (i < lane_count)
5564 ? src.Float<T>(i)
5565 : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5566 }
5567
5568 // Pairwise reduce the elements to a single value, using the pair op function
5569 // argument.
5570 for (int step = 1; step < lane_count; step *= 2) {
5571 for (int i = 0; i < lane_count; i += step * 2) {
5572 result[i] = (this->*fn)(result[i], result[i + step]);
5573 }
5574 }
5575 dst.ClearForWrite(ScalarFormatFromFormat(vform));
5576 dst.SetFloat<T>(0, result[0]);
5577 return dst;
5578 }
5579
FPPairedAcrossHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,typename TFPPairOp<SimFloat16>::type fn16,typename TFPPairOp<float>::type fn32,typename TFPPairOp<double>::type fn64,uint64_t inactive_value)5580 LogicVRegister Simulator::FPPairedAcrossHelper(
5581 VectorFormat vform,
5582 LogicVRegister dst,
5583 const LogicVRegister& src,
5584 typename TFPPairOp<SimFloat16>::type fn16,
5585 typename TFPPairOp<float>::type fn32,
5586 typename TFPPairOp<double>::type fn64,
5587 uint64_t inactive_value) {
5588 switch (LaneSizeInBitsFromFormat(vform)) {
5589 case kHRegSize:
5590 return FPPairedAcrossHelper<SimFloat16>(vform,
5591 dst,
5592 src,
5593 fn16,
5594 inactive_value);
5595 case kSRegSize:
5596 return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5597 default:
5598 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5599 return FPPairedAcrossHelper<double>(vform,
5600 dst,
5601 src,
5602 fn64,
5603 inactive_value);
5604 }
5605 }
5606
faddv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5607 LogicVRegister Simulator::faddv(VectorFormat vform,
5608 LogicVRegister dst,
5609 const LogicVRegister& src) {
5610 return FPPairedAcrossHelper(vform,
5611 dst,
5612 src,
5613 &Simulator::FPAdd<SimFloat16>,
5614 &Simulator::FPAdd<float>,
5615 &Simulator::FPAdd<double>,
5616 0);
5617 }
5618
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5619 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5620 LogicVRegister dst,
5621 const LogicVRegister& src) {
5622 int lane_size = LaneSizeInBitsFromFormat(vform);
5623 uint64_t inactive_value =
5624 FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5625 return FPPairedAcrossHelper(vform,
5626 dst,
5627 src,
5628 &Simulator::FPMax<SimFloat16>,
5629 &Simulator::FPMax<float>,
5630 &Simulator::FPMax<double>,
5631 inactive_value);
5632 }
5633
5634
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5635 LogicVRegister Simulator::fminv(VectorFormat vform,
5636 LogicVRegister dst,
5637 const LogicVRegister& src) {
5638 int lane_size = LaneSizeInBitsFromFormat(vform);
5639 uint64_t inactive_value =
5640 FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5641 return FPPairedAcrossHelper(vform,
5642 dst,
5643 src,
5644 &Simulator::FPMin<SimFloat16>,
5645 &Simulator::FPMin<float>,
5646 &Simulator::FPMin<double>,
5647 inactive_value);
5648 }
5649
5650
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5651 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5652 LogicVRegister dst,
5653 const LogicVRegister& src) {
5654 int lane_size = LaneSizeInBitsFromFormat(vform);
5655 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5656 return FPPairedAcrossHelper(vform,
5657 dst,
5658 src,
5659 &Simulator::FPMaxNM<SimFloat16>,
5660 &Simulator::FPMaxNM<float>,
5661 &Simulator::FPMaxNM<double>,
5662 inactive_value);
5663 }
5664
5665
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5666 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5667 LogicVRegister dst,
5668 const LogicVRegister& src) {
5669 int lane_size = LaneSizeInBitsFromFormat(vform);
5670 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5671 return FPPairedAcrossHelper(vform,
5672 dst,
5673 src,
5674 &Simulator::FPMinNM<SimFloat16>,
5675 &Simulator::FPMinNM<float>,
5676 &Simulator::FPMinNM<double>,
5677 inactive_value);
5678 }
5679
5680
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5681 LogicVRegister Simulator::fmul(VectorFormat vform,
5682 LogicVRegister dst,
5683 const LogicVRegister& src1,
5684 const LogicVRegister& src2,
5685 int index) {
5686 dst.ClearForWrite(vform);
5687 SimVRegister temp;
5688 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5689 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5690 fmul<SimFloat16>(vform, dst, src1, index_reg);
5691 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5692 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5693 fmul<float>(vform, dst, src1, index_reg);
5694 } else {
5695 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5696 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5697 fmul<double>(vform, dst, src1, index_reg);
5698 }
5699 return dst;
5700 }
5701
5702
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5703 LogicVRegister Simulator::fmla(VectorFormat vform,
5704 LogicVRegister dst,
5705 const LogicVRegister& src1,
5706 const LogicVRegister& src2,
5707 int index) {
5708 dst.ClearForWrite(vform);
5709 SimVRegister temp;
5710 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5711 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5712 fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5713 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5714 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5715 fmla<float>(vform, dst, dst, src1, index_reg);
5716 } else {
5717 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5718 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5719 fmla<double>(vform, dst, dst, src1, index_reg);
5720 }
5721 return dst;
5722 }
5723
5724
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5725 LogicVRegister Simulator::fmls(VectorFormat vform,
5726 LogicVRegister dst,
5727 const LogicVRegister& src1,
5728 const LogicVRegister& src2,
5729 int index) {
5730 dst.ClearForWrite(vform);
5731 SimVRegister temp;
5732 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5733 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5734 fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5735 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5736 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5737 fmls<float>(vform, dst, dst, src1, index_reg);
5738 } else {
5739 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5740 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5741 fmls<double>(vform, dst, dst, src1, index_reg);
5742 }
5743 return dst;
5744 }
5745
5746
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5747 LogicVRegister Simulator::fmulx(VectorFormat vform,
5748 LogicVRegister dst,
5749 const LogicVRegister& src1,
5750 const LogicVRegister& src2,
5751 int index) {
5752 dst.ClearForWrite(vform);
5753 SimVRegister temp;
5754 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5755 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5756 fmulx<SimFloat16>(vform, dst, src1, index_reg);
5757 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5758 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5759 fmulx<float>(vform, dst, src1, index_reg);
5760 } else {
5761 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5762 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5763 fmulx<double>(vform, dst, src1, index_reg);
5764 }
5765 return dst;
5766 }
5767
5768
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception,FrintMode frint_mode)5769 LogicVRegister Simulator::frint(VectorFormat vform,
5770 LogicVRegister dst,
5771 const LogicVRegister& src,
5772 FPRounding rounding_mode,
5773 bool inexact_exception,
5774 FrintMode frint_mode) {
5775 dst.ClearForWrite(vform);
5776 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5777 VIXL_ASSERT(frint_mode == kFrintToInteger);
5778 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5779 SimFloat16 input = src.Float<SimFloat16>(i);
5780 SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5781 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5782 FPProcessException();
5783 }
5784 dst.SetFloat<SimFloat16>(i, rounded);
5785 }
5786 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5787 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5788 float input = src.Float<float>(i);
5789 float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5790
5791 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5792 FPProcessException();
5793 }
5794 dst.SetFloat<float>(i, rounded);
5795 }
5796 } else {
5797 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5798 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5799 double input = src.Float<double>(i);
5800 double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5801 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5802 FPProcessException();
5803 }
5804 dst.SetFloat<double>(i, rounded);
5805 }
5806 }
5807 return dst;
5808 }
5809
fcvt(VectorFormat dst_vform,VectorFormat src_vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)5810 LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
5811 VectorFormat src_vform,
5812 LogicVRegister dst,
5813 const LogicPRegister& pg,
5814 const LogicVRegister& src) {
5815 unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
5816 unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
5817 VectorFormat vform = SVEFormatFromLaneSizeInBits(
5818 std::max(dst_data_size_in_bits, src_data_size_in_bits));
5819
5820 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5821 if (!pg.IsActive(vform, i)) continue;
5822
5823 uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5824 0,
5825 src.Uint(vform, i));
5826 double dst_value =
5827 RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5828
5829 uint64_t dst_raw_bits =
5830 FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5831
5832 dst.SetUint(vform, i, dst_raw_bits);
5833 }
5834
5835 return dst;
5836 }
5837
fcvts(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5838 LogicVRegister Simulator::fcvts(VectorFormat vform,
5839 unsigned dst_data_size_in_bits,
5840 unsigned src_data_size_in_bits,
5841 LogicVRegister dst,
5842 const LogicPRegister& pg,
5843 const LogicVRegister& src,
5844 FPRounding round,
5845 int fbits) {
5846 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5847 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5848
5849 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5850 if (!pg.IsActive(vform, i)) continue;
5851
5852 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5853 0,
5854 src.Uint(vform, i));
5855 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5856 std::pow(2.0, fbits);
5857
5858 switch (dst_data_size_in_bits) {
5859 case kHRegSize:
5860 dst.SetInt(vform, i, FPToInt16(result, round));
5861 break;
5862 case kSRegSize:
5863 dst.SetInt(vform, i, FPToInt32(result, round));
5864 break;
5865 case kDRegSize:
5866 dst.SetInt(vform, i, FPToInt64(result, round));
5867 break;
5868 default:
5869 VIXL_UNIMPLEMENTED();
5870 break;
5871 }
5872 }
5873
5874 return dst;
5875 }
5876
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5877 LogicVRegister Simulator::fcvts(VectorFormat vform,
5878 LogicVRegister dst,
5879 const LogicVRegister& src,
5880 FPRounding round,
5881 int fbits) {
5882 dst.ClearForWrite(vform);
5883 return fcvts(vform,
5884 LaneSizeInBitsFromFormat(vform),
5885 LaneSizeInBitsFromFormat(vform),
5886 dst,
5887 GetPTrue(),
5888 src,
5889 round,
5890 fbits);
5891 }
5892
fcvtu(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5893 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5894 unsigned dst_data_size_in_bits,
5895 unsigned src_data_size_in_bits,
5896 LogicVRegister dst,
5897 const LogicPRegister& pg,
5898 const LogicVRegister& src,
5899 FPRounding round,
5900 int fbits) {
5901 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5902 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5903
5904 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5905 if (!pg.IsActive(vform, i)) continue;
5906
5907 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5908 0,
5909 src.Uint(vform, i));
5910 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5911 std::pow(2.0, fbits);
5912
5913 switch (dst_data_size_in_bits) {
5914 case kHRegSize:
5915 dst.SetUint(vform, i, FPToUInt16(result, round));
5916 break;
5917 case kSRegSize:
5918 dst.SetUint(vform, i, FPToUInt32(result, round));
5919 break;
5920 case kDRegSize:
5921 dst.SetUint(vform, i, FPToUInt64(result, round));
5922 break;
5923 default:
5924 VIXL_UNIMPLEMENTED();
5925 break;
5926 }
5927 }
5928
5929 return dst;
5930 }
5931
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5932 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5933 LogicVRegister dst,
5934 const LogicVRegister& src,
5935 FPRounding round,
5936 int fbits) {
5937 dst.ClearForWrite(vform);
5938 return fcvtu(vform,
5939 LaneSizeInBitsFromFormat(vform),
5940 LaneSizeInBitsFromFormat(vform),
5941 dst,
5942 GetPTrue(),
5943 src,
5944 round,
5945 fbits);
5946 }
5947
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5948 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5949 LogicVRegister dst,
5950 const LogicVRegister& src) {
5951 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5952 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5953 // TODO: Full support for SimFloat16 in SimRegister(s).
5954 dst.SetFloat(i,
5955 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5956 ReadDN()));
5957 }
5958 } else {
5959 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5960 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5961 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
5962 }
5963 }
5964 return dst;
5965 }
5966
5967
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5968 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5969 LogicVRegister dst,
5970 const LogicVRegister& src) {
5971 int lane_count = LaneCountFromFormat(vform);
5972 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5973 for (int i = 0; i < lane_count; i++) {
5974 // TODO: Full support for SimFloat16 in SimRegister(s).
5975 dst.SetFloat(i,
5976 FPToFloat(RawbitsToFloat16(
5977 src.Float<uint16_t>(i + lane_count)),
5978 ReadDN()));
5979 }
5980 } else {
5981 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5982 for (int i = 0; i < lane_count; i++) {
5983 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
5984 }
5985 }
5986 return dst;
5987 }
5988
5989
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5990 LogicVRegister Simulator::fcvtn(VectorFormat vform,
5991 LogicVRegister dst,
5992 const LogicVRegister& src) {
5993 SimVRegister tmp;
5994 LogicVRegister srctmp = mov(kFormat2D, tmp, src);
5995 dst.ClearForWrite(vform);
5996 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5997 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5998 dst.SetFloat(i,
5999 Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
6000 FPTieEven,
6001 ReadDN())));
6002 }
6003 } else {
6004 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6005 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6006 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
6007 }
6008 }
6009 return dst;
6010 }
6011
6012
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6013 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
6014 LogicVRegister dst,
6015 const LogicVRegister& src) {
6016 int lane_count = LaneCountFromFormat(vform) / 2;
6017 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6018 for (int i = lane_count - 1; i >= 0; i--) {
6019 dst.SetFloat(i + lane_count,
6020 Float16ToRawbits(
6021 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
6022 }
6023 } else {
6024 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6025 for (int i = lane_count - 1; i >= 0; i--) {
6026 dst.SetFloat(i + lane_count,
6027 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
6028 }
6029 }
6030 return dst;
6031 }
6032
6033
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6034 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
6035 LogicVRegister dst,
6036 const LogicVRegister& src) {
6037 SimVRegister tmp;
6038 LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6039 int input_lane_count = LaneCountFromFormat(vform);
6040 if (IsSVEFormat(vform)) {
6041 mov(kFormatVnB, tmp, src);
6042 input_lane_count /= 2;
6043 }
6044
6045 dst.ClearForWrite(vform);
6046 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6047
6048 for (int i = 0; i < input_lane_count; i++) {
6049 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
6050 }
6051 return dst;
6052 }
6053
6054
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6055 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
6056 LogicVRegister dst,
6057 const LogicVRegister& src) {
6058 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6059 int lane_count = LaneCountFromFormat(vform) / 2;
6060 for (int i = lane_count - 1; i >= 0; i--) {
6061 dst.SetFloat(i + lane_count,
6062 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
6063 }
6064 return dst;
6065 }
6066
6067
6068 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)6069 double Simulator::recip_sqrt_estimate(double a) {
6070 int quot0, quot1, s;
6071 double r;
6072 if (a < 0.5) {
6073 quot0 = static_cast<int>(a * 512.0);
6074 r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6075 } else {
6076 quot1 = static_cast<int>(a * 256.0);
6077 r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6078 }
6079 s = static_cast<int>(256.0 * r + 0.5);
6080 return static_cast<double>(s) / 256.0;
6081 }
6082
6083
Bits(uint64_t val,int start_bit,int end_bit)6084 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6085 return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6086 }
6087
6088
// Computes a reciprocal square root estimate of `op` for T = SimFloat16,
// float or double.
//
// Special cases handled up front:
//  - NaN:              result of FPProcessNaN(op).
//  - Zero:             infinity carrying the sign of `op`.
//  - Negative, non-0:  FPProcessException() is called and the default NaN is
//                      returned.
//  - Infinity:         0.0 (only positive infinity can reach this branch).
//
// Otherwise the operand is decomposed into exponent and fraction, rescaled
// into a double for recip_sqrt_estimate(), and the estimate is repacked into
// a value of type T with a positive sign.
template <typename T>
T Simulator::FPRecipSqrtEstimate(T op) {
  if (IsNaN(op)) {
    return FPProcessNaN(op);
  } else if (op == 0.0) {
    // copysign distinguishes -0.0 from +0.0, which compare equal.
    if (copysign(1.0, op) < 0.0) {
      return kFP64NegativeInfinity;
    } else {
      return kFP64PositiveInfinity;
    }
  } else if (copysign(1.0, op) < 0.0) {
    FPProcessException();
    return FPDefaultNaN<T>();
  } else if (IsInf(op)) {
    return 0.0;
  } else {
    uint64_t fraction;
    int exp, result_exp;

    // Extract the exponent and mantissa. The narrower mantissas are shifted
    // left so that all three types are handled below as a 52-bit fraction
    // (bits 51..0).
    if (IsFloat16<T>()) {
      exp = Float16Exp(op);
      fraction = Float16Mantissa(op);
      fraction <<= 42;
    } else if (IsFloat32<T>()) {
      exp = FloatExp(op);
      fraction = FloatMantissa(op);
      fraction <<= 29;
    } else {
      VIXL_ASSERT(IsFloat64<T>());
      exp = DoubleExp(op);
      fraction = DoubleMantissa(op);
    }

    // A zero exponent field with a non-zero value: shift the fraction left,
    // decrementing the exponent, until bit 51 is set, then shift once more so
    // that leading bit is dropped.
    if (exp == 0) {
      while (Bits(fraction, 51, 51) == 0) {
        fraction = Bits(fraction, 50, 0) << 1;
        exp -= 1;
      }
      fraction = Bits(fraction, 50, 0) << 1;
    }

    // Build the double passed to recip_sqrt_estimate() from the top eight
    // fraction bits. The biased exponent (1022 or 1021) is selected by the
    // parity of `exp`.
    double scaled;
    if (Bits(exp, 0, 0) == 0) {
      scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
    } else {
      scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
    }

    // Exponent field of the result, derived per type from the input exponent.
    if (IsFloat16<T>()) {
      result_exp = (44 - exp) / 2;
    } else if (IsFloat32<T>()) {
      result_exp = (380 - exp) / 2;
    } else {
      VIXL_ASSERT(IsFloat64<T>());
      result_exp = (3068 - exp) / 2;
    }

    uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));

    // Repack: sign 0, the computed exponent, and the top mantissa bits of the
    // double estimate (10, 23 or 52 bits depending on T).
    if (IsFloat16<T>()) {
      uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
      uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
      return Float16Pack(0, exp_bits, est_bits);
    } else if (IsFloat32<T>()) {
      uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
      uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
      return FloatPack(0, exp_bits, est_bits);
    } else {
      VIXL_ASSERT(IsFloat64<T>());
      return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
    }
  }
}
6162
6163
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6164 LogicVRegister Simulator::frsqrte(VectorFormat vform,
6165 LogicVRegister dst,
6166 const LogicVRegister& src) {
6167 dst.ClearForWrite(vform);
6168 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6169 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6170 SimFloat16 input = src.Float<SimFloat16>(i);
6171 dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
6172 }
6173 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6174 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6175 float input = src.Float<float>(i);
6176 dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
6177 }
6178 } else {
6179 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6180 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6181 double input = src.Float<double>(i);
6182 dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
6183 }
6184 }
6185 return dst;
6186 }
6187
6188 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)6189 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6190 uint32_t sign;
6191
6192 if (IsFloat16<T>()) {
6193 sign = Float16Sign(op);
6194 } else if (IsFloat32<T>()) {
6195 sign = FloatSign(op);
6196 } else {
6197 VIXL_ASSERT(IsFloat64<T>());
6198 sign = DoubleSign(op);
6199 }
6200
6201 if (IsNaN(op)) {
6202 return FPProcessNaN(op);
6203 } else if (IsInf(op)) {
6204 return (sign == 1) ? -0.0 : 0.0;
6205 } else if (op == 0.0) {
6206 FPProcessException(); // FPExc_DivideByZero exception.
6207 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6208 } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6209 (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6210 (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6211 bool overflow_to_inf = false;
6212 switch (rounding) {
6213 case FPTieEven:
6214 overflow_to_inf = true;
6215 break;
6216 case FPPositiveInfinity:
6217 overflow_to_inf = (sign == 0);
6218 break;
6219 case FPNegativeInfinity:
6220 overflow_to_inf = (sign == 1);
6221 break;
6222 case FPZero:
6223 overflow_to_inf = false;
6224 break;
6225 default:
6226 break;
6227 }
6228 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
6229 if (overflow_to_inf) {
6230 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6231 } else {
6232 // Return FPMaxNormal(sign).
6233 if (IsFloat16<T>()) {
6234 return Float16Pack(sign, 0x1f, 0x3ff);
6235 } else if (IsFloat32<T>()) {
6236 return FloatPack(sign, 0xfe, 0x07fffff);
6237 } else {
6238 VIXL_ASSERT(IsFloat64<T>());
6239 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6240 }
6241 }
6242 } else {
6243 uint64_t fraction;
6244 int exp, result_exp;
6245
6246 if (IsFloat16<T>()) {
6247 sign = Float16Sign(op);
6248 exp = Float16Exp(op);
6249 fraction = Float16Mantissa(op);
6250 fraction <<= 42;
6251 } else if (IsFloat32<T>()) {
6252 sign = FloatSign(op);
6253 exp = FloatExp(op);
6254 fraction = FloatMantissa(op);
6255 fraction <<= 29;
6256 } else {
6257 VIXL_ASSERT(IsFloat64<T>());
6258 sign = DoubleSign(op);
6259 exp = DoubleExp(op);
6260 fraction = DoubleMantissa(op);
6261 }
6262
6263 if (exp == 0) {
6264 if (Bits(fraction, 51, 51) == 0) {
6265 exp -= 1;
6266 fraction = Bits(fraction, 49, 0) << 2;
6267 } else {
6268 fraction = Bits(fraction, 50, 0) << 1;
6269 }
6270 }
6271
6272 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6273
6274 if (IsFloat16<T>()) {
6275 result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
6276 } else if (IsFloat32<T>()) {
6277 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
6278 } else {
6279 VIXL_ASSERT(IsFloat64<T>());
6280 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
6281 }
6282
6283 double estimate = recip_estimate(scaled);
6284
6285 fraction = DoubleMantissa(estimate);
6286 if (result_exp == 0) {
6287 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6288 } else if (result_exp == -1) {
6289 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6290 result_exp = 0;
6291 }
6292 if (IsFloat16<T>()) {
6293 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6294 uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6295 return Float16Pack(sign, exp_bits, frac_bits);
6296 } else if (IsFloat32<T>()) {
6297 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6298 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6299 return FloatPack(sign, exp_bits, frac_bits);
6300 } else {
6301 VIXL_ASSERT(IsFloat64<T>());
6302 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6303 }
6304 }
6305 }
6306
6307
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)6308 LogicVRegister Simulator::frecpe(VectorFormat vform,
6309 LogicVRegister dst,
6310 const LogicVRegister& src,
6311 FPRounding round) {
6312 dst.ClearForWrite(vform);
6313 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6314 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6315 SimFloat16 input = src.Float<SimFloat16>(i);
6316 dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6317 }
6318 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6319 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6320 float input = src.Float<float>(i);
6321 dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6322 }
6323 } else {
6324 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6325 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6326 double input = src.Float<double>(i);
6327 dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6328 }
6329 }
6330 return dst;
6331 }
6332
6333
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6334 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6335 LogicVRegister dst,
6336 const LogicVRegister& src) {
6337 dst.ClearForWrite(vform);
6338 uint64_t operand;
6339 uint32_t result;
6340 double dp_operand, dp_result;
6341 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6342 operand = src.Uint(vform, i);
6343 if (operand <= 0x3FFFFFFF) {
6344 result = 0xFFFFFFFF;
6345 } else {
6346 dp_operand = operand * std::pow(2.0, -32);
6347 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6348 result = static_cast<uint32_t>(dp_result);
6349 }
6350 dst.SetUint(vform, i, result);
6351 }
6352 return dst;
6353 }
6354
6355
6356 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)6357 double Simulator::recip_estimate(double a) {
6358 int q, s;
6359 double r;
6360 q = static_cast<int>(a * 512.0);
6361 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6362 s = static_cast<int>(256.0 * r + 0.5);
6363 return static_cast<double>(s) / 256.0;
6364 }
6365
6366
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6367 LogicVRegister Simulator::urecpe(VectorFormat vform,
6368 LogicVRegister dst,
6369 const LogicVRegister& src) {
6370 dst.ClearForWrite(vform);
6371 uint64_t operand;
6372 uint32_t result;
6373 double dp_operand, dp_result;
6374 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6375 operand = src.Uint(vform, i);
6376 if (operand <= 0x7FFFFFFF) {
6377 result = 0xFFFFFFFF;
6378 } else {
6379 dp_operand = operand * std::pow(2.0, -32);
6380 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6381 result = static_cast<uint32_t>(dp_result);
6382 }
6383 dst.SetUint(vform, i, result);
6384 }
6385 return dst;
6386 }
6387
pfalse(LogicPRegister dst)6388 LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6389 dst.Clear();
6390 return dst;
6391 }
6392
pfirst(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6393 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6394 const LogicPRegister& pg,
6395 const LogicPRegister& src) {
6396 int first_pg = GetFirstActive(kFormatVnB, pg);
6397 VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6398 mov(dst, src);
6399 if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6400 return dst;
6401 }
6402
ptrue(VectorFormat vform,LogicPRegister dst,int pattern)6403 LogicPRegister Simulator::ptrue(VectorFormat vform,
6404 LogicPRegister dst,
6405 int pattern) {
6406 int count = GetPredicateConstraintLaneCount(vform, pattern);
6407 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6408 dst.SetActive(vform, i, i < count);
6409 }
6410 return dst;
6411 }
6412
pnext(VectorFormat vform,LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6413 LogicPRegister Simulator::pnext(VectorFormat vform,
6414 LogicPRegister dst,
6415 const LogicPRegister& pg,
6416 const LogicPRegister& src) {
6417 int next = GetLastActive(vform, src) + 1;
6418 while (next < LaneCountFromFormat(vform)) {
6419 if (pg.IsActive(vform, next)) break;
6420 next++;
6421 }
6422
6423 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6424 dst.SetActive(vform, i, (i == next));
6425 }
6426 return dst;
6427 }
6428
6429 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6430 LogicVRegister Simulator::frecpx(VectorFormat vform,
6431 LogicVRegister dst,
6432 const LogicVRegister& src) {
6433 dst.ClearForWrite(vform);
6434 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6435 T op = src.Float<T>(i);
6436 T result;
6437 if (IsNaN(op)) {
6438 result = FPProcessNaN(op);
6439 } else {
6440 int exp;
6441 uint32_t sign;
6442 if (IsFloat16<T>()) {
6443 sign = Float16Sign(op);
6444 exp = Float16Exp(op);
6445 exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6446 result = Float16Pack(sign, exp, 0);
6447 } else if (IsFloat32<T>()) {
6448 sign = FloatSign(op);
6449 exp = FloatExp(op);
6450 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6451 result = FloatPack(sign, exp, 0);
6452 } else {
6453 VIXL_ASSERT(IsFloat64<T>());
6454 sign = DoubleSign(op);
6455 exp = DoubleExp(op);
6456 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6457 result = DoublePack(sign, exp, 0);
6458 }
6459 }
6460 dst.SetFloat(i, result);
6461 }
6462 return dst;
6463 }
6464
6465
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6466 LogicVRegister Simulator::frecpx(VectorFormat vform,
6467 LogicVRegister dst,
6468 const LogicVRegister& src) {
6469 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6470 frecpx<SimFloat16>(vform, dst, src);
6471 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6472 frecpx<float>(vform, dst, src);
6473 } else {
6474 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6475 frecpx<double>(vform, dst, src);
6476 }
6477 return dst;
6478 }
6479
flogb(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6480 LogicVRegister Simulator::flogb(VectorFormat vform,
6481 LogicVRegister dst,
6482 const LogicVRegister& src) {
6483 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6484 double op = 0.0;
6485 switch (vform) {
6486 case kFormatVnH:
6487 op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
6488 break;
6489 case kFormatVnS:
6490 op = src.Float<float>(i);
6491 break;
6492 case kFormatVnD:
6493 op = src.Float<double>(i);
6494 break;
6495 default:
6496 VIXL_UNREACHABLE();
6497 }
6498
6499 switch (std::fpclassify(op)) {
6500 case FP_INFINITE:
6501 dst.SetInt(vform, i, MaxIntFromFormat(vform));
6502 break;
6503 case FP_NAN:
6504 case FP_ZERO:
6505 dst.SetInt(vform, i, MinIntFromFormat(vform));
6506 break;
6507 case FP_SUBNORMAL: {
6508 // DoubleMantissa returns the mantissa of its input, leaving 12 zero
6509 // bits where the sign and exponent would be. We subtract 12 to
6510 // find the number of leading zero bits in the mantissa itself.
6511 int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
6512 // Log2 of a subnormal is the lowest exponent a normal number can
6513 // represent, together with the zeros in the mantissa.
6514 dst.SetInt(vform, i, -1023 - mant_zero_count);
6515 break;
6516 }
6517 case FP_NORMAL:
6518 // Log2 of a normal number is the exponent minus the bias.
6519 dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
6520 break;
6521 }
6522 }
6523 return dst;
6524 }
6525
ftsmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6526 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6527 LogicVRegister dst,
6528 const LogicVRegister& src1,
6529 const LogicVRegister& src2) {
6530 SimVRegister maybe_neg_src1;
6531
6532 // The bottom bit of src2 controls the sign of the result. Use it to
6533 // conditionally invert the sign of one `fmul` operand.
6534 shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6535 eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6536
6537 // Multiply src1 by the modified neg_src1, which is potentially its negation.
6538 // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6539 // rather than neg_src1, must be the first source argument.
6540 fmul(vform, dst, src1, maybe_neg_src1);
6541
6542 return dst;
6543 }
6544
ftssel(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6545 LogicVRegister Simulator::ftssel(VectorFormat vform,
6546 LogicVRegister dst,
6547 const LogicVRegister& src1,
6548 const LogicVRegister& src2) {
6549 unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6550 uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6551 uint64_t one;
6552
6553 if (lane_bits == kHRegSize) {
6554 one = Float16ToRawbits(Float16(1.0));
6555 } else if (lane_bits == kSRegSize) {
6556 one = FloatToRawbits(1.0);
6557 } else {
6558 VIXL_ASSERT(lane_bits == kDRegSize);
6559 one = DoubleToRawbits(1.0);
6560 }
6561
6562 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6563 // Use integer accessors for this operation, as this is a data manipulation
6564 // task requiring no calculation.
6565 uint64_t op = src1.Uint(vform, i);
6566
6567 // Only the bottom two bits of the src2 register are significant, indicating
6568 // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6569 // determines the sign of the value written to dst.
6570 uint64_t q = src2.Uint(vform, i);
6571 if ((q & 1) == 1) op = one;
6572 if ((q & 2) == 2) op ^= sign_bit;
6573
6574 dst.SetUint(vform, i, op);
6575 }
6576
6577 return dst;
6578 }
6579
6580 template <typename T>
FTMaddHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,uint64_t coeff_pos,uint64_t coeff_neg)6581 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6582 LogicVRegister dst,
6583 const LogicVRegister& src1,
6584 const LogicVRegister& src2,
6585 uint64_t coeff_pos,
6586 uint64_t coeff_neg) {
6587 SimVRegister zero;
6588 dup_immediate(kFormatVnB, zero, 0);
6589
6590 SimVRegister cf;
6591 SimVRegister cfn;
6592 dup_immediate(vform, cf, coeff_pos);
6593 dup_immediate(vform, cfn, coeff_neg);
6594
6595 // The specification requires testing the top bit of the raw value, rather
6596 // than the sign of the floating point number, so use an integer comparison
6597 // here.
6598 SimPRegister is_neg;
6599 SVEIntCompareVectorsHelper(lt,
6600 vform,
6601 is_neg,
6602 GetPTrue(),
6603 src2,
6604 zero,
6605 false,
6606 LeaveFlags);
6607 mov_merging(vform, cf, is_neg, cfn);
6608
6609 SimVRegister temp;
6610 fabs_<T>(vform, temp, src2);
6611 fmla<T>(vform, cf, cf, src1, temp);
6612 mov(vform, dst, cf);
6613 return dst;
6614 }
6615
6616
ftmad(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,unsigned index)6617 LogicVRegister Simulator::ftmad(VectorFormat vform,
6618 LogicVRegister dst,
6619 const LogicVRegister& src1,
6620 const LogicVRegister& src2,
6621 unsigned index) {
6622 static const uint64_t ftmad_coeff16[] = {0x3c00,
6623 0xb155,
6624 0x2030,
6625 0x0000,
6626 0x0000,
6627 0x0000,
6628 0x0000,
6629 0x0000,
6630 0x3c00,
6631 0xb800,
6632 0x293a,
6633 0x0000,
6634 0x0000,
6635 0x0000,
6636 0x0000,
6637 0x0000};
6638
6639 static const uint64_t ftmad_coeff32[] = {0x3f800000,
6640 0xbe2aaaab,
6641 0x3c088886,
6642 0xb95008b9,
6643 0x36369d6d,
6644 0x00000000,
6645 0x00000000,
6646 0x00000000,
6647 0x3f800000,
6648 0xbf000000,
6649 0x3d2aaaa6,
6650 0xbab60705,
6651 0x37cd37cc,
6652 0x00000000,
6653 0x00000000,
6654 0x00000000};
6655
6656 static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6657 0xbfc5555555555543,
6658 0x3f8111111110f30c,
6659 0xbf2a01a019b92fc6,
6660 0x3ec71de351f3d22b,
6661 0xbe5ae5e2b60f7b91,
6662 0x3de5d8408868552f,
6663 0x0000000000000000,
6664 0x3ff0000000000000,
6665 0xbfe0000000000000,
6666 0x3fa5555555555536,
6667 0xbf56c16c16c13a0b,
6668 0x3efa01a019b1e8d8,
6669 0xbe927e4f7282f468,
6670 0x3e21ee96d2641b13,
6671 0xbda8f76380fbb401};
6672 VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6673 VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6674 VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6675
6676 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6677 FTMaddHelper<SimFloat16>(vform,
6678 dst,
6679 src1,
6680 src2,
6681 ftmad_coeff16[index],
6682 ftmad_coeff16[index + 8]);
6683 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6684 FTMaddHelper<float>(vform,
6685 dst,
6686 src1,
6687 src2,
6688 ftmad_coeff32[index],
6689 ftmad_coeff32[index + 8]);
6690 } else {
6691 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6692 FTMaddHelper<double>(vform,
6693 dst,
6694 src1,
6695 src2,
6696 ftmad_coeff64[index],
6697 ftmad_coeff64[index + 8]);
6698 }
6699 return dst;
6700 }
6701
fexpa(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6702 LogicVRegister Simulator::fexpa(VectorFormat vform,
6703 LogicVRegister dst,
6704 const LogicVRegister& src) {
6705 static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6706 0x005d, 0x0075, 0x008e, 0x00a8,
6707 0x00c2, 0x00dc, 0x00f8, 0x0114,
6708 0x0130, 0x014d, 0x016b, 0x0189,
6709 0x01a8, 0x01c8, 0x01e8, 0x0209,
6710 0x022b, 0x024e, 0x0271, 0x0295,
6711 0x02ba, 0x02e0, 0x0306, 0x032e,
6712 0x0356, 0x037f, 0x03a9, 0x03d4};
6713
6714 static const uint64_t fexpa_coeff32[] =
6715 {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6716 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6717 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6718 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6719 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6720 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6721 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6722 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6723 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6724 0x7d3e0c};
6725
6726 static const uint64_t fexpa_coeff64[] =
6727 {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6728 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6729 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6730 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6731 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6732 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6733 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6734 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6735 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6736 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6737 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6738 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6739 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6740 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6741 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6742 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6743
6744 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6745 int index_highbit = 5;
6746 int op_highbit, op_shift;
6747 const uint64_t* fexpa_coeff;
6748
6749 if (lane_size == kHRegSize) {
6750 index_highbit = 4;
6751 VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6752 fexpa_coeff = fexpa_coeff16;
6753 op_highbit = 9;
6754 op_shift = 10;
6755 } else if (lane_size == kSRegSize) {
6756 VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6757 fexpa_coeff = fexpa_coeff32;
6758 op_highbit = 13;
6759 op_shift = 23;
6760 } else {
6761 VIXL_ASSERT(lane_size == kDRegSize);
6762 VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6763 fexpa_coeff = fexpa_coeff64;
6764 op_highbit = 16;
6765 op_shift = 52;
6766 }
6767
6768 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6769 uint64_t op = src.Uint(vform, i);
6770 uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6771 result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6772 dst.SetUint(vform, i, result);
6773 }
6774 return dst;
6775 }
6776
6777 template <typename T>
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6778 LogicVRegister Simulator::fscale(VectorFormat vform,
6779 LogicVRegister dst,
6780 const LogicVRegister& src1,
6781 const LogicVRegister& src2) {
6782 T two = T(2.0);
6783 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6784 T src1_val = src1.Float<T>(i);
6785 if (!IsNaN(src1_val)) {
6786 int64_t scale = src2.Int(vform, i);
6787 // TODO: this is a low-performance implementation, but it's simple and
6788 // less likely to be buggy. Consider replacing it with something faster.
6789
6790 // Scales outside of these bounds become infinity or zero, so there's no
6791 // point iterating further.
6792 scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6793
6794 // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6795 // decrement scale until it's zero.
6796 while (scale-- > 0) {
6797 src1_val = FPMul(src1_val, two);
6798 }
6799
6800 // If scale is negative, divide by two and increment scale until it's
6801 // zero. Initially, scale is (src2 - 1), so we pre-increment.
6802 while (++scale < 0) {
6803 src1_val = FPDiv(src1_val, two);
6804 }
6805 }
6806 dst.SetFloat<T>(i, src1_val);
6807 }
6808 return dst;
6809 }
6810
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6811 LogicVRegister Simulator::fscale(VectorFormat vform,
6812 LogicVRegister dst,
6813 const LogicVRegister& src1,
6814 const LogicVRegister& src2) {
6815 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6816 fscale<SimFloat16>(vform, dst, src1, src2);
6817 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6818 fscale<float>(vform, dst, src1, src2);
6819 } else {
6820 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6821 fscale<double>(vform, dst, src1, src2);
6822 }
6823 return dst;
6824 }
6825
scvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6826 LogicVRegister Simulator::scvtf(VectorFormat vform,
6827 unsigned dst_data_size_in_bits,
6828 unsigned src_data_size_in_bits,
6829 LogicVRegister dst,
6830 const LogicPRegister& pg,
6831 const LogicVRegister& src,
6832 FPRounding round,
6833 int fbits) {
6834 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6835 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6836 dst.ClearForWrite(vform);
6837
6838 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6839 if (!pg.IsActive(vform, i)) continue;
6840
6841 int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6842 0,
6843 src.Uint(vform, i));
6844
6845 switch (dst_data_size_in_bits) {
6846 case kHRegSize: {
6847 SimFloat16 result = FixedToFloat16(value, fbits, round);
6848 dst.SetUint(vform, i, Float16ToRawbits(result));
6849 break;
6850 }
6851 case kSRegSize: {
6852 float result = FixedToFloat(value, fbits, round);
6853 dst.SetUint(vform, i, FloatToRawbits(result));
6854 break;
6855 }
6856 case kDRegSize: {
6857 double result = FixedToDouble(value, fbits, round);
6858 dst.SetUint(vform, i, DoubleToRawbits(result));
6859 break;
6860 }
6861 default:
6862 VIXL_UNIMPLEMENTED();
6863 break;
6864 }
6865 }
6866
6867 return dst;
6868 }
6869
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6870 LogicVRegister Simulator::scvtf(VectorFormat vform,
6871 LogicVRegister dst,
6872 const LogicVRegister& src,
6873 int fbits,
6874 FPRounding round) {
6875 return scvtf(vform,
6876 LaneSizeInBitsFromFormat(vform),
6877 LaneSizeInBitsFromFormat(vform),
6878 dst,
6879 GetPTrue(),
6880 src,
6881 round,
6882 fbits);
6883 }
6884
ucvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6885 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6886 unsigned dst_data_size_in_bits,
6887 unsigned src_data_size_in_bits,
6888 LogicVRegister dst,
6889 const LogicPRegister& pg,
6890 const LogicVRegister& src,
6891 FPRounding round,
6892 int fbits) {
6893 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6894 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6895 dst.ClearForWrite(vform);
6896
6897 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6898 if (!pg.IsActive(vform, i)) continue;
6899
6900 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6901 0,
6902 src.Uint(vform, i));
6903
6904 switch (dst_data_size_in_bits) {
6905 case kHRegSize: {
6906 SimFloat16 result = UFixedToFloat16(value, fbits, round);
6907 dst.SetUint(vform, i, Float16ToRawbits(result));
6908 break;
6909 }
6910 case kSRegSize: {
6911 float result = UFixedToFloat(value, fbits, round);
6912 dst.SetUint(vform, i, FloatToRawbits(result));
6913 break;
6914 }
6915 case kDRegSize: {
6916 double result = UFixedToDouble(value, fbits, round);
6917 dst.SetUint(vform, i, DoubleToRawbits(result));
6918 break;
6919 }
6920 default:
6921 VIXL_UNIMPLEMENTED();
6922 break;
6923 }
6924 }
6925
6926 return dst;
6927 }
6928
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6929 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6930 LogicVRegister dst,
6931 const LogicVRegister& src,
6932 int fbits,
6933 FPRounding round) {
6934 return ucvtf(vform,
6935 LaneSizeInBitsFromFormat(vform),
6936 LaneSizeInBitsFromFormat(vform),
6937 dst,
6938 GetPTrue(),
6939 src,
6940 round,
6941 fbits);
6942 }
6943
unpk(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,UnpackType unpack_type,ExtendType extend_type)6944 LogicVRegister Simulator::unpk(VectorFormat vform,
6945 LogicVRegister dst,
6946 const LogicVRegister& src,
6947 UnpackType unpack_type,
6948 ExtendType extend_type) {
6949 VectorFormat vform_half = VectorFormatHalfWidth(vform);
6950 const int lane_count = LaneCountFromFormat(vform);
6951 const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6952
6953 switch (extend_type) {
6954 case kSignedExtend: {
6955 int64_t result[kZRegMaxSizeInBytes];
6956 for (int i = 0; i < lane_count; ++i) {
6957 result[i] = src.Int(vform_half, i + src_start_lane);
6958 }
6959 for (int i = 0; i < lane_count; ++i) {
6960 dst.SetInt(vform, i, result[i]);
6961 }
6962 break;
6963 }
6964 case kUnsignedExtend: {
6965 uint64_t result[kZRegMaxSizeInBytes];
6966 for (int i = 0; i < lane_count; ++i) {
6967 result[i] = src.Uint(vform_half, i + src_start_lane);
6968 }
6969 for (int i = 0; i < lane_count; ++i) {
6970 dst.SetUint(vform, i, result[i]);
6971 }
6972 break;
6973 }
6974 default:
6975 VIXL_UNREACHABLE();
6976 }
6977 return dst;
6978 }
6979
SVEIntCompareVectorsHelper(Condition cond,VectorFormat vform,LogicPRegister dst,const LogicPRegister & mask,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements,FlagsUpdate flags)6980 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
6981 VectorFormat vform,
6982 LogicPRegister dst,
6983 const LogicPRegister& mask,
6984 const LogicVRegister& src1,
6985 const LogicVRegister& src2,
6986 bool is_wide_elements,
6987 FlagsUpdate flags) {
6988 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6989 bool result = false;
6990 if (mask.IsActive(vform, lane)) {
6991 int64_t op1 = 0xbadbeef;
6992 int64_t op2 = 0xbadbeef;
6993 int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
6994 switch (cond) {
6995 case eq:
6996 case ge:
6997 case gt:
6998 case lt:
6999 case le:
7000 case ne:
7001 op1 = src1.Int(vform, lane);
7002 op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
7003 : src2.Int(vform, lane);
7004 break;
7005 case hi:
7006 case hs:
7007 case ls:
7008 case lo:
7009 op1 = src1.Uint(vform, lane);
7010 op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
7011 : src2.Uint(vform, lane);
7012 break;
7013 default:
7014 VIXL_UNREACHABLE();
7015 }
7016
7017 switch (cond) {
7018 case eq:
7019 result = (op1 == op2);
7020 break;
7021 case ne:
7022 result = (op1 != op2);
7023 break;
7024 case ge:
7025 result = (op1 >= op2);
7026 break;
7027 case gt:
7028 result = (op1 > op2);
7029 break;
7030 case le:
7031 result = (op1 <= op2);
7032 break;
7033 case lt:
7034 result = (op1 < op2);
7035 break;
7036 case hs:
7037 result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
7038 break;
7039 case hi:
7040 result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
7041 break;
7042 case ls:
7043 result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
7044 break;
7045 case lo:
7046 result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
7047 break;
7048 default:
7049 VIXL_UNREACHABLE();
7050 }
7051 }
7052 dst.SetActive(vform, lane, result);
7053 }
7054
7055 if (flags == SetFlags) PredTest(vform, mask, dst);
7056
7057 return dst;
7058 }
7059
SVEBitwiseShiftHelper(Shift shift_op,VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements)7060 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
7061 VectorFormat vform,
7062 LogicVRegister dst,
7063 const LogicVRegister& src1,
7064 const LogicVRegister& src2,
7065 bool is_wide_elements) {
7066 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
7067 VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
7068
7069 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7070 int shift_src_lane = lane;
7071 if (is_wide_elements) {
7072 // If the shift amount comes from wide elements, select the D-sized lane
7073 // which occupies the corresponding lanes of the value to be shifted.
7074 shift_src_lane = (lane * lane_size) / kDRegSize;
7075 }
7076 uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
7077
7078 // Saturate shift_amount to the size of the lane that will be shifted.
7079 if (shift_amount > lane_size) shift_amount = lane_size;
7080
7081 uint64_t value = src1.Uint(vform, lane);
7082 int64_t result = ShiftOperand(lane_size,
7083 value,
7084 shift_op,
7085 static_cast<unsigned>(shift_amount));
7086 dst.SetUint(vform, lane, result);
7087 }
7088
7089 return dst;
7090 }
7091
asrd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int shift)7092 LogicVRegister Simulator::asrd(VectorFormat vform,
7093 LogicVRegister dst,
7094 const LogicVRegister& src1,
7095 int shift) {
7096 VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
7097 LaneSizeInBitsFromFormat(vform)));
7098
7099 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7100 int64_t value = src1.Int(vform, i);
7101 if (shift <= 63) {
7102 if (value < 0) {
7103 // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
7104 // cast to int64_t, and cannot cause signed overflow in the result.
7105 value = value + GetUintMask(shift);
7106 }
7107 value = ShiftOperand(kDRegSize, value, ASR, shift);
7108 } else {
7109 value = 0;
7110 }
7111 dst.SetInt(vform, i, value);
7112 }
7113 return dst;
7114 }
7115
SVEBitwiseLogicalUnpredicatedHelper(LogicalOp logical_op,VectorFormat vform,LogicVRegister zd,const LogicVRegister & zn,const LogicVRegister & zm)7116 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
7117 LogicalOp logical_op,
7118 VectorFormat vform,
7119 LogicVRegister zd,
7120 const LogicVRegister& zn,
7121 const LogicVRegister& zm) {
7122 VIXL_ASSERT(IsSVEFormat(vform));
7123 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7124 uint64_t op1 = zn.Uint(vform, i);
7125 uint64_t op2 = zm.Uint(vform, i);
7126 uint64_t result = 0;
7127 switch (logical_op) {
7128 case AND:
7129 result = op1 & op2;
7130 break;
7131 case BIC:
7132 result = op1 & ~op2;
7133 break;
7134 case EOR:
7135 result = op1 ^ op2;
7136 break;
7137 case ORR:
7138 result = op1 | op2;
7139 break;
7140 default:
7141 VIXL_UNIMPLEMENTED();
7142 }
7143 zd.SetUint(vform, i, result);
7144 }
7145
7146 return zd;
7147 }
7148
SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,LogicPRegister pd,const LogicPRegister & pn,const LogicPRegister & pm)7149 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
7150 LogicPRegister pd,
7151 const LogicPRegister& pn,
7152 const LogicPRegister& pm) {
7153 for (int i = 0; i < pn.GetChunkCount(); i++) {
7154 LogicPRegister::ChunkType op1 = pn.GetChunk(i);
7155 LogicPRegister::ChunkType op2 = pm.GetChunk(i);
7156 LogicPRegister::ChunkType result = 0;
7157 switch (op) {
7158 case ANDS_p_p_pp_z:
7159 case AND_p_p_pp_z:
7160 result = op1 & op2;
7161 break;
7162 case BICS_p_p_pp_z:
7163 case BIC_p_p_pp_z:
7164 result = op1 & ~op2;
7165 break;
7166 case EORS_p_p_pp_z:
7167 case EOR_p_p_pp_z:
7168 result = op1 ^ op2;
7169 break;
7170 case NANDS_p_p_pp_z:
7171 case NAND_p_p_pp_z:
7172 result = ~(op1 & op2);
7173 break;
7174 case NORS_p_p_pp_z:
7175 case NOR_p_p_pp_z:
7176 result = ~(op1 | op2);
7177 break;
7178 case ORNS_p_p_pp_z:
7179 case ORN_p_p_pp_z:
7180 result = op1 | ~op2;
7181 break;
7182 case ORRS_p_p_pp_z:
7183 case ORR_p_p_pp_z:
7184 result = op1 | op2;
7185 break;
7186 default:
7187 VIXL_UNIMPLEMENTED();
7188 }
7189 pd.SetChunk(i, result);
7190 }
7191 return pd;
7192 }
7193
SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,VectorFormat vform,LogicVRegister zd,uint64_t imm)7194 LogicVRegister Simulator::SVEBitwiseImmHelper(
7195 SVEBitwiseLogicalWithImm_UnpredicatedOp op,
7196 VectorFormat vform,
7197 LogicVRegister zd,
7198 uint64_t imm) {
7199 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7200 uint64_t op1 = zd.Uint(vform, i);
7201 uint64_t result = 0;
7202 switch (op) {
7203 case AND_z_zi:
7204 result = op1 & imm;
7205 break;
7206 case EOR_z_zi:
7207 result = op1 ^ imm;
7208 break;
7209 case ORR_z_zi:
7210 result = op1 | imm;
7211 break;
7212 default:
7213 VIXL_UNIMPLEMENTED();
7214 }
7215 zd.SetUint(vform, i, result);
7216 }
7217
7218 return zd;
7219 }
7220
SVEStructuredStoreHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr)7221 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7222 const LogicPRegister& pg,
7223 unsigned zt_code,
7224 const LogicSVEAddressVector& addr) {
7225 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7226
7227 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7228 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7229 int msize_in_bytes = addr.GetMsizeInBytes();
7230 int reg_count = addr.GetRegCount();
7231
7232 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7233 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7234
7235 unsigned zt_codes[4] = {zt_code,
7236 (zt_code + 1) % kNumberOfZRegisters,
7237 (zt_code + 2) % kNumberOfZRegisters,
7238 (zt_code + 3) % kNumberOfZRegisters};
7239
7240 LogicVRegister zt[4] = {
7241 ReadVRegister(zt_codes[0]),
7242 ReadVRegister(zt_codes[1]),
7243 ReadVRegister(zt_codes[2]),
7244 ReadVRegister(zt_codes[3]),
7245 };
7246
7247 // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7248 // are ignored, so read the source register using the VectorFormat that
7249 // corresponds with the storage format, and multiply the index accordingly.
7250 VectorFormat unpack_vform =
7251 SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7252 int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7253
7254 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7255 if (!pg.IsActive(vform, i)) continue;
7256
7257 for (int r = 0; r < reg_count; r++) {
7258 uint64_t element_address = addr.GetElementAddress(i, r);
7259 StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
7260 }
7261 }
7262
7263 if (ShouldTraceWrites()) {
7264 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7265 if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7266 // Use an FP format where it's likely that we're accessing FP data.
7267 format = GetPrintRegisterFormatTryFP(format);
7268 }
7269 // Stores don't represent a change to the source register's value, so only
7270 // print the relevant part of the value.
7271 format = GetPrintRegPartial(format);
7272
7273 PrintZStructAccess(zt_code,
7274 reg_count,
7275 pg,
7276 format,
7277 msize_in_bytes,
7278 "->",
7279 addr);
7280 }
7281 }
7282
SVEStructuredLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,bool is_signed)7283 void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7284 const LogicPRegister& pg,
7285 unsigned zt_code,
7286 const LogicSVEAddressVector& addr,
7287 bool is_signed) {
7288 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7289 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7290 int msize_in_bytes = addr.GetMsizeInBytes();
7291 int reg_count = addr.GetRegCount();
7292
7293 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7294 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7295 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7296
7297 unsigned zt_codes[4] = {zt_code,
7298 (zt_code + 1) % kNumberOfZRegisters,
7299 (zt_code + 2) % kNumberOfZRegisters,
7300 (zt_code + 3) % kNumberOfZRegisters};
7301 LogicVRegister zt[4] = {
7302 ReadVRegister(zt_codes[0]),
7303 ReadVRegister(zt_codes[1]),
7304 ReadVRegister(zt_codes[2]),
7305 ReadVRegister(zt_codes[3]),
7306 };
7307
7308 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7309 for (int r = 0; r < reg_count; r++) {
7310 uint64_t element_address = addr.GetElementAddress(i, r);
7311
7312 if (!pg.IsActive(vform, i)) {
7313 zt[r].SetUint(vform, i, 0);
7314 continue;
7315 }
7316
7317 if (is_signed) {
7318 LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
7319 } else {
7320 LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
7321 }
7322 }
7323 }
7324
7325 if (ShouldTraceVRegs()) {
7326 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7327 if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7328 // Use an FP format where it's likely that we're accessing FP data.
7329 format = GetPrintRegisterFormatTryFP(format);
7330 }
7331 PrintZStructAccess(zt_code,
7332 reg_count,
7333 pg,
7334 format,
7335 msize_in_bytes,
7336 "<-",
7337 addr);
7338 }
7339 }
7340
brka(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7341 LogicPRegister Simulator::brka(LogicPRegister pd,
7342 const LogicPRegister& pg,
7343 const LogicPRegister& pn) {
7344 bool break_ = false;
7345 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7346 if (pg.IsActive(kFormatVnB, i)) {
7347 pd.SetActive(kFormatVnB, i, !break_);
7348 break_ |= pn.IsActive(kFormatVnB, i);
7349 }
7350 }
7351
7352 return pd;
7353 }
7354
brkb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7355 LogicPRegister Simulator::brkb(LogicPRegister pd,
7356 const LogicPRegister& pg,
7357 const LogicPRegister& pn) {
7358 bool break_ = false;
7359 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7360 if (pg.IsActive(kFormatVnB, i)) {
7361 break_ |= pn.IsActive(kFormatVnB, i);
7362 pd.SetActive(kFormatVnB, i, !break_);
7363 }
7364 }
7365
7366 return pd;
7367 }
7368
brkn(LogicPRegister pdm,const LogicPRegister & pg,const LogicPRegister & pn)7369 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7370 const LogicPRegister& pg,
7371 const LogicPRegister& pn) {
7372 if (!IsLastActive(kFormatVnB, pg, pn)) {
7373 pfalse(pdm);
7374 }
7375 return pdm;
7376 }
7377
brkpa(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7378 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7379 const LogicPRegister& pg,
7380 const LogicPRegister& pn,
7381 const LogicPRegister& pm) {
7382 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7383
7384 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7385 bool active = false;
7386 if (pg.IsActive(kFormatVnB, i)) {
7387 active = last_active;
7388 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7389 }
7390 pd.SetActive(kFormatVnB, i, active);
7391 }
7392
7393 return pd;
7394 }
7395
brkpb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7396 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7397 const LogicPRegister& pg,
7398 const LogicPRegister& pn,
7399 const LogicPRegister& pm) {
7400 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7401
7402 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7403 bool active = false;
7404 if (pg.IsActive(kFormatVnB, i)) {
7405 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7406 active = last_active;
7407 }
7408 pd.SetActive(kFormatVnB, i, active);
7409 }
7410
7411 return pd;
7412 }
7413
SVEFaultTolerantLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,SVEFaultTolerantLoadType type,bool is_signed)7414 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7415 const LogicPRegister& pg,
7416 unsigned zt_code,
7417 const LogicSVEAddressVector& addr,
7418 SVEFaultTolerantLoadType type,
7419 bool is_signed) {
7420 int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7421 int msize_in_bits = addr.GetMsizeInBits();
7422 int msize_in_bytes = addr.GetMsizeInBytes();
7423
7424 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7425 VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7426 VIXL_ASSERT(addr.GetRegCount() == 1);
7427
7428 LogicVRegister zt = ReadVRegister(zt_code);
7429 LogicPRegister ffr = ReadFFR();
7430
7431 // Non-faulting loads are allowed to fail arbitrarily. To stress user
7432 // code, fail a random element in roughly one in eight full-vector loads.
7433 uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7434 int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7435
7436 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7437 uint64_t value = 0;
7438
7439 if (pg.IsActive(vform, i)) {
7440 uint64_t element_address = addr.GetElementAddress(i, 0);
7441
7442 if (type == kSVEFirstFaultLoad) {
7443 // First-faulting loads always load the first active element, regardless
7444 // of FFR. The result will be discarded if its FFR lane is inactive, but
7445 // it could still generate a fault.
7446 value = MemReadUint(msize_in_bytes, element_address);
7447 // All subsequent elements have non-fault semantics.
7448 type = kSVENonFaultLoad;
7449
7450 } else if (ffr.IsActive(vform, i)) {
7451 // Simulation of fault-tolerant loads relies on system calls, and is
7452 // likely to be relatively slow, so we only actually perform the load if
7453 // its FFR lane is active.
7454
7455 bool can_read = (i < fake_fault_at_lane) &&
7456 CanReadMemory(element_address, msize_in_bytes);
7457 if (can_read) {
7458 value = MemReadUint(msize_in_bytes, element_address);
7459 } else {
7460 // Propagate the fault to the end of FFR.
7461 for (int j = i; j < LaneCountFromFormat(vform); j++) {
7462 ffr.SetActive(vform, j, false);
7463 }
7464 }
7465 }
7466 }
7467
7468 // The architecture permits a few possible results for inactive FFR lanes
7469 // (including those caused by a fault in this instruction). We choose to
7470 // leave the register value unchanged (like merging predication) because
7471 // no other input to this instruction can have the same behaviour.
7472 //
7473 // Note that this behaviour takes precedence over pg's zeroing predication.
7474
7475 if (ffr.IsActive(vform, i)) {
7476 int msb = msize_in_bits - 1;
7477 if (is_signed) {
7478 zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7479 } else {
7480 zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7481 }
7482 }
7483 }
7484
7485 if (ShouldTraceVRegs()) {
7486 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7487 if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7488 // Use an FP format where it's likely that we're accessing FP data.
7489 format = GetPrintRegisterFormatTryFP(format);
7490 }
7491 // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7492 // expects a single mask, so combine the two predicates.
7493 SimPRegister mask;
7494 SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7495 PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7496 }
7497 }
7498
SVEGatherLoadScalarPlusVectorHelper(const Instruction * instr,VectorFormat vform,SVEOffsetModifier mod)7499 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7500 VectorFormat vform,
7501 SVEOffsetModifier mod) {
7502 bool is_signed = instr->ExtractBit(14) == 0;
7503 bool is_ff = instr->ExtractBit(13) == 1;
7504 // Note that these instructions don't use the Dtype encoding.
7505 int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7506 int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7507 uint64_t base = ReadXRegister(instr->GetRn());
7508 LogicSVEAddressVector addr(base,
7509 &ReadVRegister(instr->GetRm()),
7510 vform,
7511 mod,
7512 scale);
7513 addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7514 if (is_ff) {
7515 SVEFaultTolerantLoadHelper(vform,
7516 ReadPRegister(instr->GetPgLow8()),
7517 instr->GetRt(),
7518 addr,
7519 kSVEFirstFaultLoad,
7520 is_signed);
7521 } else {
7522 SVEStructuredLoadHelper(vform,
7523 ReadPRegister(instr->GetPgLow8()),
7524 instr->GetRt(),
7525 addr,
7526 is_signed);
7527 }
7528 }
7529
GetFirstActive(VectorFormat vform,const LogicPRegister & pg) const7530 int Simulator::GetFirstActive(VectorFormat vform,
7531 const LogicPRegister& pg) const {
7532 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7533 if (pg.IsActive(vform, i)) return i;
7534 }
7535 return -1;
7536 }
7537
GetLastActive(VectorFormat vform,const LogicPRegister & pg) const7538 int Simulator::GetLastActive(VectorFormat vform,
7539 const LogicPRegister& pg) const {
7540 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7541 if (pg.IsActive(vform, i)) return i;
7542 }
7543 return -1;
7544 }
7545
CountActiveLanes(VectorFormat vform,const LogicPRegister & pg) const7546 int Simulator::CountActiveLanes(VectorFormat vform,
7547 const LogicPRegister& pg) const {
7548 int count = 0;
7549 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7550 count += pg.IsActive(vform, i) ? 1 : 0;
7551 }
7552 return count;
7553 }
7554
CountActiveAndTrueLanes(VectorFormat vform,const LogicPRegister & pg,const LogicPRegister & pn) const7555 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7556 const LogicPRegister& pg,
7557 const LogicPRegister& pn) const {
7558 int count = 0;
7559 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7560 count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7561 }
7562 return count;
7563 }
7564
GetPredicateConstraintLaneCount(VectorFormat vform,int pattern) const7565 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7566 int pattern) const {
7567 VIXL_ASSERT(IsSVEFormat(vform));
7568 int all = LaneCountFromFormat(vform);
7569 VIXL_ASSERT(all > 0);
7570
7571 switch (pattern) {
7572 case SVE_VL1:
7573 case SVE_VL2:
7574 case SVE_VL3:
7575 case SVE_VL4:
7576 case SVE_VL5:
7577 case SVE_VL6:
7578 case SVE_VL7:
7579 case SVE_VL8:
7580 // VL1-VL8 are encoded directly.
7581 VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7582 VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7583 return (pattern <= all) ? pattern : 0;
7584 case SVE_VL16:
7585 case SVE_VL32:
7586 case SVE_VL64:
7587 case SVE_VL128:
7588 case SVE_VL256: {
7589 // VL16-VL256 are encoded as log2(N) + c.
7590 int min = 16 << (pattern - SVE_VL16);
7591 return (min <= all) ? min : 0;
7592 }
7593 // Special cases.
7594 case SVE_POW2:
7595 return 1 << HighestSetBitPosition(all);
7596 case SVE_MUL4:
7597 return all - (all % 4);
7598 case SVE_MUL3:
7599 return all - (all % 3);
7600 case SVE_ALL:
7601 return all;
7602 }
7603 // Unnamed cases architecturally return 0.
7604 return 0;
7605 }
7606
match(VectorFormat vform,LogicPRegister dst,const LogicVRegister & haystack,const LogicVRegister & needles,bool negate_match)7607 LogicPRegister Simulator::match(VectorFormat vform,
7608 LogicPRegister dst,
7609 const LogicVRegister& haystack,
7610 const LogicVRegister& needles,
7611 bool negate_match) {
7612 SimVRegister ztemp;
7613 SimPRegister ptemp;
7614
7615 pfalse(dst);
7616 int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
7617 for (int i = 0; i < lanes_per_segment; i++) {
7618 dup_elements_to_segments(vform, ztemp, needles, i);
7619 SVEIntCompareVectorsHelper(eq,
7620 vform,
7621 ptemp,
7622 GetPTrue(),
7623 haystack,
7624 ztemp,
7625 false,
7626 LeaveFlags);
7627 SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
7628 }
7629 if (negate_match) {
7630 ptrue(vform, ptemp, SVE_ALL);
7631 SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
7632 }
7633 return dst;
7634 }
7635
GetStructAddress(int lane) const7636 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7637 if (IsContiguous()) {
7638 return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7639 }
7640
7641 VIXL_ASSERT(IsScatterGather());
7642 VIXL_ASSERT(vector_ != NULL);
7643
7644 // For scatter-gather accesses, we need to extract the offset from vector_,
7645 // and apply modifiers.
7646
7647 uint64_t offset = 0;
7648 switch (vector_form_) {
7649 case kFormatVnS:
7650 offset = vector_->GetLane<uint32_t>(lane);
7651 break;
7652 case kFormatVnD:
7653 offset = vector_->GetLane<uint64_t>(lane);
7654 break;
7655 default:
7656 VIXL_UNIMPLEMENTED();
7657 break;
7658 }
7659
7660 switch (vector_mod_) {
7661 case SVE_MUL_VL:
7662 VIXL_UNIMPLEMENTED();
7663 break;
7664 case SVE_LSL:
7665 // We apply the shift below. There's nothing to do here.
7666 break;
7667 case NO_SVE_OFFSET_MODIFIER:
7668 VIXL_ASSERT(vector_shift_ == 0);
7669 break;
7670 case SVE_UXTW:
7671 offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7672 break;
7673 case SVE_SXTW:
7674 offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7675 break;
7676 }
7677
7678 return base_ + (offset << vector_shift_);
7679 }
7680
pack_odd_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7681 LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
7682 LogicVRegister dst,
7683 const LogicVRegister& src) {
7684 SimVRegister zero;
7685 zero.Clear();
7686 return uzp2(vform, dst, src, zero);
7687 }
7688
pack_even_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7689 LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
7690 LogicVRegister dst,
7691 const LogicVRegister& src) {
7692 SimVRegister zero;
7693 zero.Clear();
7694 return uzp1(vform, dst, src, zero);
7695 }
7696
adcl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool top)7697 LogicVRegister Simulator::adcl(VectorFormat vform,
7698 LogicVRegister dst,
7699 const LogicVRegister& src1,
7700 const LogicVRegister& src2,
7701 bool top) {
7702 unsigned reg_size = LaneSizeInBitsFromFormat(vform);
7703 VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
7704
7705 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
7706 uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
7707 uint64_t right = dst.Uint(vform, i);
7708 unsigned carry_in = src2.Uint(vform, i + 1) & 1;
7709 std::pair<uint64_t, uint8_t> val_and_flags =
7710 AddWithCarry(reg_size, left, right, carry_in);
7711
7712 // Set even lanes to the result of the addition.
7713 dst.SetUint(vform, i, val_and_flags.first);
7714
7715 // Set odd lanes to the carry flag from the addition.
7716 uint64_t carry_out = (val_and_flags.second >> 1) & 1;
7717 dst.SetUint(vform, i + 1, carry_out);
7718 }
7719 return dst;
7720 }
7721
7722 // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7723 // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7724 //
7725 // Matrices of the form:
7726 //
7727 // src1 = ( a b c d e f g h ) src2 = ( A B )
7728 // ( i j k l m n o p ) ( C D )
7729 // ( E F )
7730 // ( G H )
7731 // ( I J )
7732 // ( K L )
7733 // ( M N )
7734 // ( O P )
7735 //
7736 // Are stored in the input vector registers as:
7737 //
7738 // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
7739 // src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7740 // src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7741 //
matmul(VectorFormat vform_dst,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool src1_signed,bool src2_signed)7742 LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7743 LogicVRegister srcdst,
7744 const LogicVRegister& src1,
7745 const LogicVRegister& src2,
7746 bool src1_signed,
7747 bool src2_signed) {
7748 // Two destination forms are supported: Q register containing four S-sized
7749 // elements (4S) and Z register containing n S-sized elements (VnS).
7750 VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7751 VectorFormat vform_src = kFormatVnB;
7752 int b_per_segment = kQRegSize / kBRegSize;
7753 int s_per_segment = kQRegSize / kSRegSize;
7754 int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7755 int segment_count = LaneCountFromFormat(vform_dst) / 4;
7756 for (int seg = 0; seg < segment_count; seg++) {
7757 for (int i = 0; i < 2; i++) {
7758 for (int j = 0; j < 2; j++) {
7759 int dstidx = (2 * i) + j + (seg * s_per_segment);
7760 int64_t sum = srcdst.Int(vform_dst, dstidx);
7761 for (int k = 0; k < 8; k++) {
7762 int idx1 = (8 * i) + k + (seg * b_per_segment);
7763 int idx2 = (8 * j) + k + (seg * b_per_segment);
7764 int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7765 : src1.Uint(vform_src, idx1);
7766 int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7767 : src2.Uint(vform_src, idx2);
7768 sum += e1 * e2;
7769 }
7770 result[dstidx] = sum;
7771 }
7772 }
7773 }
7774 srcdst.SetIntArray(vform_dst, result);
7775 return srcdst;
7776 }
7777
7778 // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7779 // result to the matrix in srcdst, and write back to srcdst.
7780 //
7781 // Matrices of the form:
7782 //
7783 // src1 = ( a b ) src2 = ( A B )
7784 // ( c d ) ( C D )
7785 //
7786 // Are stored in the input vector registers as:
7787 //
7788 // 3 2 1 0
7789 // src1 = [ d | c | b | a ]
7790 // src2 = [ D | B | C | A ]
7791 //
7792 template <typename T>
fmatmul(VectorFormat vform,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)7793 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7794 LogicVRegister srcdst,
7795 const LogicVRegister& src1,
7796 const LogicVRegister& src2) {
7797 T result[kZRegMaxSizeInBytes / sizeof(T)];
7798 int T_per_segment = 4;
7799 int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7800 for (int seg = 0; seg < segment_count; seg++) {
7801 int segoff = seg * T_per_segment;
7802 for (int i = 0; i < 2; i++) {
7803 for (int j = 0; j < 2; j++) {
7804 T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7805 src2.Float<T>(2 * j + 0 + segoff));
7806 T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7807 src2.Float<T>(2 * j + 1 + segoff));
7808 T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7809 result[2 * i + j + segoff] = FPAdd(sum, prod1);
7810 }
7811 }
7812 }
7813 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7814 // Elements outside a multiple of 4T are set to zero. This happens only
7815 // for double precision operations, when the VL is a multiple of 128 bits,
7816 // but not a mutiple of 256 bits.
7817 T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7818 srcdst.SetFloat<T>(vform, i, value);
7819 }
7820 return srcdst;
7821 }
7822
fmatmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)7823 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7824 LogicVRegister dst,
7825 const LogicVRegister& src1,
7826 const LogicVRegister& src2) {
7827 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
7828 fmatmul<float>(vform, dst, src1, src2);
7829 } else {
7830 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
7831 fmatmul<double>(vform, dst, src1, src2);
7832 }
7833 return dst;
7834 }
7835
7836 } // namespace aarch64
7837 } // namespace vixl
7838
7839 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
7840