1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29 #include <cmath>
30
31 #include "simulator-aarch64.h"
32
33 namespace vixl {
34 namespace aarch64 {
35
36 using vixl::internal::SimFloat16;
37
// Type query: IsFloat64<T>() yields true only for T = double.
// This primary template answers "false" for every type that has no
// dedicated specialisation.
template <typename T>
bool IsFloat64() {
  return false;
}

// Specialisation picked when the queried type is double.
template <>
bool IsFloat64<double>() {
  return true;
}
46
// Type query: IsFloat32<T>() yields true only for T = float.
// This primary template answers "false" for every type that has no
// dedicated specialisation.
template <typename T>
bool IsFloat32() {
  return false;
}

// Specialisation picked when the queried type is float.
template <>
bool IsFloat32<float>() {
  return true;
}
55
56 template <typename T>
IsFloat16()57 bool IsFloat16() {
58 return false;
59 }
60 template <>
IsFloat16()61 bool IsFloat16<Float16>() {
62 return true;
63 }
64 template <>
IsFloat16()65 bool IsFloat16<SimFloat16>() {
66 return true;
67 }
68
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71 return kFP64DefaultNaN;
72 }
73
74
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77 return kFP32DefaultNaN;
78 }
79
80
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83 return SimFloat16(kFP16DefaultNaN);
84 }
85
86
FixedToDouble(int64_t src,int fbits,FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88 if (src >= 0) {
89 return UFixedToDouble(src, fbits, round);
90 } else if (src == INT64_MIN) {
91 return -UFixedToDouble(src, fbits, round);
92 } else {
93 return -UFixedToDouble(-src, fbits, round);
94 }
95 }
96
97
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99 // An input of 0 is a special case because the result is effectively
100 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101 if (src == 0) {
102 return 0.0;
103 }
104
105 // Calculate the exponent. The highest significant bit will have the value
106 // 2^exponent.
107 const int highest_significant_bit = 63 - CountLeadingZeros(src);
108 const int64_t exponent = highest_significant_bit - fbits;
109
110 return FPRoundToDouble(0, exponent, src, round);
111 }
112
113
FixedToFloat(int64_t src,int fbits,FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115 if (src >= 0) {
116 return UFixedToFloat(src, fbits, round);
117 } else if (src == INT64_MIN) {
118 return -UFixedToFloat(src, fbits, round);
119 } else {
120 return -UFixedToFloat(-src, fbits, round);
121 }
122 }
123
124
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126 // An input of 0 is a special case because the result is effectively
127 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128 if (src == 0) {
129 return 0.0f;
130 }
131
132 // Calculate the exponent. The highest significant bit will have the value
133 // 2^exponent.
134 const int highest_significant_bit = 63 - CountLeadingZeros(src);
135 const int32_t exponent = highest_significant_bit - fbits;
136
137 return FPRoundToFloat(0, exponent, src, round);
138 }
139
140
FixedToFloat16(int64_t src,int fbits,FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142 if (src >= 0) {
143 return UFixedToFloat16(src, fbits, round);
144 } else if (src == INT64_MIN) {
145 return -UFixedToFloat16(src, fbits, round);
146 } else {
147 return -UFixedToFloat16(-src, fbits, round);
148 }
149 }
150
151
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153 int fbits,
154 FPRounding round) {
155 // An input of 0 is a special case because the result is effectively
156 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157 if (src == 0) {
158 return 0.0f;
159 }
160
161 // Calculate the exponent. The highest significant bit will have the value
162 // 2^exponent.
163 const int highest_significant_bit = 63 - CountLeadingZeros(src);
164 const int16_t exponent = highest_significant_bit - fbits;
165
166 return FPRoundToFloat16(0, exponent, src, round);
167 }
168
169
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)170 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
171 dst.ClearForWrite(vform);
172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
173 LoadLane(dst, vform, i, addr);
174 addr += LaneSizeInBytesFromFormat(vform);
175 }
176 }
177
178
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)179 void Simulator::ld1(VectorFormat vform,
180 LogicVRegister dst,
181 int index,
182 uint64_t addr) {
183 LoadLane(dst, vform, index, addr);
184 }
185
186
ld1r(VectorFormat vform,VectorFormat unpack_vform,LogicVRegister dst,uint64_t addr,bool is_signed)187 void Simulator::ld1r(VectorFormat vform,
188 VectorFormat unpack_vform,
189 LogicVRegister dst,
190 uint64_t addr,
191 bool is_signed) {
192 unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
193 dst.ClearForWrite(vform);
194 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
195 if (is_signed) {
196 LoadIntToLane(dst, vform, unpack_size, i, addr);
197 } else {
198 LoadUintToLane(dst, vform, unpack_size, i, addr);
199 }
200 }
201 }
202
203
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)204 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
205 ld1r(vform, vform, dst, addr);
206 }
207
208
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)209 void Simulator::ld2(VectorFormat vform,
210 LogicVRegister dst1,
211 LogicVRegister dst2,
212 uint64_t addr1) {
213 dst1.ClearForWrite(vform);
214 dst2.ClearForWrite(vform);
215 int esize = LaneSizeInBytesFromFormat(vform);
216 uint64_t addr2 = addr1 + esize;
217 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
218 LoadLane(dst1, vform, i, addr1);
219 LoadLane(dst2, vform, i, addr2);
220 addr1 += 2 * esize;
221 addr2 += 2 * esize;
222 }
223 }
224
225
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)226 void Simulator::ld2(VectorFormat vform,
227 LogicVRegister dst1,
228 LogicVRegister dst2,
229 int index,
230 uint64_t addr1) {
231 dst1.ClearForWrite(vform);
232 dst2.ClearForWrite(vform);
233 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
234 LoadLane(dst1, vform, index, addr1);
235 LoadLane(dst2, vform, index, addr2);
236 }
237
238
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)239 void Simulator::ld2r(VectorFormat vform,
240 LogicVRegister dst1,
241 LogicVRegister dst2,
242 uint64_t addr) {
243 dst1.ClearForWrite(vform);
244 dst2.ClearForWrite(vform);
245 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
246 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
247 LoadLane(dst1, vform, i, addr);
248 LoadLane(dst2, vform, i, addr2);
249 }
250 }
251
252
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)253 void Simulator::ld3(VectorFormat vform,
254 LogicVRegister dst1,
255 LogicVRegister dst2,
256 LogicVRegister dst3,
257 uint64_t addr1) {
258 dst1.ClearForWrite(vform);
259 dst2.ClearForWrite(vform);
260 dst3.ClearForWrite(vform);
261 int esize = LaneSizeInBytesFromFormat(vform);
262 uint64_t addr2 = addr1 + esize;
263 uint64_t addr3 = addr2 + esize;
264 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
265 LoadLane(dst1, vform, i, addr1);
266 LoadLane(dst2, vform, i, addr2);
267 LoadLane(dst3, vform, i, addr3);
268 addr1 += 3 * esize;
269 addr2 += 3 * esize;
270 addr3 += 3 * esize;
271 }
272 }
273
274
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)275 void Simulator::ld3(VectorFormat vform,
276 LogicVRegister dst1,
277 LogicVRegister dst2,
278 LogicVRegister dst3,
279 int index,
280 uint64_t addr1) {
281 dst1.ClearForWrite(vform);
282 dst2.ClearForWrite(vform);
283 dst3.ClearForWrite(vform);
284 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
285 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
286 LoadLane(dst1, vform, index, addr1);
287 LoadLane(dst2, vform, index, addr2);
288 LoadLane(dst3, vform, index, addr3);
289 }
290
291
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)292 void Simulator::ld3r(VectorFormat vform,
293 LogicVRegister dst1,
294 LogicVRegister dst2,
295 LogicVRegister dst3,
296 uint64_t addr) {
297 dst1.ClearForWrite(vform);
298 dst2.ClearForWrite(vform);
299 dst3.ClearForWrite(vform);
300 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
301 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
302 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
303 LoadLane(dst1, vform, i, addr);
304 LoadLane(dst2, vform, i, addr2);
305 LoadLane(dst3, vform, i, addr3);
306 }
307 }
308
309
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)310 void Simulator::ld4(VectorFormat vform,
311 LogicVRegister dst1,
312 LogicVRegister dst2,
313 LogicVRegister dst3,
314 LogicVRegister dst4,
315 uint64_t addr1) {
316 dst1.ClearForWrite(vform);
317 dst2.ClearForWrite(vform);
318 dst3.ClearForWrite(vform);
319 dst4.ClearForWrite(vform);
320 int esize = LaneSizeInBytesFromFormat(vform);
321 uint64_t addr2 = addr1 + esize;
322 uint64_t addr3 = addr2 + esize;
323 uint64_t addr4 = addr3 + esize;
324 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
325 LoadLane(dst1, vform, i, addr1);
326 LoadLane(dst2, vform, i, addr2);
327 LoadLane(dst3, vform, i, addr3);
328 LoadLane(dst4, vform, i, addr4);
329 addr1 += 4 * esize;
330 addr2 += 4 * esize;
331 addr3 += 4 * esize;
332 addr4 += 4 * esize;
333 }
334 }
335
336
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)337 void Simulator::ld4(VectorFormat vform,
338 LogicVRegister dst1,
339 LogicVRegister dst2,
340 LogicVRegister dst3,
341 LogicVRegister dst4,
342 int index,
343 uint64_t addr1) {
344 dst1.ClearForWrite(vform);
345 dst2.ClearForWrite(vform);
346 dst3.ClearForWrite(vform);
347 dst4.ClearForWrite(vform);
348 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
349 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
350 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
351 LoadLane(dst1, vform, index, addr1);
352 LoadLane(dst2, vform, index, addr2);
353 LoadLane(dst3, vform, index, addr3);
354 LoadLane(dst4, vform, index, addr4);
355 }
356
357
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)358 void Simulator::ld4r(VectorFormat vform,
359 LogicVRegister dst1,
360 LogicVRegister dst2,
361 LogicVRegister dst3,
362 LogicVRegister dst4,
363 uint64_t addr) {
364 dst1.ClearForWrite(vform);
365 dst2.ClearForWrite(vform);
366 dst3.ClearForWrite(vform);
367 dst4.ClearForWrite(vform);
368 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
369 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
370 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
371 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
372 LoadLane(dst1, vform, i, addr);
373 LoadLane(dst2, vform, i, addr2);
374 LoadLane(dst3, vform, i, addr3);
375 LoadLane(dst4, vform, i, addr4);
376 }
377 }
378
379
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)380 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
381 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
382 StoreLane(src, vform, i, addr);
383 addr += LaneSizeInBytesFromFormat(vform);
384 }
385 }
386
387
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)388 void Simulator::st1(VectorFormat vform,
389 LogicVRegister src,
390 int index,
391 uint64_t addr) {
392 StoreLane(src, vform, index, addr);
393 }
394
395
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,uint64_t addr)396 void Simulator::st2(VectorFormat vform,
397 LogicVRegister src,
398 LogicVRegister src2,
399 uint64_t addr) {
400 int esize = LaneSizeInBytesFromFormat(vform);
401 uint64_t addr2 = addr + esize;
402 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403 StoreLane(src, vform, i, addr);
404 StoreLane(src2, vform, i, addr2);
405 addr += 2 * esize;
406 addr2 += 2 * esize;
407 }
408 }
409
410
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,int index,uint64_t addr)411 void Simulator::st2(VectorFormat vform,
412 LogicVRegister src,
413 LogicVRegister src2,
414 int index,
415 uint64_t addr) {
416 int esize = LaneSizeInBytesFromFormat(vform);
417 StoreLane(src, vform, index, addr);
418 StoreLane(src2, vform, index, addr + 1 * esize);
419 }
420
421
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,uint64_t addr)422 void Simulator::st3(VectorFormat vform,
423 LogicVRegister src,
424 LogicVRegister src2,
425 LogicVRegister src3,
426 uint64_t addr) {
427 int esize = LaneSizeInBytesFromFormat(vform);
428 uint64_t addr2 = addr + esize;
429 uint64_t addr3 = addr2 + esize;
430 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
431 StoreLane(src, vform, i, addr);
432 StoreLane(src2, vform, i, addr2);
433 StoreLane(src3, vform, i, addr3);
434 addr += 3 * esize;
435 addr2 += 3 * esize;
436 addr3 += 3 * esize;
437 }
438 }
439
440
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,int index,uint64_t addr)441 void Simulator::st3(VectorFormat vform,
442 LogicVRegister src,
443 LogicVRegister src2,
444 LogicVRegister src3,
445 int index,
446 uint64_t addr) {
447 int esize = LaneSizeInBytesFromFormat(vform);
448 StoreLane(src, vform, index, addr);
449 StoreLane(src2, vform, index, addr + 1 * esize);
450 StoreLane(src3, vform, index, addr + 2 * esize);
451 }
452
453
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,uint64_t addr)454 void Simulator::st4(VectorFormat vform,
455 LogicVRegister src,
456 LogicVRegister src2,
457 LogicVRegister src3,
458 LogicVRegister src4,
459 uint64_t addr) {
460 int esize = LaneSizeInBytesFromFormat(vform);
461 uint64_t addr2 = addr + esize;
462 uint64_t addr3 = addr2 + esize;
463 uint64_t addr4 = addr3 + esize;
464 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
465 StoreLane(src, vform, i, addr);
466 StoreLane(src2, vform, i, addr2);
467 StoreLane(src3, vform, i, addr3);
468 StoreLane(src4, vform, i, addr4);
469 addr += 4 * esize;
470 addr2 += 4 * esize;
471 addr3 += 4 * esize;
472 addr4 += 4 * esize;
473 }
474 }
475
476
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,int index,uint64_t addr)477 void Simulator::st4(VectorFormat vform,
478 LogicVRegister src,
479 LogicVRegister src2,
480 LogicVRegister src3,
481 LogicVRegister src4,
482 int index,
483 uint64_t addr) {
484 int esize = LaneSizeInBytesFromFormat(vform);
485 StoreLane(src, vform, index, addr);
486 StoreLane(src2, vform, index, addr + 1 * esize);
487 StoreLane(src3, vform, index, addr + 2 * esize);
488 StoreLane(src4, vform, index, addr + 3 * esize);
489 }
490
491
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)492 LogicVRegister Simulator::cmp(VectorFormat vform,
493 LogicVRegister dst,
494 const LogicVRegister& src1,
495 const LogicVRegister& src2,
496 Condition cond) {
497 dst.ClearForWrite(vform);
498 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
499 int64_t sa = src1.Int(vform, i);
500 int64_t sb = src2.Int(vform, i);
501 uint64_t ua = src1.Uint(vform, i);
502 uint64_t ub = src2.Uint(vform, i);
503 bool result = false;
504 switch (cond) {
505 case eq:
506 result = (ua == ub);
507 break;
508 case ge:
509 result = (sa >= sb);
510 break;
511 case gt:
512 result = (sa > sb);
513 break;
514 case hi:
515 result = (ua > ub);
516 break;
517 case hs:
518 result = (ua >= ub);
519 break;
520 case lt:
521 result = (sa < sb);
522 break;
523 case le:
524 result = (sa <= sb);
525 break;
526 default:
527 VIXL_UNREACHABLE();
528 break;
529 }
530 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
531 }
532 return dst;
533 }
534
535
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)536 LogicVRegister Simulator::cmp(VectorFormat vform,
537 LogicVRegister dst,
538 const LogicVRegister& src1,
539 int imm,
540 Condition cond) {
541 SimVRegister temp;
542 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
543 return cmp(vform, dst, src1, imm_reg, cond);
544 }
545
546
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)547 LogicVRegister Simulator::cmptst(VectorFormat vform,
548 LogicVRegister dst,
549 const LogicVRegister& src1,
550 const LogicVRegister& src2) {
551 dst.ClearForWrite(vform);
552 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
553 uint64_t ua = src1.Uint(vform, i);
554 uint64_t ub = src2.Uint(vform, i);
555 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
556 }
557 return dst;
558 }
559
560
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)561 LogicVRegister Simulator::add(VectorFormat vform,
562 LogicVRegister dst,
563 const LogicVRegister& src1,
564 const LogicVRegister& src2) {
565 int lane_size = LaneSizeInBitsFromFormat(vform);
566 dst.ClearForWrite(vform);
567
568 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569 // Test for unsigned saturation.
570 uint64_t ua = src1.UintLeftJustified(vform, i);
571 uint64_t ub = src2.UintLeftJustified(vform, i);
572 uint64_t ur = ua + ub;
573 if (ur < ua) {
574 dst.SetUnsignedSat(i, true);
575 }
576
577 // Test for signed saturation.
578 bool pos_a = (ua >> 63) == 0;
579 bool pos_b = (ub >> 63) == 0;
580 bool pos_r = (ur >> 63) == 0;
581 // If the signs of the operands are the same, but different from the result,
582 // there was an overflow.
583 if ((pos_a == pos_b) && (pos_a != pos_r)) {
584 dst.SetSignedSat(i, pos_a);
585 }
586 dst.SetInt(vform, i, ur >> (64 - lane_size));
587 }
588 return dst;
589 }
590
add_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)591 LogicVRegister Simulator::add_uint(VectorFormat vform,
592 LogicVRegister dst,
593 const LogicVRegister& src1,
594 uint64_t value) {
595 int lane_size = LaneSizeInBitsFromFormat(vform);
596 VIXL_ASSERT(IsUintN(lane_size, value));
597 dst.ClearForWrite(vform);
598 // Left-justify `value`.
599 uint64_t ub = value << (64 - lane_size);
600 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
601 // Test for unsigned saturation.
602 uint64_t ua = src1.UintLeftJustified(vform, i);
603 uint64_t ur = ua + ub;
604 if (ur < ua) {
605 dst.SetUnsignedSat(i, true);
606 }
607
608 // Test for signed saturation.
609 // `value` is always positive, so we have an overflow if the (signed) result
610 // is smaller than the first operand.
611 if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
612 dst.SetSignedSat(i, true);
613 }
614
615 dst.SetInt(vform, i, ur >> (64 - lane_size));
616 }
617 return dst;
618 }
619
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)620 LogicVRegister Simulator::addp(VectorFormat vform,
621 LogicVRegister dst,
622 const LogicVRegister& src1,
623 const LogicVRegister& src2) {
624 SimVRegister temp1, temp2;
625 uzp1(vform, temp1, src1, src2);
626 uzp2(vform, temp2, src1, src2);
627 add(vform, dst, temp1, temp2);
628 if (IsSVEFormat(vform)) {
629 interleave_top_bottom(vform, dst, dst);
630 }
631 return dst;
632 }
633
sdiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)634 LogicVRegister Simulator::sdiv(VectorFormat vform,
635 LogicVRegister dst,
636 const LogicVRegister& src1,
637 const LogicVRegister& src2) {
638 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
639
640 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
641 int64_t val1 = src1.Int(vform, i);
642 int64_t val2 = src2.Int(vform, i);
643 int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
644 int64_t quotient = 0;
645 if ((val1 == min_int) && (val2 == -1)) {
646 quotient = min_int;
647 } else if (val2 != 0) {
648 quotient = val1 / val2;
649 }
650 dst.SetInt(vform, i, quotient);
651 }
652
653 return dst;
654 }
655
udiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)656 LogicVRegister Simulator::udiv(VectorFormat vform,
657 LogicVRegister dst,
658 const LogicVRegister& src1,
659 const LogicVRegister& src2) {
660 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
661
662 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
663 uint64_t val1 = src1.Uint(vform, i);
664 uint64_t val2 = src2.Uint(vform, i);
665 uint64_t quotient = 0;
666 if (val2 != 0) {
667 quotient = val1 / val2;
668 }
669 dst.SetUint(vform, i, quotient);
670 }
671
672 return dst;
673 }
674
675
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)676 LogicVRegister Simulator::mla(VectorFormat vform,
677 LogicVRegister dst,
678 const LogicVRegister& srca,
679 const LogicVRegister& src1,
680 const LogicVRegister& src2) {
681 SimVRegister temp;
682 mul(vform, temp, src1, src2);
683 add(vform, dst, srca, temp);
684 return dst;
685 }
686
687
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)688 LogicVRegister Simulator::mls(VectorFormat vform,
689 LogicVRegister dst,
690 const LogicVRegister& srca,
691 const LogicVRegister& src1,
692 const LogicVRegister& src2) {
693 SimVRegister temp;
694 mul(vform, temp, src1, src2);
695 sub(vform, dst, srca, temp);
696 return dst;
697 }
698
699
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)700 LogicVRegister Simulator::mul(VectorFormat vform,
701 LogicVRegister dst,
702 const LogicVRegister& src1,
703 const LogicVRegister& src2) {
704 dst.ClearForWrite(vform);
705
706 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
707 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
708 }
709 return dst;
710 }
711
712
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)713 LogicVRegister Simulator::mul(VectorFormat vform,
714 LogicVRegister dst,
715 const LogicVRegister& src1,
716 const LogicVRegister& src2,
717 int index) {
718 SimVRegister temp;
719 VectorFormat indexform = VectorFormatFillQ(vform);
720 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
721 }
722
723
smulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)724 LogicVRegister Simulator::smulh(VectorFormat vform,
725 LogicVRegister dst,
726 const LogicVRegister& src1,
727 const LogicVRegister& src2) {
728 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
729 int64_t dst_val = 0xbadbeef;
730 int64_t val1 = src1.Int(vform, i);
731 int64_t val2 = src2.Int(vform, i);
732 switch (LaneSizeInBitsFromFormat(vform)) {
733 case 8:
734 dst_val = internal::MultiplyHigh<8>(val1, val2);
735 break;
736 case 16:
737 dst_val = internal::MultiplyHigh<16>(val1, val2);
738 break;
739 case 32:
740 dst_val = internal::MultiplyHigh<32>(val1, val2);
741 break;
742 case 64:
743 dst_val = internal::MultiplyHigh<64>(val1, val2);
744 break;
745 default:
746 VIXL_UNREACHABLE();
747 break;
748 }
749 dst.SetInt(vform, i, dst_val);
750 }
751 return dst;
752 }
753
754
umulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)755 LogicVRegister Simulator::umulh(VectorFormat vform,
756 LogicVRegister dst,
757 const LogicVRegister& src1,
758 const LogicVRegister& src2) {
759 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
760 uint64_t dst_val = 0xbadbeef;
761 uint64_t val1 = src1.Uint(vform, i);
762 uint64_t val2 = src2.Uint(vform, i);
763 switch (LaneSizeInBitsFromFormat(vform)) {
764 case 8:
765 dst_val = internal::MultiplyHigh<8>(val1, val2);
766 break;
767 case 16:
768 dst_val = internal::MultiplyHigh<16>(val1, val2);
769 break;
770 case 32:
771 dst_val = internal::MultiplyHigh<32>(val1, val2);
772 break;
773 case 64:
774 dst_val = internal::MultiplyHigh<64>(val1, val2);
775 break;
776 default:
777 VIXL_UNREACHABLE();
778 break;
779 }
780 dst.SetUint(vform, i, dst_val);
781 }
782 return dst;
783 }
784
785
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)786 LogicVRegister Simulator::mla(VectorFormat vform,
787 LogicVRegister dst,
788 const LogicVRegister& src1,
789 const LogicVRegister& src2,
790 int index) {
791 SimVRegister temp;
792 VectorFormat indexform = VectorFormatFillQ(vform);
793 return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
794 }
795
796
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)797 LogicVRegister Simulator::mls(VectorFormat vform,
798 LogicVRegister dst,
799 const LogicVRegister& src1,
800 const LogicVRegister& src2,
801 int index) {
802 SimVRegister temp;
803 VectorFormat indexform = VectorFormatFillQ(vform);
804 return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
805 }
806
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807 LogicVRegister Simulator::sqdmull(VectorFormat vform,
808 LogicVRegister dst,
809 const LogicVRegister& src1,
810 const LogicVRegister& src2,
811 int index) {
812 SimVRegister temp;
813 VectorFormat indexform =
814 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
815 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
816 }
817
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)818 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
819 LogicVRegister dst,
820 const LogicVRegister& src1,
821 const LogicVRegister& src2,
822 int index) {
823 SimVRegister temp;
824 VectorFormat indexform =
825 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
826 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
827 }
828
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)829 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
830 LogicVRegister dst,
831 const LogicVRegister& src1,
832 const LogicVRegister& src2,
833 int index) {
834 SimVRegister temp;
835 VectorFormat indexform =
836 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
837 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
838 }
839
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)840 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
841 LogicVRegister dst,
842 const LogicVRegister& src1,
843 const LogicVRegister& src2,
844 int index) {
845 SimVRegister temp;
846 VectorFormat indexform = VectorFormatFillQ(vform);
847 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
848 }
849
850
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)851 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
852 LogicVRegister dst,
853 const LogicVRegister& src1,
854 const LogicVRegister& src2,
855 int index) {
856 SimVRegister temp;
857 VectorFormat indexform = VectorFormatFillQ(vform);
858 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
859 }
860
861
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)862 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
863 LogicVRegister dst,
864 const LogicVRegister& src1,
865 const LogicVRegister& src2,
866 int index) {
867 SimVRegister temp;
868 VectorFormat indexform = VectorFormatFillQ(vform);
869 return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
870 }
871
872
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)873 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
874 LogicVRegister dst,
875 const LogicVRegister& src1,
876 const LogicVRegister& src2,
877 int index) {
878 SimVRegister temp;
879 VectorFormat indexform = VectorFormatFillQ(vform);
880 return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
881 }
882
883
PolynomialMult(uint64_t op1,uint64_t op2,int lane_size_in_bits) const884 uint64_t Simulator::PolynomialMult(uint64_t op1,
885 uint64_t op2,
886 int lane_size_in_bits) const {
887 VIXL_ASSERT(static_cast<unsigned>(lane_size_in_bits) <= kSRegSize);
888 VIXL_ASSERT(IsUintN(lane_size_in_bits, op1));
889 VIXL_ASSERT(IsUintN(lane_size_in_bits, op2));
890 uint64_t result = 0;
891 for (int i = 0; i < lane_size_in_bits; ++i) {
892 if ((op1 >> i) & 1) {
893 result = result ^ (op2 << i);
894 }
895 }
896 return result;
897 }
898
899
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)900 LogicVRegister Simulator::pmul(VectorFormat vform,
901 LogicVRegister dst,
902 const LogicVRegister& src1,
903 const LogicVRegister& src2) {
904 dst.ClearForWrite(vform);
905 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
906 dst.SetUint(vform,
907 i,
908 PolynomialMult(src1.Uint(vform, i),
909 src2.Uint(vform, i),
910 LaneSizeInBitsFromFormat(vform)));
911 }
912 return dst;
913 }
914
915
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)916 LogicVRegister Simulator::pmull(VectorFormat vform,
917 LogicVRegister dst,
918 const LogicVRegister& src1,
919 const LogicVRegister& src2) {
920 dst.ClearForWrite(vform);
921
922 VectorFormat vform_src = VectorFormatHalfWidth(vform);
923 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
924 dst.SetUint(vform,
925 i,
926 PolynomialMult(src1.Uint(vform_src, i),
927 src2.Uint(vform_src, i),
928 LaneSizeInBitsFromFormat(vform_src)));
929 }
930
931 return dst;
932 }
933
934
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)935 LogicVRegister Simulator::pmull2(VectorFormat vform,
936 LogicVRegister dst,
937 const LogicVRegister& src1,
938 const LogicVRegister& src2) {
939 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
940 dst.ClearForWrite(vform);
941 int lane_count = LaneCountFromFormat(vform);
942 for (int i = 0; i < lane_count; i++) {
943 dst.SetUint(vform,
944 i,
945 PolynomialMult(src1.Uint(vform_src, lane_count + i),
946 src2.Uint(vform_src, lane_count + i),
947 LaneSizeInBitsFromFormat(vform_src)));
948 }
949 return dst;
950 }
951
952
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)953 LogicVRegister Simulator::sub(VectorFormat vform,
954 LogicVRegister dst,
955 const LogicVRegister& src1,
956 const LogicVRegister& src2) {
957 int lane_size = LaneSizeInBitsFromFormat(vform);
958 dst.ClearForWrite(vform);
959 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
960 // Test for unsigned saturation.
961 uint64_t ua = src1.UintLeftJustified(vform, i);
962 uint64_t ub = src2.UintLeftJustified(vform, i);
963 uint64_t ur = ua - ub;
964 if (ub > ua) {
965 dst.SetUnsignedSat(i, false);
966 }
967
968 // Test for signed saturation.
969 bool pos_a = (ua >> 63) == 0;
970 bool pos_b = (ub >> 63) == 0;
971 bool pos_r = (ur >> 63) == 0;
972 // If the signs of the operands are different, and the sign of the first
973 // operand doesn't match the result, there was an overflow.
974 if ((pos_a != pos_b) && (pos_a != pos_r)) {
975 dst.SetSignedSat(i, pos_a);
976 }
977
978 dst.SetInt(vform, i, ur >> (64 - lane_size));
979 }
980 return dst;
981 }
982
sub_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)983 LogicVRegister Simulator::sub_uint(VectorFormat vform,
984 LogicVRegister dst,
985 const LogicVRegister& src1,
986 uint64_t value) {
987 int lane_size = LaneSizeInBitsFromFormat(vform);
988 VIXL_ASSERT(IsUintN(lane_size, value));
989 dst.ClearForWrite(vform);
990 // Left-justify `value`.
991 uint64_t ub = value << (64 - lane_size);
992 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
993 // Test for unsigned saturation.
994 uint64_t ua = src1.UintLeftJustified(vform, i);
995 uint64_t ur = ua - ub;
996 if (ub > ua) {
997 dst.SetUnsignedSat(i, false);
998 }
999
1000 // Test for signed saturation.
1001 // `value` is always positive, so we have an overflow if the (signed) result
1002 // is greater than the first operand.
1003 if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1004 dst.SetSignedSat(i, false);
1005 }
1006
1007 dst.SetInt(vform, i, ur >> (64 - lane_size));
1008 }
1009 return dst;
1010 }
1011
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1012 LogicVRegister Simulator::and_(VectorFormat vform,
1013 LogicVRegister dst,
1014 const LogicVRegister& src1,
1015 const LogicVRegister& src2) {
1016 dst.ClearForWrite(vform);
1017 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1018 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1019 }
1020 return dst;
1021 }
1022
1023
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1024 LogicVRegister Simulator::orr(VectorFormat vform,
1025 LogicVRegister dst,
1026 const LogicVRegister& src1,
1027 const LogicVRegister& src2) {
1028 dst.ClearForWrite(vform);
1029 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1030 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1031 }
1032 return dst;
1033 }
1034
1035
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1036 LogicVRegister Simulator::orn(VectorFormat vform,
1037 LogicVRegister dst,
1038 const LogicVRegister& src1,
1039 const LogicVRegister& src2) {
1040 dst.ClearForWrite(vform);
1041 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1042 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1043 }
1044 return dst;
1045 }
1046
1047
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1048 LogicVRegister Simulator::eor(VectorFormat vform,
1049 LogicVRegister dst,
1050 const LogicVRegister& src1,
1051 const LogicVRegister& src2) {
1052 dst.ClearForWrite(vform);
1053 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1054 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1055 }
1056 return dst;
1057 }
1058
1059
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1060 LogicVRegister Simulator::bic(VectorFormat vform,
1061 LogicVRegister dst,
1062 const LogicVRegister& src1,
1063 const LogicVRegister& src2) {
1064 dst.ClearForWrite(vform);
1065 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1066 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1067 }
1068 return dst;
1069 }
1070
1071
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1072 LogicVRegister Simulator::bic(VectorFormat vform,
1073 LogicVRegister dst,
1074 const LogicVRegister& src,
1075 uint64_t imm) {
1076 uint64_t result[16];
1077 int lane_count = LaneCountFromFormat(vform);
1078 for (int i = 0; i < lane_count; ++i) {
1079 result[i] = src.Uint(vform, i) & ~imm;
1080 }
1081 dst.ClearForWrite(vform);
1082 for (int i = 0; i < lane_count; ++i) {
1083 dst.SetUint(vform, i, result[i]);
1084 }
1085 return dst;
1086 }
1087
1088
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1089 LogicVRegister Simulator::bif(VectorFormat vform,
1090 LogicVRegister dst,
1091 const LogicVRegister& src1,
1092 const LogicVRegister& src2) {
1093 dst.ClearForWrite(vform);
1094 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1095 uint64_t operand1 = dst.Uint(vform, i);
1096 uint64_t operand2 = ~src2.Uint(vform, i);
1097 uint64_t operand3 = src1.Uint(vform, i);
1098 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1099 dst.SetUint(vform, i, result);
1100 }
1101 return dst;
1102 }
1103
1104
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1105 LogicVRegister Simulator::bit(VectorFormat vform,
1106 LogicVRegister dst,
1107 const LogicVRegister& src1,
1108 const LogicVRegister& src2) {
1109 dst.ClearForWrite(vform);
1110 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1111 uint64_t operand1 = dst.Uint(vform, i);
1112 uint64_t operand2 = src2.Uint(vform, i);
1113 uint64_t operand3 = src1.Uint(vform, i);
1114 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1115 dst.SetUint(vform, i, result);
1116 }
1117 return dst;
1118 }
1119
1120
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src_mask,const LogicVRegister & src1,const LogicVRegister & src2)1121 LogicVRegister Simulator::bsl(VectorFormat vform,
1122 LogicVRegister dst,
1123 const LogicVRegister& src_mask,
1124 const LogicVRegister& src1,
1125 const LogicVRegister& src2) {
1126 dst.ClearForWrite(vform);
1127 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1128 uint64_t operand1 = src2.Uint(vform, i);
1129 uint64_t operand2 = src_mask.Uint(vform, i);
1130 uint64_t operand3 = src1.Uint(vform, i);
1131 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1132 dst.SetUint(vform, i, result);
1133 }
1134 return dst;
1135 }
1136
1137
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1138 LogicVRegister Simulator::sminmax(VectorFormat vform,
1139 LogicVRegister dst,
1140 const LogicVRegister& src1,
1141 const LogicVRegister& src2,
1142 bool max) {
1143 dst.ClearForWrite(vform);
1144 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1145 int64_t src1_val = src1.Int(vform, i);
1146 int64_t src2_val = src2.Int(vform, i);
1147 int64_t dst_val;
1148 if (max) {
1149 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1150 } else {
1151 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1152 }
1153 dst.SetInt(vform, i, dst_val);
1154 }
1155 return dst;
1156 }
1157
1158
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1159 LogicVRegister Simulator::smax(VectorFormat vform,
1160 LogicVRegister dst,
1161 const LogicVRegister& src1,
1162 const LogicVRegister& src2) {
1163 return sminmax(vform, dst, src1, src2, true);
1164 }
1165
1166
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1167 LogicVRegister Simulator::smin(VectorFormat vform,
1168 LogicVRegister dst,
1169 const LogicVRegister& src1,
1170 const LogicVRegister& src2) {
1171 return sminmax(vform, dst, src1, src2, false);
1172 }
1173
1174
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1175 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1176 LogicVRegister dst,
1177 const LogicVRegister& src1,
1178 const LogicVRegister& src2,
1179 bool max) {
1180 unsigned lanes = LaneCountFromFormat(vform);
1181 int64_t result[kZRegMaxSizeInBytes];
1182 const LogicVRegister* src = &src1;
1183 for (unsigned j = 0; j < 2; j++) {
1184 for (unsigned i = 0; i < lanes; i += 2) {
1185 int64_t first_val = src->Int(vform, i);
1186 int64_t second_val = src->Int(vform, i + 1);
1187 int64_t dst_val;
1188 if (max) {
1189 dst_val = (first_val > second_val) ? first_val : second_val;
1190 } else {
1191 dst_val = (first_val < second_val) ? first_val : second_val;
1192 }
1193 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1194 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1195 }
1196 src = &src2;
1197 }
1198 dst.SetIntArray(vform, result);
1199 if (IsSVEFormat(vform)) {
1200 interleave_top_bottom(vform, dst, dst);
1201 }
1202 return dst;
1203 }
1204
1205
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1206 LogicVRegister Simulator::smaxp(VectorFormat vform,
1207 LogicVRegister dst,
1208 const LogicVRegister& src1,
1209 const LogicVRegister& src2) {
1210 return sminmaxp(vform, dst, src1, src2, true);
1211 }
1212
1213
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1214 LogicVRegister Simulator::sminp(VectorFormat vform,
1215 LogicVRegister dst,
1216 const LogicVRegister& src1,
1217 const LogicVRegister& src2) {
1218 return sminmaxp(vform, dst, src1, src2, false);
1219 }
1220
1221
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1222 LogicVRegister Simulator::addp(VectorFormat vform,
1223 LogicVRegister dst,
1224 const LogicVRegister& src) {
1225 VIXL_ASSERT(vform == kFormatD);
1226
1227 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1228 dst.ClearForWrite(vform);
1229 dst.SetUint(vform, 0, dst_val);
1230 return dst;
1231 }
1232
1233
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1234 LogicVRegister Simulator::addv(VectorFormat vform,
1235 LogicVRegister dst,
1236 const LogicVRegister& src) {
1237 VectorFormat vform_dst =
1238 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1239
1240
1241 int64_t dst_val = 0;
1242 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1243 dst_val += src.Int(vform, i);
1244 }
1245
1246 dst.ClearForWrite(vform_dst);
1247 dst.SetInt(vform_dst, 0, dst_val);
1248 return dst;
1249 }
1250
1251
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1252 LogicVRegister Simulator::saddlv(VectorFormat vform,
1253 LogicVRegister dst,
1254 const LogicVRegister& src) {
1255 VectorFormat vform_dst =
1256 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1257
1258 int64_t dst_val = 0;
1259 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1260 dst_val += src.Int(vform, i);
1261 }
1262
1263 dst.ClearForWrite(vform_dst);
1264 dst.SetInt(vform_dst, 0, dst_val);
1265 return dst;
1266 }
1267
1268
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1269 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1270 LogicVRegister dst,
1271 const LogicVRegister& src) {
1272 VectorFormat vform_dst =
1273 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1274
1275 uint64_t dst_val = 0;
1276 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1277 dst_val += src.Uint(vform, i);
1278 }
1279
1280 dst.ClearForWrite(vform_dst);
1281 dst.SetUint(vform_dst, 0, dst_val);
1282 return dst;
1283 }
1284
1285
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1286 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1287 LogicVRegister dst,
1288 const LogicPRegister& pg,
1289 const LogicVRegister& src,
1290 bool max) {
1291 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1292 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1293 if (!pg.IsActive(vform, i)) continue;
1294
1295 int64_t src_val = src.Int(vform, i);
1296 if (max) {
1297 dst_val = (src_val > dst_val) ? src_val : dst_val;
1298 } else {
1299 dst_val = (src_val < dst_val) ? src_val : dst_val;
1300 }
1301 }
1302 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1303 dst.SetInt(vform, 0, dst_val);
1304 return dst;
1305 }
1306
1307
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1308 LogicVRegister Simulator::smaxv(VectorFormat vform,
1309 LogicVRegister dst,
1310 const LogicVRegister& src) {
1311 sminmaxv(vform, dst, GetPTrue(), src, true);
1312 return dst;
1313 }
1314
1315
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1316 LogicVRegister Simulator::sminv(VectorFormat vform,
1317 LogicVRegister dst,
1318 const LogicVRegister& src) {
1319 sminmaxv(vform, dst, GetPTrue(), src, false);
1320 return dst;
1321 }
1322
1323
smaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1324 LogicVRegister Simulator::smaxv(VectorFormat vform,
1325 LogicVRegister dst,
1326 const LogicPRegister& pg,
1327 const LogicVRegister& src) {
1328 VIXL_ASSERT(IsSVEFormat(vform));
1329 sminmaxv(vform, dst, pg, src, true);
1330 return dst;
1331 }
1332
1333
sminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1334 LogicVRegister Simulator::sminv(VectorFormat vform,
1335 LogicVRegister dst,
1336 const LogicPRegister& pg,
1337 const LogicVRegister& src) {
1338 VIXL_ASSERT(IsSVEFormat(vform));
1339 sminmaxv(vform, dst, pg, src, false);
1340 return dst;
1341 }
1342
1343
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1344 LogicVRegister Simulator::uminmax(VectorFormat vform,
1345 LogicVRegister dst,
1346 const LogicVRegister& src1,
1347 const LogicVRegister& src2,
1348 bool max) {
1349 dst.ClearForWrite(vform);
1350 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1351 uint64_t src1_val = src1.Uint(vform, i);
1352 uint64_t src2_val = src2.Uint(vform, i);
1353 uint64_t dst_val;
1354 if (max) {
1355 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1356 } else {
1357 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1358 }
1359 dst.SetUint(vform, i, dst_val);
1360 }
1361 return dst;
1362 }
1363
1364
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1365 LogicVRegister Simulator::umax(VectorFormat vform,
1366 LogicVRegister dst,
1367 const LogicVRegister& src1,
1368 const LogicVRegister& src2) {
1369 return uminmax(vform, dst, src1, src2, true);
1370 }
1371
1372
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1373 LogicVRegister Simulator::umin(VectorFormat vform,
1374 LogicVRegister dst,
1375 const LogicVRegister& src1,
1376 const LogicVRegister& src2) {
1377 return uminmax(vform, dst, src1, src2, false);
1378 }
1379
1380
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1381 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1382 LogicVRegister dst,
1383 const LogicVRegister& src1,
1384 const LogicVRegister& src2,
1385 bool max) {
1386 unsigned lanes = LaneCountFromFormat(vform);
1387 uint64_t result[kZRegMaxSizeInBytes];
1388 const LogicVRegister* src = &src1;
1389 for (unsigned j = 0; j < 2; j++) {
1390 for (unsigned i = 0; i < lanes; i += 2) {
1391 uint64_t first_val = src->Uint(vform, i);
1392 uint64_t second_val = src->Uint(vform, i + 1);
1393 uint64_t dst_val;
1394 if (max) {
1395 dst_val = (first_val > second_val) ? first_val : second_val;
1396 } else {
1397 dst_val = (first_val < second_val) ? first_val : second_val;
1398 }
1399 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < ArrayLength(result));
1400 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1401 }
1402 src = &src2;
1403 }
1404 dst.SetUintArray(vform, result);
1405 if (IsSVEFormat(vform)) {
1406 interleave_top_bottom(vform, dst, dst);
1407 }
1408 return dst;
1409 }
1410
1411
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1412 LogicVRegister Simulator::umaxp(VectorFormat vform,
1413 LogicVRegister dst,
1414 const LogicVRegister& src1,
1415 const LogicVRegister& src2) {
1416 return uminmaxp(vform, dst, src1, src2, true);
1417 }
1418
1419
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1420 LogicVRegister Simulator::uminp(VectorFormat vform,
1421 LogicVRegister dst,
1422 const LogicVRegister& src1,
1423 const LogicVRegister& src2) {
1424 return uminmaxp(vform, dst, src1, src2, false);
1425 }
1426
1427
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1428 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1429 LogicVRegister dst,
1430 const LogicPRegister& pg,
1431 const LogicVRegister& src,
1432 bool max) {
1433 uint64_t dst_val = max ? 0 : UINT64_MAX;
1434 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1435 if (!pg.IsActive(vform, i)) continue;
1436
1437 uint64_t src_val = src.Uint(vform, i);
1438 if (max) {
1439 dst_val = (src_val > dst_val) ? src_val : dst_val;
1440 } else {
1441 dst_val = (src_val < dst_val) ? src_val : dst_val;
1442 }
1443 }
1444 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1445 dst.SetUint(vform, 0, dst_val);
1446 return dst;
1447 }
1448
1449
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1450 LogicVRegister Simulator::umaxv(VectorFormat vform,
1451 LogicVRegister dst,
1452 const LogicVRegister& src) {
1453 uminmaxv(vform, dst, GetPTrue(), src, true);
1454 return dst;
1455 }
1456
1457
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1458 LogicVRegister Simulator::uminv(VectorFormat vform,
1459 LogicVRegister dst,
1460 const LogicVRegister& src) {
1461 uminmaxv(vform, dst, GetPTrue(), src, false);
1462 return dst;
1463 }
1464
1465
umaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1466 LogicVRegister Simulator::umaxv(VectorFormat vform,
1467 LogicVRegister dst,
1468 const LogicPRegister& pg,
1469 const LogicVRegister& src) {
1470 VIXL_ASSERT(IsSVEFormat(vform));
1471 uminmaxv(vform, dst, pg, src, true);
1472 return dst;
1473 }
1474
1475
uminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1476 LogicVRegister Simulator::uminv(VectorFormat vform,
1477 LogicVRegister dst,
1478 const LogicPRegister& pg,
1479 const LogicVRegister& src) {
1480 VIXL_ASSERT(IsSVEFormat(vform));
1481 uminmaxv(vform, dst, pg, src, false);
1482 return dst;
1483 }
1484
1485
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1486 LogicVRegister Simulator::shl(VectorFormat vform,
1487 LogicVRegister dst,
1488 const LogicVRegister& src,
1489 int shift) {
1490 VIXL_ASSERT(shift >= 0);
1491 SimVRegister temp;
1492 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1493 return ushl(vform, dst, src, shiftreg);
1494 }
1495
1496
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1497 LogicVRegister Simulator::sshll(VectorFormat vform,
1498 LogicVRegister dst,
1499 const LogicVRegister& src,
1500 int shift) {
1501 VIXL_ASSERT(shift >= 0);
1502 SimVRegister temp1, temp2;
1503 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1504 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1505 return sshl(vform, dst, extendedreg, shiftreg);
1506 }
1507
1508
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1509 LogicVRegister Simulator::sshll2(VectorFormat vform,
1510 LogicVRegister dst,
1511 const LogicVRegister& src,
1512 int shift) {
1513 VIXL_ASSERT(shift >= 0);
1514 SimVRegister temp1, temp2;
1515 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1516 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1517 return sshl(vform, dst, extendedreg, shiftreg);
1518 }
1519
1520
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1521 LogicVRegister Simulator::shll(VectorFormat vform,
1522 LogicVRegister dst,
1523 const LogicVRegister& src) {
1524 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1525 return sshll(vform, dst, src, shift);
1526 }
1527
1528
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1529 LogicVRegister Simulator::shll2(VectorFormat vform,
1530 LogicVRegister dst,
1531 const LogicVRegister& src) {
1532 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1533 return sshll2(vform, dst, src, shift);
1534 }
1535
1536
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1537 LogicVRegister Simulator::ushll(VectorFormat vform,
1538 LogicVRegister dst,
1539 const LogicVRegister& src,
1540 int shift) {
1541 VIXL_ASSERT(shift >= 0);
1542 SimVRegister temp1, temp2;
1543 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1544 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1545 return ushl(vform, dst, extendedreg, shiftreg);
1546 }
1547
1548
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1549 LogicVRegister Simulator::ushll2(VectorFormat vform,
1550 LogicVRegister dst,
1551 const LogicVRegister& src,
1552 int shift) {
1553 VIXL_ASSERT(shift >= 0);
1554 SimVRegister temp1, temp2;
1555 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1556 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1557 return ushl(vform, dst, extendedreg, shiftreg);
1558 }
1559
clast(VectorFormat vform,const LogicPRegister & pg,const LogicVRegister & src,int offset_from_last_active)1560 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1561 const LogicPRegister& pg,
1562 const LogicVRegister& src,
1563 int offset_from_last_active) {
1564 // Untested for any other values.
1565 VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1566
1567 int last_active = GetLastActive(vform, pg);
1568 int lane_count = LaneCountFromFormat(vform);
1569 int index =
1570 ((last_active + offset_from_last_active) + lane_count) % lane_count;
1571 return std::make_pair(last_active >= 0, src.Uint(vform, index));
1572 }
1573
compact(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1574 LogicVRegister Simulator::compact(VectorFormat vform,
1575 LogicVRegister dst,
1576 const LogicPRegister& pg,
1577 const LogicVRegister& src) {
1578 int j = 0;
1579 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1580 if (pg.IsActive(vform, i)) {
1581 dst.SetUint(vform, j++, src.Uint(vform, i));
1582 }
1583 }
1584 for (; j < LaneCountFromFormat(vform); j++) {
1585 dst.SetUint(vform, j, 0);
1586 }
1587 return dst;
1588 }
1589
splice(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1590 LogicVRegister Simulator::splice(VectorFormat vform,
1591 LogicVRegister dst,
1592 const LogicPRegister& pg,
1593 const LogicVRegister& src1,
1594 const LogicVRegister& src2) {
1595 int lane_count = LaneCountFromFormat(vform);
1596 int first_active = GetFirstActive(vform, pg);
1597 int last_active = GetLastActive(vform, pg);
1598 int dst_idx = 0;
1599 uint64_t result[kZRegMaxSizeInBytes];
1600
1601 if (first_active >= 0) {
1602 VIXL_ASSERT(last_active >= first_active);
1603 VIXL_ASSERT(last_active < lane_count);
1604 for (int i = first_active; i <= last_active; i++) {
1605 result[dst_idx++] = src1.Uint(vform, i);
1606 }
1607 }
1608
1609 VIXL_ASSERT(dst_idx <= lane_count);
1610 for (int i = dst_idx; i < lane_count; i++) {
1611 result[i] = src2.Uint(vform, i - dst_idx);
1612 }
1613
1614 dst.SetUintArray(vform, result);
1615
1616 return dst;
1617 }
1618
sel(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1619 LogicVRegister Simulator::sel(VectorFormat vform,
1620 LogicVRegister dst,
1621 const SimPRegister& pg,
1622 const LogicVRegister& src1,
1623 const LogicVRegister& src2) {
1624 int p_reg_bits_per_lane =
1625 LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1626 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1627 uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1628 ? src1.Uint(vform, lane)
1629 : src2.Uint(vform, lane);
1630 dst.SetUint(vform, lane, lane_value);
1631 }
1632 return dst;
1633 }
1634
1635
sel(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src1,const LogicPRegister & src2)1636 LogicPRegister Simulator::sel(LogicPRegister dst,
1637 const LogicPRegister& pg,
1638 const LogicPRegister& src1,
1639 const LogicPRegister& src2) {
1640 for (int i = 0; i < dst.GetChunkCount(); i++) {
1641 LogicPRegister::ChunkType mask = pg.GetChunk(i);
1642 LogicPRegister::ChunkType result =
1643 (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1644 dst.SetChunk(i, result);
1645 }
1646 return dst;
1647 }
1648
1649
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1650 LogicVRegister Simulator::sli(VectorFormat vform,
1651 LogicVRegister dst,
1652 const LogicVRegister& src,
1653 int shift) {
1654 dst.ClearForWrite(vform);
1655 int lane_count = LaneCountFromFormat(vform);
1656 for (int i = 0; i < lane_count; i++) {
1657 uint64_t src_lane = src.Uint(vform, i);
1658 uint64_t dst_lane = dst.Uint(vform, i);
1659 uint64_t shifted = src_lane << shift;
1660 uint64_t mask = MaxUintFromFormat(vform) << shift;
1661 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1662 }
1663 return dst;
1664 }
1665
1666
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1667 LogicVRegister Simulator::sqshl(VectorFormat vform,
1668 LogicVRegister dst,
1669 const LogicVRegister& src,
1670 int shift) {
1671 VIXL_ASSERT(shift >= 0);
1672 SimVRegister temp;
1673 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1674 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1675 }
1676
1677
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1678 LogicVRegister Simulator::uqshl(VectorFormat vform,
1679 LogicVRegister dst,
1680 const LogicVRegister& src,
1681 int shift) {
1682 VIXL_ASSERT(shift >= 0);
1683 SimVRegister temp;
1684 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1685 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1686 }
1687
1688
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1689 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1690 LogicVRegister dst,
1691 const LogicVRegister& src,
1692 int shift) {
1693 VIXL_ASSERT(shift >= 0);
1694 SimVRegister temp;
1695 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1696 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1697 }
1698
1699
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1700 LogicVRegister Simulator::sri(VectorFormat vform,
1701 LogicVRegister dst,
1702 const LogicVRegister& src,
1703 int shift) {
1704 dst.ClearForWrite(vform);
1705 int lane_count = LaneCountFromFormat(vform);
1706 VIXL_ASSERT((shift > 0) &&
1707 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1708 for (int i = 0; i < lane_count; i++) {
1709 uint64_t src_lane = src.Uint(vform, i);
1710 uint64_t dst_lane = dst.Uint(vform, i);
1711 uint64_t shifted;
1712 uint64_t mask;
1713 if (shift == 64) {
1714 shifted = 0;
1715 mask = 0;
1716 } else {
1717 shifted = src_lane >> shift;
1718 mask = MaxUintFromFormat(vform) >> shift;
1719 }
1720 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1721 }
1722 return dst;
1723 }
1724
1725
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1726 LogicVRegister Simulator::ushr(VectorFormat vform,
1727 LogicVRegister dst,
1728 const LogicVRegister& src,
1729 int shift) {
1730 VIXL_ASSERT(shift >= 0);
1731 SimVRegister temp;
1732 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1733 return ushl(vform, dst, src, shiftreg);
1734 }
1735
1736
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1737 LogicVRegister Simulator::sshr(VectorFormat vform,
1738 LogicVRegister dst,
1739 const LogicVRegister& src,
1740 int shift) {
1741 VIXL_ASSERT(shift >= 0);
1742 SimVRegister temp;
1743 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1744 return sshl(vform, dst, src, shiftreg);
1745 }
1746
1747
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1748 LogicVRegister Simulator::ssra(VectorFormat vform,
1749 LogicVRegister dst,
1750 const LogicVRegister& src,
1751 int shift) {
1752 SimVRegister temp;
1753 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1754 return add(vform, dst, dst, shifted_reg);
1755 }
1756
1757
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1758 LogicVRegister Simulator::usra(VectorFormat vform,
1759 LogicVRegister dst,
1760 const LogicVRegister& src,
1761 int shift) {
1762 SimVRegister temp;
1763 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1764 return add(vform, dst, dst, shifted_reg);
1765 }
1766
1767
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1768 LogicVRegister Simulator::srsra(VectorFormat vform,
1769 LogicVRegister dst,
1770 const LogicVRegister& src,
1771 int shift) {
1772 SimVRegister temp;
1773 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1774 return add(vform, dst, dst, shifted_reg);
1775 }
1776
1777
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1778 LogicVRegister Simulator::ursra(VectorFormat vform,
1779 LogicVRegister dst,
1780 const LogicVRegister& src,
1781 int shift) {
1782 SimVRegister temp;
1783 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1784 return add(vform, dst, dst, shifted_reg);
1785 }
1786
1787
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1788 LogicVRegister Simulator::cls(VectorFormat vform,
1789 LogicVRegister dst,
1790 const LogicVRegister& src) {
1791 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1792 int lane_count = LaneCountFromFormat(vform);
1793
1794 // Ensure that we can store one result per lane.
1795 int result[kZRegMaxSizeInBytes];
1796
1797 for (int i = 0; i < lane_count; i++) {
1798 result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
1799 }
1800
1801 dst.ClearForWrite(vform);
1802 for (int i = 0; i < lane_count; ++i) {
1803 dst.SetUint(vform, i, result[i]);
1804 }
1805 return dst;
1806 }
1807
1808
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1809 LogicVRegister Simulator::clz(VectorFormat vform,
1810 LogicVRegister dst,
1811 const LogicVRegister& src) {
1812 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1813 int lane_count = LaneCountFromFormat(vform);
1814
1815 // Ensure that we can store one result per lane.
1816 int result[kZRegMaxSizeInBytes];
1817
1818 for (int i = 0; i < lane_count; i++) {
1819 result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
1820 }
1821
1822 dst.ClearForWrite(vform);
1823 for (int i = 0; i < lane_count; ++i) {
1824 dst.SetUint(vform, i, result[i]);
1825 }
1826 return dst;
1827 }
1828
1829
cnot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1830 LogicVRegister Simulator::cnot(VectorFormat vform,
1831 LogicVRegister dst,
1832 const LogicVRegister& src) {
1833 dst.ClearForWrite(vform);
1834 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1835 uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
1836 dst.SetUint(vform, i, value);
1837 }
1838 return dst;
1839 }
1840
1841
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1842 LogicVRegister Simulator::cnt(VectorFormat vform,
1843 LogicVRegister dst,
1844 const LogicVRegister& src) {
1845 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1846 int lane_count = LaneCountFromFormat(vform);
1847
1848 // Ensure that we can store one result per lane.
1849 int result[kZRegMaxSizeInBytes];
1850
1851 for (int i = 0; i < lane_count; i++) {
1852 result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
1853 }
1854
1855 dst.ClearForWrite(vform);
1856 for (int i = 0; i < lane_count; ++i) {
1857 dst.SetUint(vform, i, result[i]);
1858 }
1859 return dst;
1860 }
1861
CalculateSignedShiftDistance(int64_t shift_val,int esize,bool shift_in_ls_byte)1862 static int64_t CalculateSignedShiftDistance(int64_t shift_val,
1863 int esize,
1864 bool shift_in_ls_byte) {
1865 if (shift_in_ls_byte) {
1866 // Neon uses the least-significant byte of the lane as the shift distance.
1867 shift_val = ExtractSignedBitfield64(7, 0, shift_val);
1868 } else {
1869 // SVE uses a saturated shift distance in the range
1870 // -(esize + 1) ... (esize + 1).
1871 if (shift_val > (esize + 1)) shift_val = esize + 1;
1872 if (shift_val < -(esize + 1)) shift_val = -(esize + 1);
1873 }
1874 return shift_val;
1875 }
1876
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1877 LogicVRegister Simulator::sshl(VectorFormat vform,
1878 LogicVRegister dst,
1879 const LogicVRegister& src1,
1880 const LogicVRegister& src2,
1881 bool shift_in_ls_byte) {
1882 dst.ClearForWrite(vform);
1883 int esize = LaneSizeInBitsFromFormat(vform);
1884 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1885 int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1886 esize,
1887 shift_in_ls_byte);
1888
1889 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1890
1891 // Set signed saturation state.
1892 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1893 dst.SetSignedSat(i, lj_src_val >= 0);
1894 }
1895
1896 // Set unsigned saturation state.
1897 if (lj_src_val < 0) {
1898 dst.SetUnsignedSat(i, false);
1899 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1900 (lj_src_val != 0)) {
1901 dst.SetUnsignedSat(i, true);
1902 }
1903
1904 int64_t src_val = src1.Int(vform, i);
1905 bool src_is_negative = src_val < 0;
1906 if (shift_val > 63) {
1907 dst.SetInt(vform, i, 0);
1908 } else if (shift_val < -63) {
1909 dst.SetRounding(i, src_is_negative);
1910 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1911 } else {
1912 // Use unsigned types for shifts, as behaviour is undefined for signed
1913 // lhs.
1914 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1915
1916 if (shift_val < 0) {
1917 // Convert to right shift.
1918 shift_val = -shift_val;
1919
1920 // Set rounding state by testing most-significant bit shifted out.
1921 // Rounding only needed on right shifts.
1922 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1923 dst.SetRounding(i, true);
1924 }
1925
1926 usrc_val >>= shift_val;
1927
1928 if (src_is_negative) {
1929 // Simulate sign-extension.
1930 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1931 }
1932 } else {
1933 usrc_val <<= shift_val;
1934 }
1935 dst.SetUint(vform, i, usrc_val);
1936 }
1937 }
1938 return dst;
1939 }
1940
1941
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool shift_in_ls_byte)1942 LogicVRegister Simulator::ushl(VectorFormat vform,
1943 LogicVRegister dst,
1944 const LogicVRegister& src1,
1945 const LogicVRegister& src2,
1946 bool shift_in_ls_byte) {
1947 dst.ClearForWrite(vform);
1948 int esize = LaneSizeInBitsFromFormat(vform);
1949 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1950 int64_t shift_val = CalculateSignedShiftDistance(src2.Int(vform, i),
1951 esize,
1952 shift_in_ls_byte);
1953
1954 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1955
1956 // Set saturation state.
1957 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1958 dst.SetUnsignedSat(i, true);
1959 }
1960
1961 uint64_t src_val = src1.Uint(vform, i);
1962 if ((shift_val > 63) || (shift_val < -64)) {
1963 dst.SetUint(vform, i, 0);
1964 } else {
1965 if (shift_val < 0) {
1966 // Set rounding state. Rounding only needed on right shifts.
1967 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1968 dst.SetRounding(i, true);
1969 }
1970
1971 if (shift_val == -64) {
1972 src_val = 0;
1973 } else {
1974 src_val >>= -shift_val;
1975 }
1976 } else {
1977 src_val <<= shift_val;
1978 }
1979 dst.SetUint(vform, i, src_val);
1980 }
1981 }
1982 return dst;
1983 }
1984
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1985 LogicVRegister Simulator::sshr(VectorFormat vform,
1986 LogicVRegister dst,
1987 const LogicVRegister& src1,
1988 const LogicVRegister& src2) {
1989 SimVRegister temp;
1990 // Saturate to sidestep the min-int problem.
1991 neg(vform, temp, src2).SignedSaturate(vform);
1992 sshl(vform, dst, src1, temp, false);
1993 return dst;
1994 }
1995
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1996 LogicVRegister Simulator::ushr(VectorFormat vform,
1997 LogicVRegister dst,
1998 const LogicVRegister& src1,
1999 const LogicVRegister& src2) {
2000 SimVRegister temp;
2001 // Saturate to sidestep the min-int problem.
2002 neg(vform, temp, src2).SignedSaturate(vform);
2003 ushl(vform, dst, src1, temp, false);
2004 return dst;
2005 }
2006
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2007 LogicVRegister Simulator::neg(VectorFormat vform,
2008 LogicVRegister dst,
2009 const LogicVRegister& src) {
2010 dst.ClearForWrite(vform);
2011 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2012 // Test for signed saturation.
2013 int64_t sa = src.Int(vform, i);
2014 if (sa == MinIntFromFormat(vform)) {
2015 dst.SetSignedSat(i, true);
2016 }
2017 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2018 }
2019 return dst;
2020 }
2021
2022
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2023 LogicVRegister Simulator::suqadd(VectorFormat vform,
2024 LogicVRegister dst,
2025 const LogicVRegister& src1,
2026 const LogicVRegister& src2) {
2027 dst.ClearForWrite(vform);
2028 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2029 int64_t sa = src1.IntLeftJustified(vform, i);
2030 uint64_t ub = src2.UintLeftJustified(vform, i);
2031 uint64_t ur = sa + ub;
2032
2033 int64_t sr;
2034 memcpy(&sr, &ur, sizeof(sr));
2035 if (sr < sa) { // Test for signed positive saturation.
2036 dst.SetInt(vform, i, MaxIntFromFormat(vform));
2037 } else {
2038 dst.SetUint(vform, i, src1.Int(vform, i) + src2.Uint(vform, i));
2039 }
2040 }
2041 return dst;
2042 }
2043
2044
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2045 LogicVRegister Simulator::usqadd(VectorFormat vform,
2046 LogicVRegister dst,
2047 const LogicVRegister& src1,
2048 const LogicVRegister& src2) {
2049 dst.ClearForWrite(vform);
2050 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2051 uint64_t ua = src1.UintLeftJustified(vform, i);
2052 int64_t sb = src2.IntLeftJustified(vform, i);
2053 uint64_t ur = ua + sb;
2054
2055 if ((sb > 0) && (ur <= ua)) {
2056 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2057 } else if ((sb < 0) && (ur >= ua)) {
2058 dst.SetUint(vform, i, 0); // Negative saturation.
2059 } else {
2060 dst.SetUint(vform, i, src1.Uint(vform, i) + src2.Int(vform, i));
2061 }
2062 }
2063 return dst;
2064 }
2065
2066
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2067 LogicVRegister Simulator::abs(VectorFormat vform,
2068 LogicVRegister dst,
2069 const LogicVRegister& src) {
2070 dst.ClearForWrite(vform);
2071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2072 // Test for signed saturation.
2073 int64_t sa = src.Int(vform, i);
2074 if (sa == MinIntFromFormat(vform)) {
2075 dst.SetSignedSat(i, true);
2076 }
2077 if (sa < 0) {
2078 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2079 } else {
2080 dst.SetInt(vform, i, sa);
2081 }
2082 }
2083 return dst;
2084 }
2085
2086
andv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2087 LogicVRegister Simulator::andv(VectorFormat vform,
2088 LogicVRegister dst,
2089 const LogicPRegister& pg,
2090 const LogicVRegister& src) {
2091 VIXL_ASSERT(IsSVEFormat(vform));
2092 uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2093 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2094 if (!pg.IsActive(vform, i)) continue;
2095
2096 result &= src.Uint(vform, i);
2097 }
2098 VectorFormat vform_dst =
2099 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2100 dst.ClearForWrite(vform_dst);
2101 dst.SetUint(vform_dst, 0, result);
2102 return dst;
2103 }
2104
2105
eorv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2106 LogicVRegister Simulator::eorv(VectorFormat vform,
2107 LogicVRegister dst,
2108 const LogicPRegister& pg,
2109 const LogicVRegister& src) {
2110 VIXL_ASSERT(IsSVEFormat(vform));
2111 uint64_t result = 0;
2112 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2113 if (!pg.IsActive(vform, i)) continue;
2114
2115 result ^= src.Uint(vform, i);
2116 }
2117 VectorFormat vform_dst =
2118 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2119 dst.ClearForWrite(vform_dst);
2120 dst.SetUint(vform_dst, 0, result);
2121 return dst;
2122 }
2123
2124
orv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2125 LogicVRegister Simulator::orv(VectorFormat vform,
2126 LogicVRegister dst,
2127 const LogicPRegister& pg,
2128 const LogicVRegister& src) {
2129 VIXL_ASSERT(IsSVEFormat(vform));
2130 uint64_t result = 0;
2131 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2132 if (!pg.IsActive(vform, i)) continue;
2133
2134 result |= src.Uint(vform, i);
2135 }
2136 VectorFormat vform_dst =
2137 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2138 dst.ClearForWrite(vform_dst);
2139 dst.SetUint(vform_dst, 0, result);
2140 return dst;
2141 }
2142
2143
saddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2144 LogicVRegister Simulator::saddv(VectorFormat vform,
2145 LogicVRegister dst,
2146 const LogicPRegister& pg,
2147 const LogicVRegister& src) {
2148 VIXL_ASSERT(IsSVEFormat(vform));
2149 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2150 int64_t result = 0;
2151 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2152 if (!pg.IsActive(vform, i)) continue;
2153
2154 // The destination register always has D-lane sizes and the source register
2155 // always has S-lanes or smaller, so signed integer overflow -- undefined
2156 // behaviour -- can't occur.
2157 result += src.Int(vform, i);
2158 }
2159
2160 dst.ClearForWrite(kFormatD);
2161 dst.SetInt(kFormatD, 0, result);
2162 return dst;
2163 }
2164
2165
uaddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2166 LogicVRegister Simulator::uaddv(VectorFormat vform,
2167 LogicVRegister dst,
2168 const LogicPRegister& pg,
2169 const LogicVRegister& src) {
2170 VIXL_ASSERT(IsSVEFormat(vform));
2171 uint64_t result = 0;
2172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2173 if (!pg.IsActive(vform, i)) continue;
2174
2175 result += src.Uint(vform, i);
2176 }
2177
2178 dst.ClearForWrite(kFormatD);
2179 dst.SetUint(kFormatD, 0, result);
2180 return dst;
2181 }
2182
2183
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dst_is_signed,const LogicVRegister & src,bool src_is_signed)2184 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2185 LogicVRegister dst,
2186 bool dst_is_signed,
2187 const LogicVRegister& src,
2188 bool src_is_signed) {
2189 bool upperhalf = false;
2190 VectorFormat srcform = dstform;
2191 if ((dstform == kFormat16B) || (dstform == kFormat8H) ||
2192 (dstform == kFormat4S)) {
2193 upperhalf = true;
2194 srcform = VectorFormatHalfLanes(srcform);
2195 }
2196 srcform = VectorFormatDoubleWidth(srcform);
2197
2198 LogicVRegister src_copy = src;
2199
2200 int offset;
2201 if (upperhalf) {
2202 offset = LaneCountFromFormat(dstform) / 2;
2203 } else {
2204 offset = 0;
2205 dst.ClearForWrite(dstform);
2206 }
2207
2208 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2209 int64_t ssrc = src_copy.Int(srcform, i);
2210 uint64_t usrc = src_copy.Uint(srcform, i);
2211
2212 // Test for signed saturation
2213 if (ssrc > MaxIntFromFormat(dstform)) {
2214 dst.SetSignedSat(offset + i, true);
2215 } else if (ssrc < MinIntFromFormat(dstform)) {
2216 dst.SetSignedSat(offset + i, false);
2217 }
2218
2219 // Test for unsigned saturation
2220 if (src_is_signed) {
2221 if (ssrc > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2222 dst.SetUnsignedSat(offset + i, true);
2223 } else if (ssrc < 0) {
2224 dst.SetUnsignedSat(offset + i, false);
2225 }
2226 } else {
2227 if (usrc > MaxUintFromFormat(dstform)) {
2228 dst.SetUnsignedSat(offset + i, true);
2229 }
2230 }
2231
2232 int64_t result;
2233 if (src_is_signed) {
2234 result = ssrc & MaxUintFromFormat(dstform);
2235 } else {
2236 result = usrc & MaxUintFromFormat(dstform);
2237 }
2238
2239 if (dst_is_signed) {
2240 dst.SetInt(dstform, offset + i, result);
2241 } else {
2242 dst.SetUint(dstform, offset + i, result);
2243 }
2244 }
2245 return dst;
2246 }
2247
2248
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2249 LogicVRegister Simulator::xtn(VectorFormat vform,
2250 LogicVRegister dst,
2251 const LogicVRegister& src) {
2252 return extractnarrow(vform, dst, true, src, true);
2253 }
2254
2255
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2256 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2257 LogicVRegister dst,
2258 const LogicVRegister& src) {
2259 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2260 }
2261
2262
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2263 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2264 LogicVRegister dst,
2265 const LogicVRegister& src) {
2266 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2267 }
2268
2269
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2270 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2271 LogicVRegister dst,
2272 const LogicVRegister& src) {
2273 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2274 }
2275
2276
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)2277 LogicVRegister Simulator::absdiff(VectorFormat vform,
2278 LogicVRegister dst,
2279 const LogicVRegister& src1,
2280 const LogicVRegister& src2,
2281 bool is_signed) {
2282 dst.ClearForWrite(vform);
2283 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2284 bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2285 : (src1.Uint(vform, i) > src2.Uint(vform, i));
2286 // Always calculate the answer using unsigned arithmetic, to avoid
2287 // implemenation-defined signed overflow.
2288 if (src1_gt_src2) {
2289 dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2290 } else {
2291 dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2292 }
2293 }
2294 return dst;
2295 }
2296
2297
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2298 LogicVRegister Simulator::saba(VectorFormat vform,
2299 LogicVRegister dst,
2300 const LogicVRegister& src1,
2301 const LogicVRegister& src2) {
2302 SimVRegister temp;
2303 dst.ClearForWrite(vform);
2304 absdiff(vform, temp, src1, src2, true);
2305 add(vform, dst, dst, temp);
2306 return dst;
2307 }
2308
2309
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2310 LogicVRegister Simulator::uaba(VectorFormat vform,
2311 LogicVRegister dst,
2312 const LogicVRegister& src1,
2313 const LogicVRegister& src2) {
2314 SimVRegister temp;
2315 dst.ClearForWrite(vform);
2316 absdiff(vform, temp, src1, src2, false);
2317 add(vform, dst, dst, temp);
2318 return dst;
2319 }
2320
2321
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2322 LogicVRegister Simulator::not_(VectorFormat vform,
2323 LogicVRegister dst,
2324 const LogicVRegister& src) {
2325 dst.ClearForWrite(vform);
2326 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2327 dst.SetUint(vform, i, ~src.Uint(vform, i));
2328 }
2329 return dst;
2330 }
2331
2332
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2333 LogicVRegister Simulator::rbit(VectorFormat vform,
2334 LogicVRegister dst,
2335 const LogicVRegister& src) {
2336 uint64_t result[kZRegMaxSizeInBytes];
2337 int lane_count = LaneCountFromFormat(vform);
2338 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2339 uint64_t reversed_value;
2340 uint64_t value;
2341 for (int i = 0; i < lane_count; i++) {
2342 value = src.Uint(vform, i);
2343 reversed_value = 0;
2344 for (int j = 0; j < lane_size_in_bits; j++) {
2345 reversed_value = (reversed_value << 1) | (value & 1);
2346 value >>= 1;
2347 }
2348 result[i] = reversed_value;
2349 }
2350
2351 dst.ClearForWrite(vform);
2352 for (int i = 0; i < lane_count; ++i) {
2353 dst.SetUint(vform, i, result[i]);
2354 }
2355 return dst;
2356 }
2357
2358
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2359 LogicVRegister Simulator::rev(VectorFormat vform,
2360 LogicVRegister dst,
2361 const LogicVRegister& src) {
2362 VIXL_ASSERT(IsSVEFormat(vform));
2363 int lane_count = LaneCountFromFormat(vform);
2364 for (int i = 0; i < lane_count / 2; i++) {
2365 uint64_t t = src.Uint(vform, i);
2366 dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2367 dst.SetUint(vform, lane_count - i - 1, t);
2368 }
2369 return dst;
2370 }
2371
2372
rev_byte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rev_size)2373 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2374 LogicVRegister dst,
2375 const LogicVRegister& src,
2376 int rev_size) {
2377 uint64_t result[kZRegMaxSizeInBytes] = {};
2378 int lane_count = LaneCountFromFormat(vform);
2379 int lane_size = LaneSizeInBytesFromFormat(vform);
2380 int lanes_per_loop = rev_size / lane_size;
2381 for (int i = 0; i < lane_count; i += lanes_per_loop) {
2382 for (int j = 0; j < lanes_per_loop; j++) {
2383 result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2384 }
2385 }
2386 dst.ClearForWrite(vform);
2387 for (int i = 0; i < lane_count; ++i) {
2388 dst.SetUint(vform, i, result[i]);
2389 }
2390 return dst;
2391 }
2392
2393
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2394 LogicVRegister Simulator::rev16(VectorFormat vform,
2395 LogicVRegister dst,
2396 const LogicVRegister& src) {
2397 return rev_byte(vform, dst, src, 2);
2398 }
2399
2400
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2401 LogicVRegister Simulator::rev32(VectorFormat vform,
2402 LogicVRegister dst,
2403 const LogicVRegister& src) {
2404 return rev_byte(vform, dst, src, 4);
2405 }
2406
2407
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2408 LogicVRegister Simulator::rev64(VectorFormat vform,
2409 LogicVRegister dst,
2410 const LogicVRegister& src) {
2411 return rev_byte(vform, dst, src, 8);
2412 }
2413
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2414 LogicVRegister Simulator::addlp(VectorFormat vform,
2415 LogicVRegister dst,
2416 const LogicVRegister& src,
2417 bool is_signed,
2418 bool do_accumulate) {
2419 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2420 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= kSRegSize);
2421
2422 uint64_t result[kZRegMaxSizeInBytes];
2423 int lane_count = LaneCountFromFormat(vform);
2424 for (int i = 0; i < lane_count; i++) {
2425 if (is_signed) {
2426 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2427 src.Int(vformsrc, 2 * i + 1));
2428 } else {
2429 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2430 }
2431 }
2432
2433 dst.ClearForWrite(vform);
2434 for (int i = 0; i < lane_count; ++i) {
2435 if (do_accumulate) {
2436 result[i] += dst.Uint(vform, i);
2437 }
2438 dst.SetUint(vform, i, result[i]);
2439 }
2440
2441 return dst;
2442 }
2443
2444
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2445 LogicVRegister Simulator::saddlp(VectorFormat vform,
2446 LogicVRegister dst,
2447 const LogicVRegister& src) {
2448 return addlp(vform, dst, src, true, false);
2449 }
2450
2451
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2452 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2453 LogicVRegister dst,
2454 const LogicVRegister& src) {
2455 return addlp(vform, dst, src, false, false);
2456 }
2457
2458
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2459 LogicVRegister Simulator::sadalp(VectorFormat vform,
2460 LogicVRegister dst,
2461 const LogicVRegister& src) {
2462 return addlp(vform, dst, src, true, true);
2463 }
2464
2465
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2466 LogicVRegister Simulator::uadalp(VectorFormat vform,
2467 LogicVRegister dst,
2468 const LogicVRegister& src) {
2469 return addlp(vform, dst, src, false, true);
2470 }
2471
ror(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rotation)2472 LogicVRegister Simulator::ror(VectorFormat vform,
2473 LogicVRegister dst,
2474 const LogicVRegister& src,
2475 int rotation) {
2476 int width = LaneSizeInBitsFromFormat(vform);
2477 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2478 uint64_t value = src.Uint(vform, i);
2479 dst.SetUint(vform, i, RotateRight(value, rotation, width));
2480 }
2481 return dst;
2482 }
2483
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2484 LogicVRegister Simulator::ext(VectorFormat vform,
2485 LogicVRegister dst,
2486 const LogicVRegister& src1,
2487 const LogicVRegister& src2,
2488 int index) {
2489 uint8_t result[kZRegMaxSizeInBytes] = {};
2490 int lane_count = LaneCountFromFormat(vform);
2491 for (int i = 0; i < lane_count - index; ++i) {
2492 result[i] = src1.Uint(vform, i + index);
2493 }
2494 for (int i = 0; i < index; ++i) {
2495 result[lane_count - index + i] = src2.Uint(vform, i);
2496 }
2497 dst.ClearForWrite(vform);
2498 for (int i = 0; i < lane_count; ++i) {
2499 dst.SetUint(vform, i, result[i]);
2500 }
2501 return dst;
2502 }
2503
rotate_elements_right(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int index)2504 LogicVRegister Simulator::rotate_elements_right(VectorFormat vform,
2505 LogicVRegister dst,
2506 const LogicVRegister& src,
2507 int index) {
2508 if (index < 0) index += LaneCountFromFormat(vform);
2509 VIXL_ASSERT((index >= 0) && (index < LaneCountFromFormat(vform)));
2510 index *= LaneSizeInBytesFromFormat(vform);
2511 return ext(kFormatVnB, dst, src, src, index);
2512 }
2513
2514
2515 template <typename T>
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2516 LogicVRegister Simulator::fadda(VectorFormat vform,
2517 LogicVRegister acc,
2518 const LogicPRegister& pg,
2519 const LogicVRegister& src) {
2520 T result = acc.Float<T>(0);
2521 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2522 if (!pg.IsActive(vform, i)) continue;
2523
2524 result = FPAdd(result, src.Float<T>(i));
2525 }
2526 VectorFormat vform_dst =
2527 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2528 acc.ClearForWrite(vform_dst);
2529 acc.SetFloat(0, result);
2530 return acc;
2531 }
2532
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2533 LogicVRegister Simulator::fadda(VectorFormat vform,
2534 LogicVRegister acc,
2535 const LogicPRegister& pg,
2536 const LogicVRegister& src) {
2537 switch (LaneSizeInBitsFromFormat(vform)) {
2538 case kHRegSize:
2539 fadda<SimFloat16>(vform, acc, pg, src);
2540 break;
2541 case kSRegSize:
2542 fadda<float>(vform, acc, pg, src);
2543 break;
2544 case kDRegSize:
2545 fadda<double>(vform, acc, pg, src);
2546 break;
2547 default:
2548 VIXL_UNREACHABLE();
2549 }
2550 return acc;
2551 }
2552
2553 template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2554 LogicVRegister Simulator::fcadd(VectorFormat vform,
2555 LogicVRegister dst, // d
2556 const LogicVRegister& src1, // n
2557 const LogicVRegister& src2, // m
2558 int rot) {
2559 int elements = LaneCountFromFormat(vform);
2560
2561 T element1, element3;
2562 rot = (rot == 1) ? 270 : 90;
2563
2564 // Loop example:
2565 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2566 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2567
2568 for (int e = 0; e <= (elements / 2) - 1; e++) {
2569 switch (rot) {
2570 case 90:
2571 element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2572 element3 = src2.Float<T>(e * 2);
2573 break;
2574 case 270:
2575 element1 = src2.Float<T>(e * 2 + 1);
2576 element3 = FPNeg(src2.Float<T>(e * 2));
2577 break;
2578 default:
2579 VIXL_UNREACHABLE();
2580 return dst; // prevents "element(n) may be unintialized" errors
2581 }
2582 dst.ClearForWrite(vform);
2583 dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2584 dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2585 }
2586 return dst;
2587 }
2588
2589
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2590 LogicVRegister Simulator::fcadd(VectorFormat vform,
2591 LogicVRegister dst, // d
2592 const LogicVRegister& src1, // n
2593 const LogicVRegister& src2, // m
2594 int rot) {
2595 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2596 fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2597 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2598 fcadd<float>(vform, dst, src1, src2, rot);
2599 } else {
2600 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2601 fcadd<double>(vform, dst, src1, src2, rot);
2602 }
2603 return dst;
2604 }
2605
2606 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int index,int rot)2607 LogicVRegister Simulator::fcmla(VectorFormat vform,
2608 LogicVRegister dst,
2609 const LogicVRegister& src1,
2610 const LogicVRegister& src2,
2611 const LogicVRegister& acc,
2612 int index,
2613 int rot) {
2614 int elements = LaneCountFromFormat(vform);
2615
2616 T element1, element2, element3, element4;
2617 rot *= 90;
2618
2619 // Loop example:
2620 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2621 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2622
2623 for (int e = 0; e <= (elements / 2) - 1; e++) {
2624 // Index == -1 indicates a vector/vector rather than vector/indexed-element
2625 // operation.
2626 int f = (index < 0) ? e : index;
2627
2628 switch (rot) {
2629 case 0:
2630 element1 = src2.Float<T>(f * 2);
2631 element2 = src1.Float<T>(e * 2);
2632 element3 = src2.Float<T>(f * 2 + 1);
2633 element4 = src1.Float<T>(e * 2);
2634 break;
2635 case 90:
2636 element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2637 element2 = src1.Float<T>(e * 2 + 1);
2638 element3 = src2.Float<T>(f * 2);
2639 element4 = src1.Float<T>(e * 2 + 1);
2640 break;
2641 case 180:
2642 element1 = FPNeg(src2.Float<T>(f * 2));
2643 element2 = src1.Float<T>(e * 2);
2644 element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2645 element4 = src1.Float<T>(e * 2);
2646 break;
2647 case 270:
2648 element1 = src2.Float<T>(f * 2 + 1);
2649 element2 = src1.Float<T>(e * 2 + 1);
2650 element3 = FPNeg(src2.Float<T>(f * 2));
2651 element4 = src1.Float<T>(e * 2 + 1);
2652 break;
2653 default:
2654 VIXL_UNREACHABLE();
2655 return dst; // prevents "element(n) may be unintialized" errors
2656 }
2657 dst.ClearForWrite(vform);
2658 dst.SetFloat<T>(vform,
2659 e * 2,
2660 FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2661 dst.SetFloat<T>(vform,
2662 e * 2 + 1,
2663 FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2664 }
2665 return dst;
2666 }
2667
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int rot)2668 LogicVRegister Simulator::fcmla(VectorFormat vform,
2669 LogicVRegister dst,
2670 const LogicVRegister& src1,
2671 const LogicVRegister& src2,
2672 const LogicVRegister& acc,
2673 int rot) {
2674 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2675 fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2676 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2677 fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2678 } else {
2679 fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2680 }
2681 return dst;
2682 }
2683
2684
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2685 LogicVRegister Simulator::fcmla(VectorFormat vform,
2686 LogicVRegister dst, // d
2687 const LogicVRegister& src1, // n
2688 const LogicVRegister& src2, // m
2689 int index,
2690 int rot) {
2691 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2692 VIXL_UNIMPLEMENTED();
2693 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2694 fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2695 } else {
2696 fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2697 }
2698 return dst;
2699 }
2700
cadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot,bool saturate)2701 LogicVRegister Simulator::cadd(VectorFormat vform,
2702 LogicVRegister dst,
2703 const LogicVRegister& src1,
2704 const LogicVRegister& src2,
2705 int rot,
2706 bool saturate) {
2707 SimVRegister src1_r, src1_i;
2708 SimVRegister src2_r, src2_i;
2709 SimVRegister zero;
2710 zero.Clear();
2711 uzp1(vform, src1_r, src1, zero);
2712 uzp2(vform, src1_i, src1, zero);
2713 uzp1(vform, src2_r, src2, zero);
2714 uzp2(vform, src2_i, src2, zero);
2715
2716 if (rot == 90) {
2717 if (saturate) {
2718 sub(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2719 add(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2720 } else {
2721 sub(vform, src1_r, src1_r, src2_i);
2722 add(vform, src1_i, src1_i, src2_r);
2723 }
2724 } else {
2725 VIXL_ASSERT(rot == 270);
2726 if (saturate) {
2727 add(vform, src1_r, src1_r, src2_i).SignedSaturate(vform);
2728 sub(vform, src1_i, src1_i, src2_r).SignedSaturate(vform);
2729 } else {
2730 add(vform, src1_r, src1_r, src2_i);
2731 sub(vform, src1_i, src1_i, src2_r);
2732 }
2733 }
2734
2735 zip1(vform, dst, src1_r, src1_i);
2736 return dst;
2737 }
2738
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2739 LogicVRegister Simulator::cmla(VectorFormat vform,
2740 LogicVRegister dst,
2741 const LogicVRegister& srca,
2742 const LogicVRegister& src1,
2743 const LogicVRegister& src2,
2744 int rot) {
2745 SimVRegister src1_a;
2746 SimVRegister src2_a, src2_b;
2747 SimVRegister srca_i, srca_r;
2748 SimVRegister zero, temp;
2749 zero.Clear();
2750
2751 if ((rot == 0) || (rot == 180)) {
2752 uzp1(vform, src1_a, src1, zero);
2753 uzp1(vform, src2_a, src2, zero);
2754 uzp2(vform, src2_b, src2, zero);
2755 } else {
2756 uzp2(vform, src1_a, src1, zero);
2757 uzp2(vform, src2_a, src2, zero);
2758 uzp1(vform, src2_b, src2, zero);
2759 }
2760
2761 uzp1(vform, srca_r, srca, zero);
2762 uzp2(vform, srca_i, srca, zero);
2763
2764 bool sub_r = (rot == 90) || (rot == 180);
2765 bool sub_i = (rot == 180) || (rot == 270);
2766
2767 mul(vform, temp, src1_a, src2_a);
2768 if (sub_r) {
2769 sub(vform, srca_r, srca_r, temp);
2770 } else {
2771 add(vform, srca_r, srca_r, temp);
2772 }
2773
2774 mul(vform, temp, src1_a, src2_b);
2775 if (sub_i) {
2776 sub(vform, srca_i, srca_i, temp);
2777 } else {
2778 add(vform, srca_i, srca_i, temp);
2779 }
2780
2781 zip1(vform, dst, srca_r, srca_i);
2782 return dst;
2783 }
2784
cmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2785 LogicVRegister Simulator::cmla(VectorFormat vform,
2786 LogicVRegister dst,
2787 const LogicVRegister& srca,
2788 const LogicVRegister& src1,
2789 const LogicVRegister& src2,
2790 int index,
2791 int rot) {
2792 SimVRegister temp;
2793 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
2794 return cmla(vform, dst, srca, src1, temp, rot);
2795 }
2796
bgrp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool do_bext)2797 LogicVRegister Simulator::bgrp(VectorFormat vform,
2798 LogicVRegister dst,
2799 const LogicVRegister& src1,
2800 const LogicVRegister& src2,
2801 bool do_bext) {
2802 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2803 uint64_t value = src1.Uint(vform, i);
2804 uint64_t mask = src2.Uint(vform, i);
2805 int high_pos = 0;
2806 int low_pos = 0;
2807 uint64_t result_high = 0;
2808 uint64_t result_low = 0;
2809 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2810 if ((mask & 1) == 0) {
2811 result_high |= (value & 1) << high_pos;
2812 high_pos++;
2813 } else {
2814 result_low |= (value & 1) << low_pos;
2815 low_pos++;
2816 }
2817 mask >>= 1;
2818 value >>= 1;
2819 }
2820
2821 if (!do_bext) {
2822 result_low |= result_high << low_pos;
2823 }
2824
2825 dst.SetUint(vform, i, result_low);
2826 }
2827 return dst;
2828 }
2829
bdep(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2830 LogicVRegister Simulator::bdep(VectorFormat vform,
2831 LogicVRegister dst,
2832 const LogicVRegister& src1,
2833 const LogicVRegister& src2) {
2834 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2835 uint64_t value = src1.Uint(vform, i);
2836 uint64_t mask = src2.Uint(vform, i);
2837 uint64_t result = 0;
2838 for (unsigned j = 0; j < LaneSizeInBitsFromFormat(vform); j++) {
2839 if ((mask & 1) == 1) {
2840 result |= (value & 1) << j;
2841 value >>= 1;
2842 }
2843 mask >>= 1;
2844 }
2845 dst.SetUint(vform, i, result);
2846 }
2847 return dst;
2848 }
2849
histogram(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2,bool do_segmented)2850 LogicVRegister Simulator::histogram(VectorFormat vform,
2851 LogicVRegister dst,
2852 const LogicPRegister& pg,
2853 const LogicVRegister& src1,
2854 const LogicVRegister& src2,
2855 bool do_segmented) {
2856 int elements_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
2857 uint64_t result[kZRegMaxSizeInBytes];
2858
2859 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2860 uint64_t count = 0;
2861 uint64_t value = src1.Uint(vform, i);
2862
2863 int segment = do_segmented ? (i / elements_per_segment) : 0;
2864 int segment_offset = segment * elements_per_segment;
2865 int hist_limit = do_segmented ? elements_per_segment : (i + 1);
2866 for (int j = 0; j < hist_limit; j++) {
2867 if (pg.IsActive(vform, j) &&
2868 (value == src2.Uint(vform, j + segment_offset))) {
2869 count++;
2870 }
2871 }
2872 result[i] = count;
2873 }
2874 dst.SetUintArray(vform, result);
2875 return dst;
2876 }
2877
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2878 LogicVRegister Simulator::dup_element(VectorFormat vform,
2879 LogicVRegister dst,
2880 const LogicVRegister& src,
2881 int src_index) {
2882 if ((vform == kFormatVnQ) || (vform == kFormatVnO)) {
2883 // When duplicating an element larger than 64 bits, split the element into
2884 // 64-bit parts, and duplicate the parts across the destination.
2885 uint64_t d[4];
2886 int count = (vform == kFormatVnQ) ? 2 : 4;
2887 for (int i = 0; i < count; i++) {
2888 d[i] = src.Uint(kFormatVnD, (src_index * count) + i);
2889 }
2890 dst.Clear();
2891 for (int i = 0; i < LaneCountFromFormat(vform) * count; i++) {
2892 dst.SetUint(kFormatVnD, i, d[i % count]);
2893 }
2894 } else {
2895 int lane_count = LaneCountFromFormat(vform);
2896 uint64_t value = src.Uint(vform, src_index);
2897 dst.ClearForWrite(vform);
2898 for (int i = 0; i < lane_count; ++i) {
2899 dst.SetUint(vform, i, value);
2900 }
2901 }
2902 return dst;
2903 }
2904
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2905 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2906 LogicVRegister dst,
2907 const LogicVRegister& src,
2908 int src_index) {
2909 // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2910 // whereas in NEON, the size of segment is equal to the size of register
2911 // itself.
2912 int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2913 VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2914 int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2915
2916 VIXL_ASSERT(src_index >= 0);
2917 VIXL_ASSERT(src_index < lanes_per_segment);
2918
2919 dst.ClearForWrite(vform);
2920 for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2921 uint64_t value = src.Uint(vform, j + src_index);
2922 for (int i = 0; i < lanes_per_segment; i++) {
2923 dst.SetUint(vform, j + i, value);
2924 }
2925 }
2926 return dst;
2927 }
2928
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const std::pair<int,int> & src_and_index)2929 LogicVRegister Simulator::dup_elements_to_segments(
2930 VectorFormat vform,
2931 LogicVRegister dst,
2932 const std::pair<int, int>& src_and_index) {
2933 return dup_elements_to_segments(vform,
2934 dst,
2935 ReadVRegister(src_and_index.first),
2936 src_and_index.second);
2937 }
2938
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2939 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2940 LogicVRegister dst,
2941 uint64_t imm) {
2942 int lane_count = LaneCountFromFormat(vform);
2943 uint64_t value = imm & MaxUintFromFormat(vform);
2944 dst.ClearForWrite(vform);
2945 for (int i = 0; i < lane_count; ++i) {
2946 dst.SetUint(vform, i, value);
2947 }
2948 return dst;
2949 }
2950
2951
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2952 LogicVRegister Simulator::ins_element(VectorFormat vform,
2953 LogicVRegister dst,
2954 int dst_index,
2955 const LogicVRegister& src,
2956 int src_index) {
2957 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2958 return dst;
2959 }
2960
2961
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2962 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2963 LogicVRegister dst,
2964 int dst_index,
2965 uint64_t imm) {
2966 uint64_t value = imm & MaxUintFromFormat(vform);
2967 dst.SetUint(vform, dst_index, value);
2968 return dst;
2969 }
2970
2971
index(VectorFormat vform,LogicVRegister dst,uint64_t start,uint64_t step)2972 LogicVRegister Simulator::index(VectorFormat vform,
2973 LogicVRegister dst,
2974 uint64_t start,
2975 uint64_t step) {
2976 VIXL_ASSERT(IsSVEFormat(vform));
2977 uint64_t value = start;
2978 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2979 dst.SetUint(vform, i, value);
2980 value += step;
2981 }
2982 return dst;
2983 }
2984
2985
insr(VectorFormat vform,LogicVRegister dst,uint64_t imm)2986 LogicVRegister Simulator::insr(VectorFormat vform,
2987 LogicVRegister dst,
2988 uint64_t imm) {
2989 VIXL_ASSERT(IsSVEFormat(vform));
2990 for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
2991 dst.SetUint(vform, i, dst.Uint(vform, i - 1));
2992 }
2993 dst.SetUint(vform, 0, imm);
2994 return dst;
2995 }
2996
2997
mov(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2998 LogicVRegister Simulator::mov(VectorFormat vform,
2999 LogicVRegister dst,
3000 const LogicVRegister& src) {
3001 dst.ClearForWrite(vform);
3002 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
3003 dst.SetUint(vform, lane, src.Uint(vform, lane));
3004 }
3005 return dst;
3006 }
3007
3008
mov(LogicPRegister dst,const LogicPRegister & src)3009 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
3010 // Avoid a copy if the registers already alias.
3011 if (dst.Aliases(src)) return dst;
3012
3013 for (int i = 0; i < dst.GetChunkCount(); i++) {
3014 dst.SetChunk(i, src.GetChunk(i));
3015 }
3016 return dst;
3017 }
3018
3019
mov_merging(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3020 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3021 LogicVRegister dst,
3022 const SimPRegister& pg,
3023 const LogicVRegister& src) {
3024 return sel(vform, dst, pg, src, dst);
3025 }
3026
mov_zeroing(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3027 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3028 LogicVRegister dst,
3029 const SimPRegister& pg,
3030 const LogicVRegister& src) {
3031 SimVRegister zero;
3032 dup_immediate(vform, zero, 0);
3033 return sel(vform, dst, pg, src, zero);
3034 }
3035
mov_alternating(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int start_at)3036 LogicVRegister Simulator::mov_alternating(VectorFormat vform,
3037 LogicVRegister dst,
3038 const LogicVRegister& src,
3039 int start_at) {
3040 VIXL_ASSERT((start_at == 0) || (start_at == 1));
3041 for (int i = start_at; i < LaneCountFromFormat(vform); i += 2) {
3042 dst.SetUint(vform, i, src.Uint(vform, i));
3043 }
3044 return dst;
3045 }
3046
mov_merging(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3047 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3048 const LogicPRegister& pg,
3049 const LogicPRegister& src) {
3050 return sel(dst, pg, src, dst);
3051 }
3052
mov_zeroing(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3053 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3054 const LogicPRegister& pg,
3055 const LogicPRegister& src) {
3056 SimPRegister all_false;
3057 return sel(dst, pg, src, pfalse(all_false));
3058 }
3059
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)3060 LogicVRegister Simulator::movi(VectorFormat vform,
3061 LogicVRegister dst,
3062 uint64_t imm) {
3063 int lane_count = LaneCountFromFormat(vform);
3064 dst.ClearForWrite(vform);
3065 for (int i = 0; i < lane_count; ++i) {
3066 dst.SetUint(vform, i, imm);
3067 }
3068 return dst;
3069 }
3070
3071
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)3072 LogicVRegister Simulator::mvni(VectorFormat vform,
3073 LogicVRegister dst,
3074 uint64_t imm) {
3075 int lane_count = LaneCountFromFormat(vform);
3076 dst.ClearForWrite(vform);
3077 for (int i = 0; i < lane_count; ++i) {
3078 dst.SetUint(vform, i, ~imm);
3079 }
3080 return dst;
3081 }
3082
3083
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)3084 LogicVRegister Simulator::orr(VectorFormat vform,
3085 LogicVRegister dst,
3086 const LogicVRegister& src,
3087 uint64_t imm) {
3088 uint64_t result[16];
3089 int lane_count = LaneCountFromFormat(vform);
3090 for (int i = 0; i < lane_count; ++i) {
3091 result[i] = src.Uint(vform, i) | imm;
3092 }
3093 dst.ClearForWrite(vform);
3094 for (int i = 0; i < lane_count; ++i) {
3095 dst.SetUint(vform, i, result[i]);
3096 }
3097 return dst;
3098 }
3099
3100
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3101 LogicVRegister Simulator::uxtl(VectorFormat vform,
3102 LogicVRegister dst,
3103 const LogicVRegister& src,
3104 bool is_2) {
3105 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3106 int lane_count = LaneCountFromFormat(vform);
3107 int src_offset = is_2 ? lane_count : 0;
3108
3109 dst.ClearForWrite(vform);
3110 for (int i = 0; i < lane_count; i++) {
3111 dst.SetUint(vform, i, src.Uint(vform_half, src_offset + i));
3112 }
3113 return dst;
3114 }
3115
3116
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_2)3117 LogicVRegister Simulator::sxtl(VectorFormat vform,
3118 LogicVRegister dst,
3119 const LogicVRegister& src,
3120 bool is_2) {
3121 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3122 int lane_count = LaneCountFromFormat(vform);
3123 int src_offset = is_2 ? lane_count : 0;
3124
3125 dst.ClearForWrite(vform);
3126 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3127 dst.SetInt(vform, i, src.Int(vform_half, src_offset + i));
3128 }
3129 return dst;
3130 }
3131
3132
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3133 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3134 LogicVRegister dst,
3135 const LogicVRegister& src) {
3136 return uxtl(vform, dst, src, /* is_2 = */ true);
3137 }
3138
3139
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3140 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3141 LogicVRegister dst,
3142 const LogicVRegister& src) {
3143 return sxtl(vform, dst, src, /* is_2 = */ true);
3144 }
3145
3146
uxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3147 LogicVRegister Simulator::uxt(VectorFormat vform,
3148 LogicVRegister dst,
3149 const LogicVRegister& src,
3150 unsigned from_size_in_bits) {
3151 int lane_count = LaneCountFromFormat(vform);
3152 uint64_t mask = GetUintMask(from_size_in_bits);
3153
3154 dst.ClearForWrite(vform);
3155 for (int i = 0; i < lane_count; i++) {
3156 dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3157 }
3158 return dst;
3159 }
3160
3161
sxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3162 LogicVRegister Simulator::sxt(VectorFormat vform,
3163 LogicVRegister dst,
3164 const LogicVRegister& src,
3165 unsigned from_size_in_bits) {
3166 int lane_count = LaneCountFromFormat(vform);
3167
3168 dst.ClearForWrite(vform);
3169 for (int i = 0; i < lane_count; i++) {
3170 uint64_t value =
3171 ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3172 dst.SetInt(vform, i, value);
3173 }
3174 return dst;
3175 }
3176
3177
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3178 LogicVRegister Simulator::shrn(VectorFormat vform,
3179 LogicVRegister dst,
3180 const LogicVRegister& src,
3181 int shift) {
3182 SimVRegister temp;
3183 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3184 VectorFormat vform_dst = vform;
3185 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3186 return extractnarrow(vform_dst, dst, false, shifted_src, false);
3187 }
3188
3189
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3190 LogicVRegister Simulator::shrn2(VectorFormat vform,
3191 LogicVRegister dst,
3192 const LogicVRegister& src,
3193 int shift) {
3194 SimVRegister temp;
3195 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3196 VectorFormat vformdst = vform;
3197 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3198 return extractnarrow(vformdst, dst, false, shifted_src, false);
3199 }
3200
3201
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3202 LogicVRegister Simulator::rshrn(VectorFormat vform,
3203 LogicVRegister dst,
3204 const LogicVRegister& src,
3205 int shift) {
3206 SimVRegister temp;
3207 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3208 VectorFormat vformdst = vform;
3209 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3210 return extractnarrow(vformdst, dst, false, shifted_src, false);
3211 }
3212
3213
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3214 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3215 LogicVRegister dst,
3216 const LogicVRegister& src,
3217 int shift) {
3218 SimVRegister temp;
3219 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3220 VectorFormat vformdst = vform;
3221 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3222 return extractnarrow(vformdst, dst, false, shifted_src, false);
3223 }
3224
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)3225 LogicVRegister Simulator::Table(VectorFormat vform,
3226 LogicVRegister dst,
3227 const LogicVRegister& ind,
3228 bool zero_out_of_bounds,
3229 const LogicVRegister* tab1,
3230 const LogicVRegister* tab2,
3231 const LogicVRegister* tab3,
3232 const LogicVRegister* tab4) {
3233 VIXL_ASSERT(tab1 != NULL);
3234 int lane_count = LaneCountFromFormat(vform);
3235 VIXL_ASSERT((tab3 == NULL) || (lane_count <= 16));
3236 uint64_t table[kZRegMaxSizeInBytes * 2];
3237 uint64_t result[kZRegMaxSizeInBytes];
3238
3239 // For Neon, the table source registers are always 16B, and Neon allows only
3240 // 8B or 16B vform for the destination, so infer the table format from the
3241 // destination.
3242 VectorFormat vform_tab = (vform == kFormat8B) ? kFormat16B : vform;
3243
3244 uint64_t tab_size = tab1->UintArray(vform_tab, &table[0]);
3245 if (tab2 != NULL) tab_size += tab2->UintArray(vform_tab, &table[tab_size]);
3246 if (tab3 != NULL) tab_size += tab3->UintArray(vform_tab, &table[tab_size]);
3247 if (tab4 != NULL) tab_size += tab4->UintArray(vform_tab, &table[tab_size]);
3248
3249 for (int i = 0; i < lane_count; i++) {
3250 uint64_t index = ind.Uint(vform, i);
3251 result[i] = zero_out_of_bounds ? 0 : dst.Uint(vform, i);
3252 if (index < tab_size) result[i] = table[index];
3253 }
3254 dst.SetUintArray(vform, result);
3255 return dst;
3256 }
3257
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3258 LogicVRegister Simulator::tbl(VectorFormat vform,
3259 LogicVRegister dst,
3260 const LogicVRegister& tab,
3261 const LogicVRegister& ind) {
3262 return Table(vform, dst, ind, true, &tab);
3263 }
3264
3265
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3266 LogicVRegister Simulator::tbl(VectorFormat vform,
3267 LogicVRegister dst,
3268 const LogicVRegister& tab,
3269 const LogicVRegister& tab2,
3270 const LogicVRegister& ind) {
3271 return Table(vform, dst, ind, true, &tab, &tab2);
3272 }
3273
3274
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3275 LogicVRegister Simulator::tbl(VectorFormat vform,
3276 LogicVRegister dst,
3277 const LogicVRegister& tab,
3278 const LogicVRegister& tab2,
3279 const LogicVRegister& tab3,
3280 const LogicVRegister& ind) {
3281 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3282 }
3283
3284
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3285 LogicVRegister Simulator::tbl(VectorFormat vform,
3286 LogicVRegister dst,
3287 const LogicVRegister& tab,
3288 const LogicVRegister& tab2,
3289 const LogicVRegister& tab3,
3290 const LogicVRegister& tab4,
3291 const LogicVRegister& ind) {
3292 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3293 }
3294
3295
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3296 LogicVRegister Simulator::tbx(VectorFormat vform,
3297 LogicVRegister dst,
3298 const LogicVRegister& tab,
3299 const LogicVRegister& ind) {
3300 return Table(vform, dst, ind, false, &tab);
3301 }
3302
3303
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3304 LogicVRegister Simulator::tbx(VectorFormat vform,
3305 LogicVRegister dst,
3306 const LogicVRegister& tab,
3307 const LogicVRegister& tab2,
3308 const LogicVRegister& ind) {
3309 return Table(vform, dst, ind, false, &tab, &tab2);
3310 }
3311
3312
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3313 LogicVRegister Simulator::tbx(VectorFormat vform,
3314 LogicVRegister dst,
3315 const LogicVRegister& tab,
3316 const LogicVRegister& tab2,
3317 const LogicVRegister& tab3,
3318 const LogicVRegister& ind) {
3319 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3320 }
3321
3322
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3323 LogicVRegister Simulator::tbx(VectorFormat vform,
3324 LogicVRegister dst,
3325 const LogicVRegister& tab,
3326 const LogicVRegister& tab2,
3327 const LogicVRegister& tab3,
3328 const LogicVRegister& tab4,
3329 const LogicVRegister& ind) {
3330 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3331 }
3332
3333
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3334 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3335 LogicVRegister dst,
3336 const LogicVRegister& src,
3337 int shift) {
3338 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3339 }
3340
3341
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3342 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3343 LogicVRegister dst,
3344 const LogicVRegister& src,
3345 int shift) {
3346 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3347 }
3348
3349
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3350 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3351 LogicVRegister dst,
3352 const LogicVRegister& src,
3353 int shift) {
3354 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3355 }
3356
3357
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3358 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3359 LogicVRegister dst,
3360 const LogicVRegister& src,
3361 int shift) {
3362 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3363 }
3364
3365
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3366 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3367 LogicVRegister dst,
3368 const LogicVRegister& src,
3369 int shift) {
3370 SimVRegister temp;
3371 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3372 VectorFormat vformdst = vform;
3373 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3374 return sqxtn(vformdst, dst, shifted_src);
3375 }
3376
3377
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3378 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3379 LogicVRegister dst,
3380 const LogicVRegister& src,
3381 int shift) {
3382 SimVRegister temp;
3383 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3384 VectorFormat vformdst = vform;
3385 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3386 return sqxtn(vformdst, dst, shifted_src);
3387 }
3388
3389
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3390 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3391 LogicVRegister dst,
3392 const LogicVRegister& src,
3393 int shift) {
3394 SimVRegister temp;
3395 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3396 VectorFormat vformdst = vform;
3397 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3398 return sqxtn(vformdst, dst, shifted_src);
3399 }
3400
3401
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3402 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3403 LogicVRegister dst,
3404 const LogicVRegister& src,
3405 int shift) {
3406 SimVRegister temp;
3407 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3408 VectorFormat vformdst = vform;
3409 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3410 return sqxtn(vformdst, dst, shifted_src);
3411 }
3412
3413
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3414 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3415 LogicVRegister dst,
3416 const LogicVRegister& src,
3417 int shift) {
3418 SimVRegister temp;
3419 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3420 VectorFormat vformdst = vform;
3421 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3422 return sqxtun(vformdst, dst, shifted_src);
3423 }
3424
3425
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3426 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3427 LogicVRegister dst,
3428 const LogicVRegister& src,
3429 int shift) {
3430 SimVRegister temp;
3431 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3432 VectorFormat vformdst = vform;
3433 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3434 return sqxtun(vformdst, dst, shifted_src);
3435 }
3436
3437
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3438 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3439 LogicVRegister dst,
3440 const LogicVRegister& src,
3441 int shift) {
3442 SimVRegister temp;
3443 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3444 VectorFormat vformdst = vform;
3445 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3446 return sqxtun(vformdst, dst, shifted_src);
3447 }
3448
3449
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3450 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3451 LogicVRegister dst,
3452 const LogicVRegister& src,
3453 int shift) {
3454 SimVRegister temp;
3455 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3456 VectorFormat vformdst = vform;
3457 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3458 return sqxtun(vformdst, dst, shifted_src);
3459 }
3460
3461
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3462 LogicVRegister Simulator::uaddl(VectorFormat vform,
3463 LogicVRegister dst,
3464 const LogicVRegister& src1,
3465 const LogicVRegister& src2) {
3466 SimVRegister temp1, temp2;
3467 uxtl(vform, temp1, src1);
3468 uxtl(vform, temp2, src2);
3469 add(vform, dst, temp1, temp2);
3470 return dst;
3471 }
3472
3473
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3474 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3475 LogicVRegister dst,
3476 const LogicVRegister& src1,
3477 const LogicVRegister& src2) {
3478 SimVRegister temp1, temp2;
3479 uxtl2(vform, temp1, src1);
3480 uxtl2(vform, temp2, src2);
3481 add(vform, dst, temp1, temp2);
3482 return dst;
3483 }
3484
3485
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3486 LogicVRegister Simulator::uaddw(VectorFormat vform,
3487 LogicVRegister dst,
3488 const LogicVRegister& src1,
3489 const LogicVRegister& src2) {
3490 SimVRegister temp;
3491 uxtl(vform, temp, src2);
3492 add(vform, dst, src1, temp);
3493 return dst;
3494 }
3495
3496
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3497 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3498 LogicVRegister dst,
3499 const LogicVRegister& src1,
3500 const LogicVRegister& src2) {
3501 SimVRegister temp;
3502 uxtl2(vform, temp, src2);
3503 add(vform, dst, src1, temp);
3504 return dst;
3505 }
3506
3507
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3508 LogicVRegister Simulator::saddl(VectorFormat vform,
3509 LogicVRegister dst,
3510 const LogicVRegister& src1,
3511 const LogicVRegister& src2) {
3512 SimVRegister temp1, temp2;
3513 sxtl(vform, temp1, src1);
3514 sxtl(vform, temp2, src2);
3515 add(vform, dst, temp1, temp2);
3516 return dst;
3517 }
3518
3519
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3520 LogicVRegister Simulator::saddl2(VectorFormat vform,
3521 LogicVRegister dst,
3522 const LogicVRegister& src1,
3523 const LogicVRegister& src2) {
3524 SimVRegister temp1, temp2;
3525 sxtl2(vform, temp1, src1);
3526 sxtl2(vform, temp2, src2);
3527 add(vform, dst, temp1, temp2);
3528 return dst;
3529 }
3530
3531
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3532 LogicVRegister Simulator::saddw(VectorFormat vform,
3533 LogicVRegister dst,
3534 const LogicVRegister& src1,
3535 const LogicVRegister& src2) {
3536 SimVRegister temp;
3537 sxtl(vform, temp, src2);
3538 add(vform, dst, src1, temp);
3539 return dst;
3540 }
3541
3542
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3543 LogicVRegister Simulator::saddw2(VectorFormat vform,
3544 LogicVRegister dst,
3545 const LogicVRegister& src1,
3546 const LogicVRegister& src2) {
3547 SimVRegister temp;
3548 sxtl2(vform, temp, src2);
3549 add(vform, dst, src1, temp);
3550 return dst;
3551 }
3552
3553
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3554 LogicVRegister Simulator::usubl(VectorFormat vform,
3555 LogicVRegister dst,
3556 const LogicVRegister& src1,
3557 const LogicVRegister& src2) {
3558 SimVRegister temp1, temp2;
3559 uxtl(vform, temp1, src1);
3560 uxtl(vform, temp2, src2);
3561 sub(vform, dst, temp1, temp2);
3562 return dst;
3563 }
3564
3565
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3566 LogicVRegister Simulator::usubl2(VectorFormat vform,
3567 LogicVRegister dst,
3568 const LogicVRegister& src1,
3569 const LogicVRegister& src2) {
3570 SimVRegister temp1, temp2;
3571 uxtl2(vform, temp1, src1);
3572 uxtl2(vform, temp2, src2);
3573 sub(vform, dst, temp1, temp2);
3574 return dst;
3575 }
3576
3577
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3578 LogicVRegister Simulator::usubw(VectorFormat vform,
3579 LogicVRegister dst,
3580 const LogicVRegister& src1,
3581 const LogicVRegister& src2) {
3582 SimVRegister temp;
3583 uxtl(vform, temp, src2);
3584 sub(vform, dst, src1, temp);
3585 return dst;
3586 }
3587
3588
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3589 LogicVRegister Simulator::usubw2(VectorFormat vform,
3590 LogicVRegister dst,
3591 const LogicVRegister& src1,
3592 const LogicVRegister& src2) {
3593 SimVRegister temp;
3594 uxtl2(vform, temp, src2);
3595 sub(vform, dst, src1, temp);
3596 return dst;
3597 }
3598
3599
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3600 LogicVRegister Simulator::ssubl(VectorFormat vform,
3601 LogicVRegister dst,
3602 const LogicVRegister& src1,
3603 const LogicVRegister& src2) {
3604 SimVRegister temp1, temp2;
3605 sxtl(vform, temp1, src1);
3606 sxtl(vform, temp2, src2);
3607 sub(vform, dst, temp1, temp2);
3608 return dst;
3609 }
3610
3611
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3612 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3613 LogicVRegister dst,
3614 const LogicVRegister& src1,
3615 const LogicVRegister& src2) {
3616 SimVRegister temp1, temp2;
3617 sxtl2(vform, temp1, src1);
3618 sxtl2(vform, temp2, src2);
3619 sub(vform, dst, temp1, temp2);
3620 return dst;
3621 }
3622
3623
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3624 LogicVRegister Simulator::ssubw(VectorFormat vform,
3625 LogicVRegister dst,
3626 const LogicVRegister& src1,
3627 const LogicVRegister& src2) {
3628 SimVRegister temp;
3629 sxtl(vform, temp, src2);
3630 sub(vform, dst, src1, temp);
3631 return dst;
3632 }
3633
3634
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3635 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3636 LogicVRegister dst,
3637 const LogicVRegister& src1,
3638 const LogicVRegister& src2) {
3639 SimVRegister temp;
3640 sxtl2(vform, temp, src2);
3641 sub(vform, dst, src1, temp);
3642 return dst;
3643 }
3644
3645
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3646 LogicVRegister Simulator::uabal(VectorFormat vform,
3647 LogicVRegister dst,
3648 const LogicVRegister& src1,
3649 const LogicVRegister& src2) {
3650 SimVRegister temp1, temp2;
3651 uxtl(vform, temp1, src1);
3652 uxtl(vform, temp2, src2);
3653 uaba(vform, dst, temp1, temp2);
3654 return dst;
3655 }
3656
3657
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3658 LogicVRegister Simulator::uabal2(VectorFormat vform,
3659 LogicVRegister dst,
3660 const LogicVRegister& src1,
3661 const LogicVRegister& src2) {
3662 SimVRegister temp1, temp2;
3663 uxtl2(vform, temp1, src1);
3664 uxtl2(vform, temp2, src2);
3665 uaba(vform, dst, temp1, temp2);
3666 return dst;
3667 }
3668
3669
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3670 LogicVRegister Simulator::sabal(VectorFormat vform,
3671 LogicVRegister dst,
3672 const LogicVRegister& src1,
3673 const LogicVRegister& src2) {
3674 SimVRegister temp1, temp2;
3675 sxtl(vform, temp1, src1);
3676 sxtl(vform, temp2, src2);
3677 saba(vform, dst, temp1, temp2);
3678 return dst;
3679 }
3680
3681
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3682 LogicVRegister Simulator::sabal2(VectorFormat vform,
3683 LogicVRegister dst,
3684 const LogicVRegister& src1,
3685 const LogicVRegister& src2) {
3686 SimVRegister temp1, temp2;
3687 sxtl2(vform, temp1, src1);
3688 sxtl2(vform, temp2, src2);
3689 saba(vform, dst, temp1, temp2);
3690 return dst;
3691 }
3692
3693
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3694 LogicVRegister Simulator::uabdl(VectorFormat vform,
3695 LogicVRegister dst,
3696 const LogicVRegister& src1,
3697 const LogicVRegister& src2) {
3698 SimVRegister temp1, temp2;
3699 uxtl(vform, temp1, src1);
3700 uxtl(vform, temp2, src2);
3701 absdiff(vform, dst, temp1, temp2, false);
3702 return dst;
3703 }
3704
3705
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3706 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3707 LogicVRegister dst,
3708 const LogicVRegister& src1,
3709 const LogicVRegister& src2) {
3710 SimVRegister temp1, temp2;
3711 uxtl2(vform, temp1, src1);
3712 uxtl2(vform, temp2, src2);
3713 absdiff(vform, dst, temp1, temp2, false);
3714 return dst;
3715 }
3716
3717
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3718 LogicVRegister Simulator::sabdl(VectorFormat vform,
3719 LogicVRegister dst,
3720 const LogicVRegister& src1,
3721 const LogicVRegister& src2) {
3722 SimVRegister temp1, temp2;
3723 sxtl(vform, temp1, src1);
3724 sxtl(vform, temp2, src2);
3725 absdiff(vform, dst, temp1, temp2, true);
3726 return dst;
3727 }
3728
3729
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3730 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3731 LogicVRegister dst,
3732 const LogicVRegister& src1,
3733 const LogicVRegister& src2) {
3734 SimVRegister temp1, temp2;
3735 sxtl2(vform, temp1, src1);
3736 sxtl2(vform, temp2, src2);
3737 absdiff(vform, dst, temp1, temp2, true);
3738 return dst;
3739 }
3740
3741
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3742 LogicVRegister Simulator::umull(VectorFormat vform,
3743 LogicVRegister dst,
3744 const LogicVRegister& src1,
3745 const LogicVRegister& src2,
3746 bool is_2) {
3747 SimVRegister temp1, temp2;
3748 uxtl(vform, temp1, src1, is_2);
3749 uxtl(vform, temp2, src2, is_2);
3750 mul(vform, dst, temp1, temp2);
3751 return dst;
3752 }
3753
3754
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3755 LogicVRegister Simulator::umull2(VectorFormat vform,
3756 LogicVRegister dst,
3757 const LogicVRegister& src1,
3758 const LogicVRegister& src2) {
3759 return umull(vform, dst, src1, src2, /* is_2 = */ true);
3760 }
3761
3762
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3763 LogicVRegister Simulator::smull(VectorFormat vform,
3764 LogicVRegister dst,
3765 const LogicVRegister& src1,
3766 const LogicVRegister& src2,
3767 bool is_2) {
3768 SimVRegister temp1, temp2;
3769 sxtl(vform, temp1, src1, is_2);
3770 sxtl(vform, temp2, src2, is_2);
3771 mul(vform, dst, temp1, temp2);
3772 return dst;
3773 }
3774
3775
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3776 LogicVRegister Simulator::smull2(VectorFormat vform,
3777 LogicVRegister dst,
3778 const LogicVRegister& src1,
3779 const LogicVRegister& src2) {
3780 return smull(vform, dst, src1, src2, /* is_2 = */ true);
3781 }
3782
3783
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3784 LogicVRegister Simulator::umlsl(VectorFormat vform,
3785 LogicVRegister dst,
3786 const LogicVRegister& src1,
3787 const LogicVRegister& src2,
3788 bool is_2) {
3789 SimVRegister temp1, temp2;
3790 uxtl(vform, temp1, src1, is_2);
3791 uxtl(vform, temp2, src2, is_2);
3792 mls(vform, dst, dst, temp1, temp2);
3793 return dst;
3794 }
3795
3796
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3797 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3798 LogicVRegister dst,
3799 const LogicVRegister& src1,
3800 const LogicVRegister& src2) {
3801 return umlsl(vform, dst, src1, src2, /* is_2 = */ true);
3802 }
3803
3804
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3805 LogicVRegister Simulator::smlsl(VectorFormat vform,
3806 LogicVRegister dst,
3807 const LogicVRegister& src1,
3808 const LogicVRegister& src2,
3809 bool is_2) {
3810 SimVRegister temp1, temp2;
3811 sxtl(vform, temp1, src1, is_2);
3812 sxtl(vform, temp2, src2, is_2);
3813 mls(vform, dst, dst, temp1, temp2);
3814 return dst;
3815 }
3816
3817
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3818 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3819 LogicVRegister dst,
3820 const LogicVRegister& src1,
3821 const LogicVRegister& src2) {
3822 return smlsl(vform, dst, src1, src2, /* is_2 = */ true);
3823 }
3824
3825
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3826 LogicVRegister Simulator::umlal(VectorFormat vform,
3827 LogicVRegister dst,
3828 const LogicVRegister& src1,
3829 const LogicVRegister& src2,
3830 bool is_2) {
3831 SimVRegister temp1, temp2;
3832 uxtl(vform, temp1, src1, is_2);
3833 uxtl(vform, temp2, src2, is_2);
3834 mla(vform, dst, dst, temp1, temp2);
3835 return dst;
3836 }
3837
3838
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3839 LogicVRegister Simulator::umlal2(VectorFormat vform,
3840 LogicVRegister dst,
3841 const LogicVRegister& src1,
3842 const LogicVRegister& src2) {
3843 return umlal(vform, dst, src1, src2, /* is_2 = */ true);
3844 }
3845
3846
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3847 LogicVRegister Simulator::smlal(VectorFormat vform,
3848 LogicVRegister dst,
3849 const LogicVRegister& src1,
3850 const LogicVRegister& src2,
3851 bool is_2) {
3852 SimVRegister temp1, temp2;
3853 sxtl(vform, temp1, src1, is_2);
3854 sxtl(vform, temp2, src2, is_2);
3855 mla(vform, dst, dst, temp1, temp2);
3856 return dst;
3857 }
3858
3859
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3860 LogicVRegister Simulator::smlal2(VectorFormat vform,
3861 LogicVRegister dst,
3862 const LogicVRegister& src1,
3863 const LogicVRegister& src2) {
3864 return smlal(vform, dst, src1, src2, /* is_2 = */ true);
3865 }
3866
3867
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3868 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3869 LogicVRegister dst,
3870 const LogicVRegister& src1,
3871 const LogicVRegister& src2,
3872 bool is_2) {
3873 SimVRegister temp;
3874 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3875 return add(vform, dst, dst, product).SignedSaturate(vform);
3876 }
3877
3878
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3879 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3880 LogicVRegister dst,
3881 const LogicVRegister& src1,
3882 const LogicVRegister& src2) {
3883 return sqdmlal(vform, dst, src1, src2, /* is_2 = */ true);
3884 }
3885
3886
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3887 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3888 LogicVRegister dst,
3889 const LogicVRegister& src1,
3890 const LogicVRegister& src2,
3891 bool is_2) {
3892 SimVRegister temp;
3893 LogicVRegister product = sqdmull(vform, temp, src1, src2, is_2);
3894 return sub(vform, dst, dst, product).SignedSaturate(vform);
3895 }
3896
3897
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3898 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3899 LogicVRegister dst,
3900 const LogicVRegister& src1,
3901 const LogicVRegister& src2) {
3902 return sqdmlsl(vform, dst, src1, src2, /* is_2 = */ true);
3903 }
3904
3905
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_2)3906 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3907 LogicVRegister dst,
3908 const LogicVRegister& src1,
3909 const LogicVRegister& src2,
3910 bool is_2) {
3911 SimVRegister temp;
3912 LogicVRegister product = smull(vform, temp, src1, src2, is_2);
3913 return add(vform, dst, product, product).SignedSaturate(vform);
3914 }
3915
3916
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3917 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3918 LogicVRegister dst,
3919 const LogicVRegister& src1,
3920 const LogicVRegister& src2) {
3921 return sqdmull(vform, dst, src1, src2, /* is_2 = */ true);
3922 }
3923
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3924 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3925 LogicVRegister dst,
3926 const LogicVRegister& src1,
3927 const LogicVRegister& src2,
3928 bool round) {
3929 int esize = LaneSizeInBitsFromFormat(vform);
3930
3931 SimVRegister temp_lo, temp_hi;
3932
3933 // Compute low and high multiplication results.
3934 mul(vform, temp_lo, src1, src2);
3935 smulh(vform, temp_hi, src1, src2);
3936
3937 // Double by shifting high half, and adding in most-significant bit of low
3938 // half.
3939 shl(vform, temp_hi, temp_hi, 1);
3940 usra(vform, temp_hi, temp_lo, esize - 1);
3941
3942 if (round) {
3943 // Add the second (due to doubling) most-significant bit of the low half
3944 // into the result.
3945 shl(vform, temp_lo, temp_lo, 1);
3946 usra(vform, temp_hi, temp_lo, esize - 1);
3947 }
3948
3949 SimPRegister not_sat;
3950 LogicPRegister ptemp(not_sat);
3951 dst.ClearForWrite(vform);
3952 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3953 // Saturation only occurs when src1 = src2 = minimum representable value.
3954 // Check this as a special case.
3955 ptemp.SetActive(vform, i, true);
3956 if ((src1.Int(vform, i) == MinIntFromFormat(vform)) &&
3957 (src2.Int(vform, i) == MinIntFromFormat(vform))) {
3958 ptemp.SetActive(vform, i, false);
3959 }
3960 dst.SetInt(vform, i, MaxIntFromFormat(vform));
3961 }
3962
3963 mov_merging(vform, dst, not_sat, temp_hi);
3964 return dst;
3965 }
3966
3967
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_src1_signed,bool is_src2_signed)3968 LogicVRegister Simulator::dot(VectorFormat vform,
3969 LogicVRegister dst,
3970 const LogicVRegister& src1,
3971 const LogicVRegister& src2,
3972 bool is_src1_signed,
3973 bool is_src2_signed) {
3974 VectorFormat quarter_vform =
3975 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3976
3977 dst.ClearForWrite(vform);
3978 for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3979 uint64_t result = 0;
3980 int64_t element1, element2;
3981 for (int i = 0; i < 4; i++) {
3982 int index = 4 * e + i;
3983 if (is_src1_signed) {
3984 element1 = src1.Int(quarter_vform, index);
3985 } else {
3986 element1 = src1.Uint(quarter_vform, index);
3987 }
3988 if (is_src2_signed) {
3989 element2 = src2.Int(quarter_vform, index);
3990 } else {
3991 element2 = src2.Uint(quarter_vform, index);
3992 }
3993 result += element1 * element2;
3994 }
3995 dst.SetUint(vform, e, result + dst.Uint(vform, e));
3996 }
3997 return dst;
3998 }
3999
4000
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4001 LogicVRegister Simulator::sdot(VectorFormat vform,
4002 LogicVRegister dst,
4003 const LogicVRegister& src1,
4004 const LogicVRegister& src2) {
4005 return dot(vform, dst, src1, src2, true, true);
4006 }
4007
4008
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4009 LogicVRegister Simulator::udot(VectorFormat vform,
4010 LogicVRegister dst,
4011 const LogicVRegister& src1,
4012 const LogicVRegister& src2) {
4013 return dot(vform, dst, src1, src2, false, false);
4014 }
4015
usdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4016 LogicVRegister Simulator::usdot(VectorFormat vform,
4017 LogicVRegister dst,
4018 const LogicVRegister& src1,
4019 const LogicVRegister& src2) {
4020 return dot(vform, dst, src1, src2, false, true);
4021 }
4022
cdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & acc,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4023 LogicVRegister Simulator::cdot(VectorFormat vform,
4024 LogicVRegister dst,
4025 const LogicVRegister& acc,
4026 const LogicVRegister& src1,
4027 const LogicVRegister& src2,
4028 int rot) {
4029 VIXL_ASSERT((rot == 0) || (rot == 90) || (rot == 180) || (rot == 270));
4030 VectorFormat quarter_vform =
4031 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
4032
4033 int sel_a = ((rot == 0) || (rot == 180)) ? 0 : 1;
4034 int sel_b = 1 - sel_a;
4035 int sub_i = ((rot == 90) || (rot == 180)) ? 1 : -1;
4036
4037 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4038 int64_t result = acc.Int(vform, i);
4039 for (int j = 0; j < 2; j++) {
4040 int64_t r1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 0);
4041 int64_t i1 = src1.Int(quarter_vform, (4 * i) + (2 * j) + 1);
4042 int64_t r2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_a);
4043 int64_t i2 = src2.Int(quarter_vform, (4 * i) + (2 * j) + sel_b);
4044 result += (r1 * r2) + (sub_i * i1 * i2);
4045 }
4046 dst.SetInt(vform, i, result);
4047 }
4048 return dst;
4049 }
4050
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int rot)4051 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4052 LogicVRegister dst,
4053 const LogicVRegister& srca,
4054 const LogicVRegister& src1,
4055 const LogicVRegister& src2,
4056 int rot) {
4057 SimVRegister src1_a, src1_b;
4058 SimVRegister src2_a, src2_b;
4059 SimVRegister srca_i, srca_r;
4060 SimVRegister zero, temp;
4061 zero.Clear();
4062
4063 if ((rot == 0) || (rot == 180)) {
4064 uzp1(vform, src1_a, src1, zero);
4065 uzp1(vform, src2_a, src2, zero);
4066 uzp2(vform, src2_b, src2, zero);
4067 } else {
4068 uzp2(vform, src1_a, src1, zero);
4069 uzp2(vform, src2_a, src2, zero);
4070 uzp1(vform, src2_b, src2, zero);
4071 }
4072
4073 uzp1(vform, srca_r, srca, zero);
4074 uzp2(vform, srca_i, srca, zero);
4075
4076 bool sub_r = (rot == 90) || (rot == 180);
4077 bool sub_i = (rot == 180) || (rot == 270);
4078
4079 const bool round = true;
4080 sqrdmlash(vform, srca_r, src1_a, src2_a, round, sub_r);
4081 sqrdmlash(vform, srca_i, src1_a, src2_b, round, sub_i);
4082 zip1(vform, dst, srca_r, srca_i);
4083 return dst;
4084 }
4085
sqrdcmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)4086 LogicVRegister Simulator::sqrdcmlah(VectorFormat vform,
4087 LogicVRegister dst,
4088 const LogicVRegister& srca,
4089 const LogicVRegister& src1,
4090 const LogicVRegister& src2,
4091 int index,
4092 int rot) {
4093 SimVRegister temp;
4094 dup_elements_to_segments(VectorFormatDoubleWidth(vform), temp, src2, index);
4095 return sqrdcmlah(vform, dst, srca, src1, temp, rot);
4096 }
4097
sqrdmlash_d(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4098 LogicVRegister Simulator::sqrdmlash_d(VectorFormat vform,
4099 LogicVRegister dst,
4100 const LogicVRegister& src1,
4101 const LogicVRegister& src2,
4102 bool round,
4103 bool sub_op) {
4104 // 2 * INT_64_MIN * INT_64_MIN causes INT_128 to overflow.
4105 // To avoid this, we use:
4106 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4107 // which is same as:
4108 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4109
4110 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4111 int esize = kDRegSize;
4112 vixl_uint128_t round_const, accum;
4113 round_const.first = 0;
4114 if (round) {
4115 round_const.second = UINT64_C(1) << (esize - 2);
4116 } else {
4117 round_const.second = 0;
4118 }
4119
4120 dst.ClearForWrite(vform);
4121 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4122 // Shift the whole value left by `esize - 1` bits.
4123 accum.first = dst.Int(vform, i) >> 1;
4124 accum.second = dst.Int(vform, i) << (esize - 1);
4125
4126 vixl_uint128_t product = Mul64(src1.Int(vform, i), src2.Int(vform, i));
4127
4128 if (sub_op) {
4129 product = Neg128(product);
4130 }
4131 accum = Add128(accum, product);
4132
4133 // Perform rounding.
4134 accum = Add128(accum, round_const);
4135
4136 // Arithmetic shift the whole value right by `esize - 1` bits.
4137 accum.second = (accum.first << 1) | (accum.second >> (esize - 1));
4138 accum.first = -(accum.first >> (esize - 1));
4139
4140 // Perform saturation.
4141 bool is_pos = (accum.first == 0) ? true : false;
4142 if (is_pos &&
4143 (accum.second > static_cast<uint64_t>(MaxIntFromFormat(vform)))) {
4144 accum.second = MaxIntFromFormat(vform);
4145 } else if (!is_pos && (accum.second <
4146 static_cast<uint64_t>(MinIntFromFormat(vform)))) {
4147 accum.second = MinIntFromFormat(vform);
4148 }
4149
4150 dst.SetInt(vform, i, accum.second);
4151 }
4152
4153 return dst;
4154 }
4155
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4156 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4157 LogicVRegister dst,
4158 const LogicVRegister& src1,
4159 const LogicVRegister& src2,
4160 bool round,
4161 bool sub_op) {
4162 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4163 // To avoid this, we use:
4164 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4165 // which is same as:
4166 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4167
4168 if (vform == kFormatVnD) {
4169 return sqrdmlash_d(vform, dst, src1, src2, round, sub_op);
4170 }
4171
4172 int esize = LaneSizeInBitsFromFormat(vform);
4173 int round_const = round ? (1 << (esize - 2)) : 0;
4174 int64_t accum;
4175
4176 dst.ClearForWrite(vform);
4177 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4178 accum = dst.Int(vform, i) << (esize - 1);
4179 if (sub_op) {
4180 accum -= src1.Int(vform, i) * src2.Int(vform, i);
4181 } else {
4182 accum += src1.Int(vform, i) * src2.Int(vform, i);
4183 }
4184 accum += round_const;
4185 accum = accum >> (esize - 1);
4186
4187 if (accum > MaxIntFromFormat(vform)) {
4188 accum = MaxIntFromFormat(vform);
4189 } else if (accum < MinIntFromFormat(vform)) {
4190 accum = MinIntFromFormat(vform);
4191 }
4192 dst.SetInt(vform, i, accum);
4193 }
4194 return dst;
4195 }
4196
4197
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4198 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4199 LogicVRegister dst,
4200 const LogicVRegister& src1,
4201 const LogicVRegister& src2,
4202 bool round) {
4203 return sqrdmlash(vform, dst, src1, src2, round, false);
4204 }
4205
4206
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4207 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4208 LogicVRegister dst,
4209 const LogicVRegister& src1,
4210 const LogicVRegister& src2,
4211 bool round) {
4212 return sqrdmlash(vform, dst, src1, src2, round, true);
4213 }
4214
4215
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4216 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4217 LogicVRegister dst,
4218 const LogicVRegister& src1,
4219 const LogicVRegister& src2) {
4220 return sqrdmulh(vform, dst, src1, src2, false);
4221 }
4222
4223
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4224 LogicVRegister Simulator::addhn(VectorFormat vform,
4225 LogicVRegister dst,
4226 const LogicVRegister& src1,
4227 const LogicVRegister& src2) {
4228 SimVRegister temp;
4229 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4230 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4231 return dst;
4232 }
4233
4234
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4235 LogicVRegister Simulator::addhn2(VectorFormat vform,
4236 LogicVRegister dst,
4237 const LogicVRegister& src1,
4238 const LogicVRegister& src2) {
4239 SimVRegister temp;
4240 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4241 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4242 return dst;
4243 }
4244
4245
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4246 LogicVRegister Simulator::raddhn(VectorFormat vform,
4247 LogicVRegister dst,
4248 const LogicVRegister& src1,
4249 const LogicVRegister& src2) {
4250 SimVRegister temp;
4251 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4252 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4253 return dst;
4254 }
4255
4256
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4257 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4258 LogicVRegister dst,
4259 const LogicVRegister& src1,
4260 const LogicVRegister& src2) {
4261 SimVRegister temp;
4262 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4263 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4264 return dst;
4265 }
4266
4267
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4268 LogicVRegister Simulator::subhn(VectorFormat vform,
4269 LogicVRegister dst,
4270 const LogicVRegister& src1,
4271 const LogicVRegister& src2) {
4272 SimVRegister temp;
4273 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4274 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4275 return dst;
4276 }
4277
4278
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4279 LogicVRegister Simulator::subhn2(VectorFormat vform,
4280 LogicVRegister dst,
4281 const LogicVRegister& src1,
4282 const LogicVRegister& src2) {
4283 SimVRegister temp;
4284 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4285 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4286 return dst;
4287 }
4288
4289
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4290 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4291 LogicVRegister dst,
4292 const LogicVRegister& src1,
4293 const LogicVRegister& src2) {
4294 SimVRegister temp;
4295 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4296 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4297 return dst;
4298 }
4299
4300
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4301 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4302 LogicVRegister dst,
4303 const LogicVRegister& src1,
4304 const LogicVRegister& src2) {
4305 SimVRegister temp;
4306 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4307 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4308 return dst;
4309 }
4310
4311
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4312 LogicVRegister Simulator::trn1(VectorFormat vform,
4313 LogicVRegister dst,
4314 const LogicVRegister& src1,
4315 const LogicVRegister& src2) {
4316 uint64_t result[kZRegMaxSizeInBytes] = {};
4317 int lane_count = LaneCountFromFormat(vform);
4318 int pairs = lane_count / 2;
4319 for (int i = 0; i < pairs; ++i) {
4320 result[2 * i] = src1.Uint(vform, 2 * i);
4321 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4322 }
4323
4324 dst.ClearForWrite(vform);
4325 for (int i = 0; i < lane_count; ++i) {
4326 dst.SetUint(vform, i, result[i]);
4327 }
4328 return dst;
4329 }
4330
4331
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4332 LogicVRegister Simulator::trn2(VectorFormat vform,
4333 LogicVRegister dst,
4334 const LogicVRegister& src1,
4335 const LogicVRegister& src2) {
4336 uint64_t result[kZRegMaxSizeInBytes] = {};
4337 int lane_count = LaneCountFromFormat(vform);
4338 int pairs = lane_count / 2;
4339 for (int i = 0; i < pairs; ++i) {
4340 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4341 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4342 }
4343
4344 dst.ClearForWrite(vform);
4345 for (int i = 0; i < lane_count; ++i) {
4346 dst.SetUint(vform, i, result[i]);
4347 }
4348 return dst;
4349 }
4350
4351
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4352 LogicVRegister Simulator::zip1(VectorFormat vform,
4353 LogicVRegister dst,
4354 const LogicVRegister& src1,
4355 const LogicVRegister& src2) {
4356 uint64_t result[kZRegMaxSizeInBytes] = {};
4357 int lane_count = LaneCountFromFormat(vform);
4358 int pairs = lane_count / 2;
4359 for (int i = 0; i < pairs; ++i) {
4360 result[2 * i] = src1.Uint(vform, i);
4361 result[(2 * i) + 1] = src2.Uint(vform, i);
4362 }
4363
4364 dst.ClearForWrite(vform);
4365 for (int i = 0; i < lane_count; ++i) {
4366 dst.SetUint(vform, i, result[i]);
4367 }
4368 return dst;
4369 }
4370
4371
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4372 LogicVRegister Simulator::zip2(VectorFormat vform,
4373 LogicVRegister dst,
4374 const LogicVRegister& src1,
4375 const LogicVRegister& src2) {
4376 uint64_t result[kZRegMaxSizeInBytes] = {};
4377 int lane_count = LaneCountFromFormat(vform);
4378 int pairs = lane_count / 2;
4379 for (int i = 0; i < pairs; ++i) {
4380 result[2 * i] = src1.Uint(vform, pairs + i);
4381 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4382 }
4383
4384 dst.ClearForWrite(vform);
4385 for (int i = 0; i < lane_count; ++i) {
4386 dst.SetUint(vform, i, result[i]);
4387 }
4388 return dst;
4389 }
4390
4391
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4392 LogicVRegister Simulator::uzp1(VectorFormat vform,
4393 LogicVRegister dst,
4394 const LogicVRegister& src1,
4395 const LogicVRegister& src2) {
4396 uint64_t result[kZRegMaxSizeInBytes * 2];
4397 int lane_count = LaneCountFromFormat(vform);
4398 for (int i = 0; i < lane_count; ++i) {
4399 result[i] = src1.Uint(vform, i);
4400 result[lane_count + i] = src2.Uint(vform, i);
4401 }
4402
4403 dst.ClearForWrite(vform);
4404 for (int i = 0; i < lane_count; ++i) {
4405 dst.SetUint(vform, i, result[2 * i]);
4406 }
4407 return dst;
4408 }
4409
4410
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4411 LogicVRegister Simulator::uzp2(VectorFormat vform,
4412 LogicVRegister dst,
4413 const LogicVRegister& src1,
4414 const LogicVRegister& src2) {
4415 uint64_t result[kZRegMaxSizeInBytes * 2];
4416 int lane_count = LaneCountFromFormat(vform);
4417 for (int i = 0; i < lane_count; ++i) {
4418 result[i] = src1.Uint(vform, i);
4419 result[lane_count + i] = src2.Uint(vform, i);
4420 }
4421
4422 dst.ClearForWrite(vform);
4423 for (int i = 0; i < lane_count; ++i) {
4424 dst.SetUint(vform, i, result[(2 * i) + 1]);
4425 }
4426 return dst;
4427 }
4428
interleave_top_bottom(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4429 LogicVRegister Simulator::interleave_top_bottom(VectorFormat vform,
4430 LogicVRegister dst,
4431 const LogicVRegister& src) {
4432 // Interleave the top and bottom half of a vector, ie. for a vector:
4433 //
4434 // [ ... | F | D | B | ... | E | C | A ]
4435 //
4436 // where B is the first element in the top half of the vector, produce a
4437 // result vector:
4438 //
4439 // [ ... | ... | F | E | D | C | B | A ]
4440
4441 uint64_t result[kZRegMaxSizeInBytes] = {};
4442 int lane_count = LaneCountFromFormat(vform);
4443 for (int i = 0; i < lane_count; i += 2) {
4444 result[i] = src.Uint(vform, i / 2);
4445 result[i + 1] = src.Uint(vform, (lane_count / 2) + (i / 2));
4446 }
4447 dst.SetUintArray(vform, result);
4448 return dst;
4449 }
4450
4451 template <typename T>
FPNeg(T op)4452 T Simulator::FPNeg(T op) {
4453 return -op;
4454 }
4455
4456 template <typename T>
FPAdd(T op1,T op2)4457 T Simulator::FPAdd(T op1, T op2) {
4458 T result = FPProcessNaNs(op1, op2);
4459 if (IsNaN(result)) {
4460 return result;
4461 }
4462
4463 if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4464 // inf + -inf returns the default NaN.
4465 FPProcessException();
4466 return FPDefaultNaN<T>();
4467 } else {
4468 // Other cases should be handled by standard arithmetic.
4469 return op1 + op2;
4470 }
4471 }
4472
4473
4474 template <typename T>
FPSub(T op1,T op2)4475 T Simulator::FPSub(T op1, T op2) {
4476 // NaNs should be handled elsewhere.
4477 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4478
4479 if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4480 // inf - inf returns the default NaN.
4481 FPProcessException();
4482 return FPDefaultNaN<T>();
4483 } else {
4484 // Other cases should be handled by standard arithmetic.
4485 return op1 - op2;
4486 }
4487 }
4488
4489 template <typename T>
FPMulNaNs(T op1,T op2)4490 T Simulator::FPMulNaNs(T op1, T op2) {
4491 T result = FPProcessNaNs(op1, op2);
4492 return IsNaN(result) ? result : FPMul(op1, op2);
4493 }
4494
4495 template <typename T>
FPMul(T op1,T op2)4496 T Simulator::FPMul(T op1, T op2) {
4497 // NaNs should be handled elsewhere.
4498 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4499
4500 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4501 // inf * 0.0 returns the default NaN.
4502 FPProcessException();
4503 return FPDefaultNaN<T>();
4504 } else {
4505 // Other cases should be handled by standard arithmetic.
4506 return op1 * op2;
4507 }
4508 }
4509
4510
4511 template <typename T>
FPMulx(T op1,T op2)4512 T Simulator::FPMulx(T op1, T op2) {
4513 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4514 // inf * 0.0 returns +/-2.0.
4515 T two = 2.0;
4516 return copysign(1.0, op1) * copysign(1.0, op2) * two;
4517 }
4518 return FPMul(op1, op2);
4519 }
4520
4521
4522 template <typename T>
FPMulAdd(T a,T op1,T op2)4523 T Simulator::FPMulAdd(T a, T op1, T op2) {
4524 T result = FPProcessNaNs3(a, op1, op2);
4525
4526 T sign_a = copysign(1.0, a);
4527 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4528 bool isinf_prod = IsInf(op1) || IsInf(op2);
4529 bool operation_generates_nan =
4530 (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0
4531 (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf
4532 (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
4533
4534 if (IsNaN(result)) {
4535 // Generated NaNs override quiet NaNs propagated from a.
4536 if (operation_generates_nan && IsQuietNaN(a)) {
4537 FPProcessException();
4538 return FPDefaultNaN<T>();
4539 } else {
4540 return result;
4541 }
4542 }
4543
4544 // If the operation would produce a NaN, return the default NaN.
4545 if (operation_generates_nan) {
4546 FPProcessException();
4547 return FPDefaultNaN<T>();
4548 }
4549
4550 // Work around broken fma implementations for exact zero results: The sign of
4551 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4552 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4553 return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4554 }
4555
4556 result = FusedMultiplyAdd(op1, op2, a);
4557 VIXL_ASSERT(!IsNaN(result));
4558
4559 // Work around broken fma implementations for rounded zero results: If a is
4560 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4561 if ((a == 0.0) && (result == 0.0)) {
4562 return copysign(0.0, sign_prod);
4563 }
4564
4565 return result;
4566 }
4567
4568
4569 template <typename T>
FPDiv(T op1,T op2)4570 T Simulator::FPDiv(T op1, T op2) {
4571 // NaNs should be handled elsewhere.
4572 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4573
4574 if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4575 // inf / inf and 0.0 / 0.0 return the default NaN.
4576 FPProcessException();
4577 return FPDefaultNaN<T>();
4578 } else {
4579 if (op2 == 0.0) {
4580 FPProcessException();
4581 if (!IsNaN(op1)) {
4582 double op1_sign = copysign(1.0, op1);
4583 double op2_sign = copysign(1.0, op2);
4584 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4585 }
4586 }
4587
4588 // Other cases should be handled by standard arithmetic.
4589 return op1 / op2;
4590 }
4591 }
4592
4593
4594 template <typename T>
FPSqrt(T op)4595 T Simulator::FPSqrt(T op) {
4596 if (IsNaN(op)) {
4597 return FPProcessNaN(op);
4598 } else if (op < T(0.0)) {
4599 FPProcessException();
4600 return FPDefaultNaN<T>();
4601 } else {
4602 return sqrt(op);
4603 }
4604 }
4605
4606
4607 template <typename T>
FPMax(T a,T b)4608 T Simulator::FPMax(T a, T b) {
4609 T result = FPProcessNaNs(a, b);
4610 if (IsNaN(result)) return result;
4611
4612 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4613 // a and b are zero, and the sign differs: return +0.0.
4614 return 0.0;
4615 } else {
4616 return (a > b) ? a : b;
4617 }
4618 }
4619
4620
4621 template <typename T>
FPMaxNM(T a,T b)4622 T Simulator::FPMaxNM(T a, T b) {
4623 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4624 a = kFP64NegativeInfinity;
4625 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4626 b = kFP64NegativeInfinity;
4627 }
4628
4629 T result = FPProcessNaNs(a, b);
4630 return IsNaN(result) ? result : FPMax(a, b);
4631 }
4632
4633
4634 template <typename T>
FPMin(T a,T b)4635 T Simulator::FPMin(T a, T b) {
4636 T result = FPProcessNaNs(a, b);
4637 if (IsNaN(result)) return result;
4638
4639 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4640 // a and b are zero, and the sign differs: return -0.0.
4641 return -0.0;
4642 } else {
4643 return (a < b) ? a : b;
4644 }
4645 }
4646
4647
4648 template <typename T>
FPMinNM(T a,T b)4649 T Simulator::FPMinNM(T a, T b) {
4650 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4651 a = kFP64PositiveInfinity;
4652 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4653 b = kFP64PositiveInfinity;
4654 }
4655
4656 T result = FPProcessNaNs(a, b);
4657 return IsNaN(result) ? result : FPMin(a, b);
4658 }
4659
4660
4661 template <typename T>
FPRecipStepFused(T op1,T op2)4662 T Simulator::FPRecipStepFused(T op1, T op2) {
4663 const T two = 2.0;
4664 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4665 return two;
4666 } else if (IsInf(op1) || IsInf(op2)) {
4667 // Return +inf if signs match, otherwise -inf.
4668 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4669 : kFP64NegativeInfinity;
4670 } else {
4671 return FusedMultiplyAdd(op1, op2, two);
4672 }
4673 }
4674
// Generic normality check: forwards to std::isnormal, so it is true only for
// values that are neither zero, subnormal, infinite nor NaN.
template <typename T>
bool IsNormal(T value) {
  const bool is_normal = std::isnormal(value);
  return is_normal;
}
4679
4680 template <>
IsNormal(SimFloat16 value)4681 bool IsNormal(SimFloat16 value) {
4682 uint16_t rawbits = Float16ToRawbits(value);
4683 uint16_t exp_mask = 0x7c00;
4684 // Check that the exponent is neither all zeroes or all ones.
4685 return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4686 }
4687
4688
4689 template <typename T>
FPRSqrtStepFused(T op1,T op2)4690 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4691 const T one_point_five = 1.5;
4692 const T two = 2.0;
4693
4694 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4695 return one_point_five;
4696 } else if (IsInf(op1) || IsInf(op2)) {
4697 // Return +inf if signs match, otherwise -inf.
4698 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4699 : kFP64NegativeInfinity;
4700 } else {
4701 // The multiply-add-halve operation must be fully fused, so avoid interim
4702 // rounding by checking which operand can be losslessly divided by two
4703 // before doing the multiply-add.
4704 if (IsNormal(op1 / two)) {
4705 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4706 } else if (IsNormal(op2 / two)) {
4707 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4708 } else {
4709 // Neither operand is normal after halving: the result is dominated by
4710 // the addition term, so just return that.
4711 return one_point_five;
4712 }
4713 }
4714 }
4715
FPToFixedJS(double value)4716 int32_t Simulator::FPToFixedJS(double value) {
4717 // The Z-flag is set when the conversion from double precision floating-point
4718 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4719 // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4720 // Z-flag is unset.
4721 int Z = 1;
4722 int32_t result;
4723
4724 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4725 (value == kFP64NegativeInfinity)) {
4726 // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4727 // unset the Z-flag.
4728 result = 0.0;
4729 if ((value != 0.0) || std::signbit(value)) {
4730 Z = 0;
4731 }
4732 } else if (std::isnan(value)) {
4733 // NaN values unset the Z-flag and set the result to 0.
4734 FPProcessNaN(value);
4735 result = 0;
4736 Z = 0;
4737 } else {
4738 // All other values are converted to an integer representation, rounded
4739 // toward zero.
4740 double int_result = std::floor(value);
4741 double error = value - int_result;
4742
4743 if ((error != 0.0) && (int_result < 0.0)) {
4744 int_result++;
4745 }
4746
4747 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4748 // write a one-liner with std::round, but the behaviour on ties is incorrect
4749 // for our purposes.
4750 double mod_const = static_cast<double>(UINT64_C(1) << 32);
4751 double mod_error =
4752 (int_result / mod_const) - std::floor(int_result / mod_const);
4753 double constrained;
4754 if (mod_error == 0.5) {
4755 constrained = INT32_MIN;
4756 } else {
4757 constrained = int_result - mod_const * round(int_result / mod_const);
4758 }
4759
4760 VIXL_ASSERT(std::floor(constrained) == constrained);
4761 VIXL_ASSERT(constrained >= INT32_MIN);
4762 VIXL_ASSERT(constrained <= INT32_MAX);
4763
4764 // Take the bottom 32 bits of the result as a 32-bit integer.
4765 result = static_cast<int32_t>(constrained);
4766
4767 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4768 (error != 0.0)) {
4769 // If the integer result is out of range or the conversion isn't exact,
4770 // take exception and unset the Z-flag.
4771 FPProcessException();
4772 Z = 0;
4773 }
4774 }
4775
4776 ReadNzcv().SetN(0);
4777 ReadNzcv().SetZ(Z);
4778 ReadNzcv().SetC(0);
4779 ReadNzcv().SetV(0);
4780
4781 return result;
4782 }
4783
FPRoundIntCommon(double value,FPRounding round_mode)4784 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4785 VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4786 (value != kFP64NegativeInfinity));
4787 VIXL_ASSERT(!IsNaN(value));
4788
4789 double int_result = std::floor(value);
4790 double error = value - int_result;
4791 switch (round_mode) {
4792 case FPTieAway: {
4793 // Take care of correctly handling the range ]-0.5, -0.0], which must
4794 // yield -0.0.
4795 if ((-0.5 < value) && (value < 0.0)) {
4796 int_result = -0.0;
4797
4798 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4799 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4800 // result is positive, round up.
4801 int_result++;
4802 }
4803 break;
4804 }
4805 case FPTieEven: {
4806 // Take care of correctly handling the range [-0.5, -0.0], which must
4807 // yield -0.0.
4808 if ((-0.5 <= value) && (value < 0.0)) {
4809 int_result = -0.0;
4810
4811 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4812 // result is odd, round up.
4813 } else if ((error > 0.5) ||
4814 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4815 int_result++;
4816 }
4817 break;
4818 }
4819 case FPZero: {
4820 // If value>0 then we take floor(value)
4821 // otherwise, ceil(value).
4822 if (value < 0) {
4823 int_result = ceil(value);
4824 }
4825 break;
4826 }
4827 case FPNegativeInfinity: {
4828 // We always use floor(value).
4829 break;
4830 }
4831 case FPPositiveInfinity: {
4832 // Take care of correctly handling the range ]-1.0, -0.0], which must
4833 // yield -0.0.
4834 if ((-1.0 < value) && (value < 0.0)) {
4835 int_result = -0.0;
4836
4837 // If the error is non-zero, round up.
4838 } else if (error > 0.0) {
4839 int_result++;
4840 }
4841 break;
4842 }
4843 default:
4844 VIXL_UNIMPLEMENTED();
4845 }
4846 return int_result;
4847 }
4848
FPRoundInt(double value,FPRounding round_mode)4849 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4850 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4851 (value == kFP64NegativeInfinity)) {
4852 return value;
4853 } else if (IsNaN(value)) {
4854 return FPProcessNaN(value);
4855 }
4856 return FPRoundIntCommon(value, round_mode);
4857 }
4858
FPRoundInt(double value,FPRounding round_mode,FrintMode frint_mode)4859 double Simulator::FPRoundInt(double value,
4860 FPRounding round_mode,
4861 FrintMode frint_mode) {
4862 if (frint_mode == kFrintToInteger) {
4863 return FPRoundInt(value, round_mode);
4864 }
4865
4866 VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4867
4868 if (value == 0.0) {
4869 return value;
4870 }
4871
4872 if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4873 IsNaN(value)) {
4874 if (frint_mode == kFrintToInt32) {
4875 return INT32_MIN;
4876 } else {
4877 return INT64_MIN;
4878 }
4879 }
4880
4881 double result = FPRoundIntCommon(value, round_mode);
4882
4883 // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4884 // representable as a double, and is rounded to (INT64_MAX + 1) when
4885 // converted. To avoid this, we compare `result >= int64_max_plus_one`
4886 // instead; this is safe because `result` is known to be integral, and
4887 // `int64_max_plus_one` is exactly representable as a double.
4888 constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4889 VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4890 int64_max_plus_one)) == int64_max_plus_one);
4891
4892 if (frint_mode == kFrintToInt32) {
4893 if ((result > INT32_MAX) || (result < INT32_MIN)) {
4894 return INT32_MIN;
4895 }
4896 } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4897 return INT64_MIN;
4898 }
4899
4900 return result;
4901 }
4902
FPToInt16(double value,FPRounding rmode)4903 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4904 value = FPRoundInt(value, rmode);
4905 if (value >= kHMaxInt) {
4906 return kHMaxInt;
4907 } else if (value < kHMinInt) {
4908 return kHMinInt;
4909 }
4910 return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4911 }
4912
4913
FPToInt32(double value,FPRounding rmode)4914 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4915 value = FPRoundInt(value, rmode);
4916 if (value >= kWMaxInt) {
4917 return kWMaxInt;
4918 } else if (value < kWMinInt) {
4919 return kWMinInt;
4920 }
4921 return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4922 }
4923
4924
FPToInt64(double value,FPRounding rmode)4925 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4926 value = FPRoundInt(value, rmode);
4927 // This is equivalent to "if (value >= kXMaxInt)" but avoids rounding issues
4928 // as a result of kMaxInt not being representable as a double.
4929 if (value >= 9223372036854775808.) {
4930 return kXMaxInt;
4931 } else if (value < kXMinInt) {
4932 return kXMinInt;
4933 }
4934 return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4935 }
4936
4937
FPToUInt16(double value,FPRounding rmode)4938 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4939 value = FPRoundInt(value, rmode);
4940 if (value >= kHMaxUInt) {
4941 return kHMaxUInt;
4942 } else if (value < 0.0) {
4943 return 0;
4944 }
4945 return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4946 }
4947
4948
FPToUInt32(double value,FPRounding rmode)4949 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4950 value = FPRoundInt(value, rmode);
4951 if (value >= kWMaxUInt) {
4952 return kWMaxUInt;
4953 } else if (value < 0.0) {
4954 return 0;
4955 }
4956 return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4957 }
4958
4959
FPToUInt64(double value,FPRounding rmode)4960 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4961 value = FPRoundInt(value, rmode);
4962 // This is equivalent to "if (value >= kXMaxUInt)" but avoids rounding issues
4963 // as a result of kMaxUInt not being representable as a double.
4964 if (value >= 18446744073709551616.) {
4965 return kXMaxUInt;
4966 } else if (value < 0.0) {
4967 return 0;
4968 }
4969 return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4970 }
4971
4972
// For every entry in NEON_FP3SAME_LIST, define two Simulator members named FN:
//  - a template that applies OP lane by lane to src1 and src2, first running
//    FPProcessNaNs on the operands when PROCNAN is true (a NaN from that step
//    becomes the lane result and OP is skipped);
//  - a non-template dispatcher that picks the SimFloat16, float or double
//    instantiation from the lane size of `vform`.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                    \
  template <typename T>                                              \
  LogicVRegister Simulator::FN(VectorFormat vform,                   \
                               LogicVRegister dst,                   \
                               const LogicVRegister& src1,           \
                               const LogicVRegister& src2) {         \
    dst.ClearForWrite(vform);                                        \
    const int lane_count = LaneCountFromFormat(vform);               \
    for (int lane = 0; lane < lane_count; ++lane) {                  \
      T lhs = src1.Float<T>(lane);                                   \
      T rhs = src2.Float<T>(lane);                                   \
      T value;                                                       \
      if (!(PROCNAN)) {                                              \
        value = OP(lhs, rhs);                                        \
      } else {                                                       \
        value = FPProcessNaNs(lhs, rhs);                             \
        if (!IsNaN(value)) {                                         \
          value = OP(lhs, rhs);                                      \
        }                                                            \
      }                                                              \
      dst.SetFloat(vform, lane, value);                              \
    }                                                                \
    return dst;                                                      \
  }                                                                  \
                                                                     \
  LogicVRegister Simulator::FN(VectorFormat vform,                   \
                               LogicVRegister dst,                   \
                               const LogicVRegister& src1,           \
                               const LogicVRegister& src2) {         \
    const auto lane_bits = LaneSizeInBitsFromFormat(vform);          \
    if (lane_bits == kHRegSize) {                                    \
      FN<SimFloat16>(vform, dst, src1, src2);                        \
      return dst;                                                    \
    }                                                                \
    if (lane_bits == kSRegSize) {                                    \
      FN<float>(vform, dst, src1, src2);                             \
      return dst;                                                    \
    }                                                                \
    VIXL_ASSERT(lane_bits == kDRegSize);                             \
    FN<double>(vform, dst, src1, src2);                              \
    return dst;                                                      \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
5013
5014
5015 LogicVRegister Simulator::fnmul(VectorFormat vform,
5016 LogicVRegister dst,
5017 const LogicVRegister& src1,
5018 const LogicVRegister& src2) {
5019 SimVRegister temp;
5020 LogicVRegister product = fmul(vform, temp, src1, src2);
5021 return fneg(vform, dst, product);
5022 }
5023
5024
5025 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5026 LogicVRegister Simulator::frecps(VectorFormat vform,
5027 LogicVRegister dst,
5028 const LogicVRegister& src1,
5029 const LogicVRegister& src2) {
5030 dst.ClearForWrite(vform);
5031 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5032 T op1 = -src1.Float<T>(i);
5033 T op2 = src2.Float<T>(i);
5034 T result = FPProcessNaNs(op1, op2);
5035 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
5036 }
5037 return dst;
5038 }
5039
5040
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5041 LogicVRegister Simulator::frecps(VectorFormat vform,
5042 LogicVRegister dst,
5043 const LogicVRegister& src1,
5044 const LogicVRegister& src2) {
5045 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5046 frecps<SimFloat16>(vform, dst, src1, src2);
5047 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5048 frecps<float>(vform, dst, src1, src2);
5049 } else {
5050 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5051 frecps<double>(vform, dst, src1, src2);
5052 }
5053 return dst;
5054 }
5055
5056
5057 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5058 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5059 LogicVRegister dst,
5060 const LogicVRegister& src1,
5061 const LogicVRegister& src2) {
5062 dst.ClearForWrite(vform);
5063 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5064 T op1 = -src1.Float<T>(i);
5065 T op2 = src2.Float<T>(i);
5066 T result = FPProcessNaNs(op1, op2);
5067 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
5068 }
5069 return dst;
5070 }
5071
5072
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5073 LogicVRegister Simulator::frsqrts(VectorFormat vform,
5074 LogicVRegister dst,
5075 const LogicVRegister& src1,
5076 const LogicVRegister& src2) {
5077 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5078 frsqrts<SimFloat16>(vform, dst, src1, src2);
5079 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5080 frsqrts<float>(vform, dst, src1, src2);
5081 } else {
5082 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5083 frsqrts<double>(vform, dst, src1, src2);
5084 }
5085 return dst;
5086 }
5087
5088
5089 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5090 LogicVRegister Simulator::fcmp(VectorFormat vform,
5091 LogicVRegister dst,
5092 const LogicVRegister& src1,
5093 const LogicVRegister& src2,
5094 Condition cond) {
5095 dst.ClearForWrite(vform);
5096 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5097 bool result = false;
5098 T op1 = src1.Float<T>(i);
5099 T op2 = src2.Float<T>(i);
5100 bool unordered = IsNaN(FPProcessNaNs(op1, op2));
5101
5102 switch (cond) {
5103 case eq:
5104 result = (op1 == op2);
5105 break;
5106 case ge:
5107 result = (op1 >= op2);
5108 break;
5109 case gt:
5110 result = (op1 > op2);
5111 break;
5112 case le:
5113 result = (op1 <= op2);
5114 break;
5115 case lt:
5116 result = (op1 < op2);
5117 break;
5118 case ne:
5119 result = (op1 != op2);
5120 break;
5121 case uo:
5122 result = unordered;
5123 break;
5124 default:
5125 // Other conditions are defined in terms of those above.
5126 VIXL_UNREACHABLE();
5127 break;
5128 }
5129
5130 if (result && unordered) {
5131 // Only `uo` and `ne` can be true for unordered comparisons.
5132 VIXL_ASSERT((cond == uo) || (cond == ne));
5133 }
5134
5135 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
5136 }
5137 return dst;
5138 }
5139
5140
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5141 LogicVRegister Simulator::fcmp(VectorFormat vform,
5142 LogicVRegister dst,
5143 const LogicVRegister& src1,
5144 const LogicVRegister& src2,
5145 Condition cond) {
5146 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5147 fcmp<SimFloat16>(vform, dst, src1, src2, cond);
5148 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5149 fcmp<float>(vform, dst, src1, src2, cond);
5150 } else {
5151 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5152 fcmp<double>(vform, dst, src1, src2, cond);
5153 }
5154 return dst;
5155 }
5156
5157
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)5158 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
5159 LogicVRegister dst,
5160 const LogicVRegister& src,
5161 Condition cond) {
5162 SimVRegister temp;
5163 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5164 LogicVRegister zero_reg =
5165 dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
5166 fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
5167 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5168 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
5169 fcmp<float>(vform, dst, src, zero_reg, cond);
5170 } else {
5171 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5172 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5173 fcmp<double>(vform, dst, src, zero_reg, cond);
5174 }
5175 return dst;
5176 }
5177
5178
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5179 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5180 LogicVRegister dst,
5181 const LogicVRegister& src1,
5182 const LogicVRegister& src2,
5183 Condition cond) {
5184 SimVRegister temp1, temp2;
5185 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5186 LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5187 LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5188 fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5189 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5190 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5191 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5192 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5193 } else {
5194 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5195 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5196 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5197 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5198 }
5199 return dst;
5200 }
5201
5202
5203 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5204 LogicVRegister Simulator::fmla(VectorFormat vform,
5205 LogicVRegister dst,
5206 const LogicVRegister& srca,
5207 const LogicVRegister& src1,
5208 const LogicVRegister& src2) {
5209 dst.ClearForWrite(vform);
5210 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5211 T op1 = src1.Float<T>(i);
5212 T op2 = src2.Float<T>(i);
5213 T acc = srca.Float<T>(i);
5214 T result = FPMulAdd(acc, op1, op2);
5215 dst.SetFloat(vform, i, result);
5216 }
5217 return dst;
5218 }
5219
5220
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5221 LogicVRegister Simulator::fmla(VectorFormat vform,
5222 LogicVRegister dst,
5223 const LogicVRegister& srca,
5224 const LogicVRegister& src1,
5225 const LogicVRegister& src2) {
5226 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5227 fmla<SimFloat16>(vform, dst, srca, src1, src2);
5228 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5229 fmla<float>(vform, dst, srca, src1, src2);
5230 } else {
5231 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5232 fmla<double>(vform, dst, srca, src1, src2);
5233 }
5234 return dst;
5235 }
5236
5237
5238 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5239 LogicVRegister Simulator::fmls(VectorFormat vform,
5240 LogicVRegister dst,
5241 const LogicVRegister& srca,
5242 const LogicVRegister& src1,
5243 const LogicVRegister& src2) {
5244 dst.ClearForWrite(vform);
5245 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5246 T op1 = -src1.Float<T>(i);
5247 T op2 = src2.Float<T>(i);
5248 T acc = srca.Float<T>(i);
5249 T result = FPMulAdd(acc, op1, op2);
5250 dst.SetFloat(i, result);
5251 }
5252 return dst;
5253 }
5254
5255
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5256 LogicVRegister Simulator::fmls(VectorFormat vform,
5257 LogicVRegister dst,
5258 const LogicVRegister& srca,
5259 const LogicVRegister& src1,
5260 const LogicVRegister& src2) {
5261 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5262 fmls<SimFloat16>(vform, dst, srca, src1, src2);
5263 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5264 fmls<float>(vform, dst, srca, src1, src2);
5265 } else {
5266 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5267 fmls<double>(vform, dst, srca, src1, src2);
5268 }
5269 return dst;
5270 }
5271
5272
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5273 LogicVRegister Simulator::fmlal(VectorFormat vform,
5274 LogicVRegister dst,
5275 const LogicVRegister& src1,
5276 const LogicVRegister& src2) {
5277 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5278 dst.ClearForWrite(vform);
5279 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5280 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5281 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5282 float acc = dst.Float<float>(i);
5283 float result = FPMulAdd(acc, op1, op2);
5284 dst.SetFloat(i, result);
5285 }
5286 return dst;
5287 }
5288
5289
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5290 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5291 LogicVRegister dst,
5292 const LogicVRegister& src1,
5293 const LogicVRegister& src2) {
5294 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5295 dst.ClearForWrite(vform);
5296 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5297 int src = i + LaneCountFromFormat(vform);
5298 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5299 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5300 float acc = dst.Float<float>(i);
5301 float result = FPMulAdd(acc, op1, op2);
5302 dst.SetFloat(i, result);
5303 }
5304 return dst;
5305 }
5306
5307
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5308 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5309 LogicVRegister dst,
5310 const LogicVRegister& src1,
5311 const LogicVRegister& src2) {
5312 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5313 dst.ClearForWrite(vform);
5314 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5315 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5316 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5317 float acc = dst.Float<float>(i);
5318 float result = FPMulAdd(acc, op1, op2);
5319 dst.SetFloat(i, result);
5320 }
5321 return dst;
5322 }
5323
5324
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5325 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5326 LogicVRegister dst,
5327 const LogicVRegister& src1,
5328 const LogicVRegister& src2) {
5329 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5330 dst.ClearForWrite(vform);
5331 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5332 int src = i + LaneCountFromFormat(vform);
5333 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5334 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5335 float acc = dst.Float<float>(i);
5336 float result = FPMulAdd(acc, op1, op2);
5337 dst.SetFloat(i, result);
5338 }
5339 return dst;
5340 }
5341
5342
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5343 LogicVRegister Simulator::fmlal(VectorFormat vform,
5344 LogicVRegister dst,
5345 const LogicVRegister& src1,
5346 const LogicVRegister& src2,
5347 int index) {
5348 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5349 dst.ClearForWrite(vform);
5350 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5351 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5352 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5353 float acc = dst.Float<float>(i);
5354 float result = FPMulAdd(acc, op1, op2);
5355 dst.SetFloat(i, result);
5356 }
5357 return dst;
5358 }
5359
5360
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5361 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5362 LogicVRegister dst,
5363 const LogicVRegister& src1,
5364 const LogicVRegister& src2,
5365 int index) {
5366 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5367 dst.ClearForWrite(vform);
5368 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5369 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5370 int src = i + LaneCountFromFormat(vform);
5371 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5372 float acc = dst.Float<float>(i);
5373 float result = FPMulAdd(acc, op1, op2);
5374 dst.SetFloat(i, result);
5375 }
5376 return dst;
5377 }
5378
5379
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5380 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5381 LogicVRegister dst,
5382 const LogicVRegister& src1,
5383 const LogicVRegister& src2,
5384 int index) {
5385 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5386 dst.ClearForWrite(vform);
5387 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5388 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5389 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5390 float acc = dst.Float<float>(i);
5391 float result = FPMulAdd(acc, op1, op2);
5392 dst.SetFloat(i, result);
5393 }
5394 return dst;
5395 }
5396
5397
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5398 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5399 LogicVRegister dst,
5400 const LogicVRegister& src1,
5401 const LogicVRegister& src2,
5402 int index) {
5403 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5404 dst.ClearForWrite(vform);
5405 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5406 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5407 int src = i + LaneCountFromFormat(vform);
5408 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5409 float acc = dst.Float<float>(i);
5410 float result = FPMulAdd(acc, op1, op2);
5411 dst.SetFloat(i, result);
5412 }
5413 return dst;
5414 }
5415
5416
5417 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5418 LogicVRegister Simulator::fneg(VectorFormat vform,
5419 LogicVRegister dst,
5420 const LogicVRegister& src) {
5421 dst.ClearForWrite(vform);
5422 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5423 T op = src.Float<T>(i);
5424 op = -op;
5425 dst.SetFloat(i, op);
5426 }
5427 return dst;
5428 }
5429
5430
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5431 LogicVRegister Simulator::fneg(VectorFormat vform,
5432 LogicVRegister dst,
5433 const LogicVRegister& src) {
5434 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5435 fneg<SimFloat16>(vform, dst, src);
5436 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5437 fneg<float>(vform, dst, src);
5438 } else {
5439 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5440 fneg<double>(vform, dst, src);
5441 }
5442 return dst;
5443 }
5444
5445
5446 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5447 LogicVRegister Simulator::fabs_(VectorFormat vform,
5448 LogicVRegister dst,
5449 const LogicVRegister& src) {
5450 dst.ClearForWrite(vform);
5451 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5452 T op = src.Float<T>(i);
5453 if (copysign(1.0, op) < 0.0) {
5454 op = -op;
5455 }
5456 dst.SetFloat(i, op);
5457 }
5458 return dst;
5459 }
5460
5461
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5462 LogicVRegister Simulator::fabs_(VectorFormat vform,
5463 LogicVRegister dst,
5464 const LogicVRegister& src) {
5465 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5466 fabs_<SimFloat16>(vform, dst, src);
5467 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5468 fabs_<float>(vform, dst, src);
5469 } else {
5470 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5471 fabs_<double>(vform, dst, src);
5472 }
5473 return dst;
5474 }
5475
5476
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5477 LogicVRegister Simulator::fabd(VectorFormat vform,
5478 LogicVRegister dst,
5479 const LogicVRegister& src1,
5480 const LogicVRegister& src2) {
5481 SimVRegister temp;
5482 fsub(vform, temp, src1, src2);
5483 fabs_(vform, dst, temp);
5484 return dst;
5485 }
5486
5487
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5488 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5489 LogicVRegister dst,
5490 const LogicVRegister& src) {
5491 dst.ClearForWrite(vform);
5492 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5493 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5494 SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5495 dst.SetFloat(i, result);
5496 }
5497 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5498 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5499 float result = FPSqrt(src.Float<float>(i));
5500 dst.SetFloat(i, result);
5501 }
5502 } else {
5503 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5504 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5505 double result = FPSqrt(src.Float<double>(i));
5506 dst.SetFloat(i, result);
5507 }
5508 }
5509 return dst;
5510 }
5511
5512
// Defines both overloads of a floating-point pairwise operation named FNP.
//
// Two-source overload: splits src1/src2 into two operand vectors with uzp1 and
// uzp2, applies the lane-wise operation FN to them, and, for SVE formats only,
// passes the result through interleave_top_bottom.
//
// Single-source (scalar) overload: applies the scalar helper OP to lanes 0 and
// 1 of src and stores the result in lane 0 of dst. `vform` must be kFormatH,
// kFormatS or kFormatD. Half-precision lanes are read and written as raw bits.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                   \
  LogicVRegister Simulator::FNP(VectorFormat vform,                           \
                                LogicVRegister dst,                           \
                                const LogicVRegister& src1,                   \
                                const LogicVRegister& src2) {                 \
    SimVRegister first_operands, second_operands;                             \
    uzp1(vform, first_operands, src1, src2);                                  \
    uzp2(vform, second_operands, src1, src2);                                 \
    FN(vform, dst, first_operands, second_operands);                          \
    if (IsSVEFormat(vform)) {                                                 \
      interleave_top_bottom(vform, dst, dst);                                 \
    }                                                                         \
    return dst;                                                               \
  }                                                                           \
                                                                              \
  LogicVRegister Simulator::FNP(VectorFormat vform,                           \
                                LogicVRegister dst,                           \
                                const LogicVRegister& src) {                  \
    if (vform == kFormatH) {                                                  \
      const SimFloat16 lhs(RawbitsToFloat16(src.Uint(vform, 0)));             \
      const SimFloat16 rhs(RawbitsToFloat16(src.Uint(vform, 1)));             \
      SimFloat16 pair_result(OP(lhs, rhs));                                   \
      dst.SetUint(vform, 0, Float16ToRawbits(pair_result));                   \
    } else if (vform == kFormatS) {                                           \
      float pair_result = OP(src.Float<float>(0), src.Float<float>(1));       \
      dst.SetFloat(0, pair_result);                                           \
    } else {                                                                  \
      VIXL_ASSERT(vform == kFormatD);                                         \
      double pair_result = OP(src.Float<double>(0), src.Float<double>(1));    \
      dst.SetFloat(0, pair_result);                                           \
    }                                                                         \
    dst.ClearForWrite(vform);                                                 \
    return dst;                                                               \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5548
5549 template <typename T>
5550 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5551 LogicVRegister dst,
5552 const LogicVRegister& src,
5553 typename TFPPairOp<T>::type fn,
5554 uint64_t inactive_value) {
5555 int lane_count = LaneCountFromFormat(vform);
5556 T result[kZRegMaxSizeInBytes / sizeof(T)];
5557 // Copy the source vector into a working array. Initialise the unused elements
5558 // at the end of the array to the same value that a false predicate would set.
5559 for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5560 result[i] = (i < lane_count)
5561 ? src.Float<T>(i)
5562 : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5563 }
5564
5565 // Pairwise reduce the elements to a single value, using the pair op function
5566 // argument.
5567 for (int step = 1; step < lane_count; step *= 2) {
5568 for (int i = 0; i < lane_count; i += step * 2) {
5569 result[i] = (this->*fn)(result[i], result[i + step]);
5570 }
5571 }
5572 dst.ClearForWrite(ScalarFormatFromFormat(vform));
5573 dst.SetFloat<T>(0, result[0]);
5574 return dst;
5575 }
5576
FPPairedAcrossHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,typename TFPPairOp<SimFloat16>::type fn16,typename TFPPairOp<float>::type fn32,typename TFPPairOp<double>::type fn64,uint64_t inactive_value)5577 LogicVRegister Simulator::FPPairedAcrossHelper(
5578 VectorFormat vform,
5579 LogicVRegister dst,
5580 const LogicVRegister& src,
5581 typename TFPPairOp<SimFloat16>::type fn16,
5582 typename TFPPairOp<float>::type fn32,
5583 typename TFPPairOp<double>::type fn64,
5584 uint64_t inactive_value) {
5585 switch (LaneSizeInBitsFromFormat(vform)) {
5586 case kHRegSize:
5587 return FPPairedAcrossHelper<SimFloat16>(vform,
5588 dst,
5589 src,
5590 fn16,
5591 inactive_value);
5592 case kSRegSize:
5593 return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5594 default:
5595 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5596 return FPPairedAcrossHelper<double>(vform,
5597 dst,
5598 src,
5599 fn64,
5600 inactive_value);
5601 }
5602 }
5603
faddv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5604 LogicVRegister Simulator::faddv(VectorFormat vform,
5605 LogicVRegister dst,
5606 const LogicVRegister& src) {
5607 return FPPairedAcrossHelper(vform,
5608 dst,
5609 src,
5610 &Simulator::FPAdd<SimFloat16>,
5611 &Simulator::FPAdd<float>,
5612 &Simulator::FPAdd<double>,
5613 0);
5614 }
5615
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5616 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5617 LogicVRegister dst,
5618 const LogicVRegister& src) {
5619 int lane_size = LaneSizeInBitsFromFormat(vform);
5620 uint64_t inactive_value =
5621 FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5622 return FPPairedAcrossHelper(vform,
5623 dst,
5624 src,
5625 &Simulator::FPMax<SimFloat16>,
5626 &Simulator::FPMax<float>,
5627 &Simulator::FPMax<double>,
5628 inactive_value);
5629 }
5630
5631
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5632 LogicVRegister Simulator::fminv(VectorFormat vform,
5633 LogicVRegister dst,
5634 const LogicVRegister& src) {
5635 int lane_size = LaneSizeInBitsFromFormat(vform);
5636 uint64_t inactive_value =
5637 FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5638 return FPPairedAcrossHelper(vform,
5639 dst,
5640 src,
5641 &Simulator::FPMin<SimFloat16>,
5642 &Simulator::FPMin<float>,
5643 &Simulator::FPMin<double>,
5644 inactive_value);
5645 }
5646
5647
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5648 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5649 LogicVRegister dst,
5650 const LogicVRegister& src) {
5651 int lane_size = LaneSizeInBitsFromFormat(vform);
5652 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5653 return FPPairedAcrossHelper(vform,
5654 dst,
5655 src,
5656 &Simulator::FPMaxNM<SimFloat16>,
5657 &Simulator::FPMaxNM<float>,
5658 &Simulator::FPMaxNM<double>,
5659 inactive_value);
5660 }
5661
5662
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5663 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5664 LogicVRegister dst,
5665 const LogicVRegister& src) {
5666 int lane_size = LaneSizeInBitsFromFormat(vform);
5667 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5668 return FPPairedAcrossHelper(vform,
5669 dst,
5670 src,
5671 &Simulator::FPMinNM<SimFloat16>,
5672 &Simulator::FPMinNM<float>,
5673 &Simulator::FPMinNM<double>,
5674 inactive_value);
5675 }
5676
5677
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5678 LogicVRegister Simulator::fmul(VectorFormat vform,
5679 LogicVRegister dst,
5680 const LogicVRegister& src1,
5681 const LogicVRegister& src2,
5682 int index) {
5683 dst.ClearForWrite(vform);
5684 SimVRegister temp;
5685 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5686 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5687 fmul<SimFloat16>(vform, dst, src1, index_reg);
5688 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5689 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5690 fmul<float>(vform, dst, src1, index_reg);
5691 } else {
5692 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5693 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5694 fmul<double>(vform, dst, src1, index_reg);
5695 }
5696 return dst;
5697 }
5698
5699
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5700 LogicVRegister Simulator::fmla(VectorFormat vform,
5701 LogicVRegister dst,
5702 const LogicVRegister& src1,
5703 const LogicVRegister& src2,
5704 int index) {
5705 dst.ClearForWrite(vform);
5706 SimVRegister temp;
5707 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5708 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5709 fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5710 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5711 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5712 fmla<float>(vform, dst, dst, src1, index_reg);
5713 } else {
5714 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5715 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5716 fmla<double>(vform, dst, dst, src1, index_reg);
5717 }
5718 return dst;
5719 }
5720
5721
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5722 LogicVRegister Simulator::fmls(VectorFormat vform,
5723 LogicVRegister dst,
5724 const LogicVRegister& src1,
5725 const LogicVRegister& src2,
5726 int index) {
5727 dst.ClearForWrite(vform);
5728 SimVRegister temp;
5729 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5730 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5731 fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5732 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5733 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5734 fmls<float>(vform, dst, dst, src1, index_reg);
5735 } else {
5736 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5737 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5738 fmls<double>(vform, dst, dst, src1, index_reg);
5739 }
5740 return dst;
5741 }
5742
5743
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5744 LogicVRegister Simulator::fmulx(VectorFormat vform,
5745 LogicVRegister dst,
5746 const LogicVRegister& src1,
5747 const LogicVRegister& src2,
5748 int index) {
5749 dst.ClearForWrite(vform);
5750 SimVRegister temp;
5751 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5752 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5753 fmulx<SimFloat16>(vform, dst, src1, index_reg);
5754 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5755 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5756 fmulx<float>(vform, dst, src1, index_reg);
5757 } else {
5758 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5759 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5760 fmulx<double>(vform, dst, src1, index_reg);
5761 }
5762 return dst;
5763 }
5764
5765
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception,FrintMode frint_mode)5766 LogicVRegister Simulator::frint(VectorFormat vform,
5767 LogicVRegister dst,
5768 const LogicVRegister& src,
5769 FPRounding rounding_mode,
5770 bool inexact_exception,
5771 FrintMode frint_mode) {
5772 dst.ClearForWrite(vform);
5773 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5774 VIXL_ASSERT(frint_mode == kFrintToInteger);
5775 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5776 SimFloat16 input = src.Float<SimFloat16>(i);
5777 SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5778 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5779 FPProcessException();
5780 }
5781 dst.SetFloat<SimFloat16>(i, rounded);
5782 }
5783 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5784 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5785 float input = src.Float<float>(i);
5786 float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5787
5788 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5789 FPProcessException();
5790 }
5791 dst.SetFloat<float>(i, rounded);
5792 }
5793 } else {
5794 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5795 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5796 double input = src.Float<double>(i);
5797 double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5798 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5799 FPProcessException();
5800 }
5801 dst.SetFloat<double>(i, rounded);
5802 }
5803 }
5804 return dst;
5805 }
5806
fcvt(VectorFormat dst_vform,VectorFormat src_vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)5807 LogicVRegister Simulator::fcvt(VectorFormat dst_vform,
5808 VectorFormat src_vform,
5809 LogicVRegister dst,
5810 const LogicPRegister& pg,
5811 const LogicVRegister& src) {
5812 unsigned dst_data_size_in_bits = LaneSizeInBitsFromFormat(dst_vform);
5813 unsigned src_data_size_in_bits = LaneSizeInBitsFromFormat(src_vform);
5814 VectorFormat vform = SVEFormatFromLaneSizeInBits(
5815 std::max(dst_data_size_in_bits, src_data_size_in_bits));
5816
5817 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5818 if (!pg.IsActive(vform, i)) continue;
5819
5820 uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5821 0,
5822 src.Uint(vform, i));
5823 double dst_value =
5824 RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5825
5826 uint64_t dst_raw_bits =
5827 FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5828
5829 dst.SetUint(vform, i, dst_raw_bits);
5830 }
5831
5832 return dst;
5833 }
5834
fcvts(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5835 LogicVRegister Simulator::fcvts(VectorFormat vform,
5836 unsigned dst_data_size_in_bits,
5837 unsigned src_data_size_in_bits,
5838 LogicVRegister dst,
5839 const LogicPRegister& pg,
5840 const LogicVRegister& src,
5841 FPRounding round,
5842 int fbits) {
5843 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5844 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5845
5846 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5847 if (!pg.IsActive(vform, i)) continue;
5848
5849 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5850 0,
5851 src.Uint(vform, i));
5852 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5853 std::pow(2.0, fbits);
5854
5855 switch (dst_data_size_in_bits) {
5856 case kHRegSize:
5857 dst.SetInt(vform, i, FPToInt16(result, round));
5858 break;
5859 case kSRegSize:
5860 dst.SetInt(vform, i, FPToInt32(result, round));
5861 break;
5862 case kDRegSize:
5863 dst.SetInt(vform, i, FPToInt64(result, round));
5864 break;
5865 default:
5866 VIXL_UNIMPLEMENTED();
5867 break;
5868 }
5869 }
5870
5871 return dst;
5872 }
5873
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5874 LogicVRegister Simulator::fcvts(VectorFormat vform,
5875 LogicVRegister dst,
5876 const LogicVRegister& src,
5877 FPRounding round,
5878 int fbits) {
5879 dst.ClearForWrite(vform);
5880 return fcvts(vform,
5881 LaneSizeInBitsFromFormat(vform),
5882 LaneSizeInBitsFromFormat(vform),
5883 dst,
5884 GetPTrue(),
5885 src,
5886 round,
5887 fbits);
5888 }
5889
fcvtu(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5890 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5891 unsigned dst_data_size_in_bits,
5892 unsigned src_data_size_in_bits,
5893 LogicVRegister dst,
5894 const LogicPRegister& pg,
5895 const LogicVRegister& src,
5896 FPRounding round,
5897 int fbits) {
5898 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5899 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5900
5901 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5902 if (!pg.IsActive(vform, i)) continue;
5903
5904 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5905 0,
5906 src.Uint(vform, i));
5907 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5908 std::pow(2.0, fbits);
5909
5910 switch (dst_data_size_in_bits) {
5911 case kHRegSize:
5912 dst.SetUint(vform, i, FPToUInt16(result, round));
5913 break;
5914 case kSRegSize:
5915 dst.SetUint(vform, i, FPToUInt32(result, round));
5916 break;
5917 case kDRegSize:
5918 dst.SetUint(vform, i, FPToUInt64(result, round));
5919 break;
5920 default:
5921 VIXL_UNIMPLEMENTED();
5922 break;
5923 }
5924 }
5925
5926 return dst;
5927 }
5928
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5929 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5930 LogicVRegister dst,
5931 const LogicVRegister& src,
5932 FPRounding round,
5933 int fbits) {
5934 dst.ClearForWrite(vform);
5935 return fcvtu(vform,
5936 LaneSizeInBitsFromFormat(vform),
5937 LaneSizeInBitsFromFormat(vform),
5938 dst,
5939 GetPTrue(),
5940 src,
5941 round,
5942 fbits);
5943 }
5944
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5945 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5946 LogicVRegister dst,
5947 const LogicVRegister& src) {
5948 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5949 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5950 // TODO: Full support for SimFloat16 in SimRegister(s).
5951 dst.SetFloat(i,
5952 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5953 ReadDN()));
5954 }
5955 } else {
5956 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5957 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5958 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
5959 }
5960 }
5961 return dst;
5962 }
5963
5964
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5965 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5966 LogicVRegister dst,
5967 const LogicVRegister& src) {
5968 int lane_count = LaneCountFromFormat(vform);
5969 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5970 for (int i = 0; i < lane_count; i++) {
5971 // TODO: Full support for SimFloat16 in SimRegister(s).
5972 dst.SetFloat(i,
5973 FPToFloat(RawbitsToFloat16(
5974 src.Float<uint16_t>(i + lane_count)),
5975 ReadDN()));
5976 }
5977 } else {
5978 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5979 for (int i = 0; i < lane_count; i++) {
5980 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
5981 }
5982 }
5983 return dst;
5984 }
5985
5986
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5987 LogicVRegister Simulator::fcvtn(VectorFormat vform,
5988 LogicVRegister dst,
5989 const LogicVRegister& src) {
5990 SimVRegister tmp;
5991 LogicVRegister srctmp = mov(kFormat2D, tmp, src);
5992 dst.ClearForWrite(vform);
5993 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5994 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5995 dst.SetFloat(i,
5996 Float16ToRawbits(FPToFloat16(srctmp.Float<float>(i),
5997 FPTieEven,
5998 ReadDN())));
5999 }
6000 } else {
6001 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6002 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6003 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPTieEven, ReadDN()));
6004 }
6005 }
6006 return dst;
6007 }
6008
6009
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6010 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
6011 LogicVRegister dst,
6012 const LogicVRegister& src) {
6013 int lane_count = LaneCountFromFormat(vform) / 2;
6014 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6015 for (int i = lane_count - 1; i >= 0; i--) {
6016 dst.SetFloat(i + lane_count,
6017 Float16ToRawbits(
6018 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
6019 }
6020 } else {
6021 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6022 for (int i = lane_count - 1; i >= 0; i--) {
6023 dst.SetFloat(i + lane_count,
6024 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
6025 }
6026 }
6027 return dst;
6028 }
6029
6030
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6031 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
6032 LogicVRegister dst,
6033 const LogicVRegister& src) {
6034 SimVRegister tmp;
6035 LogicVRegister srctmp = mov(kFormat2D, tmp, src);
6036 int input_lane_count = LaneCountFromFormat(vform);
6037 if (IsSVEFormat(vform)) {
6038 mov(kFormatVnB, tmp, src);
6039 input_lane_count /= 2;
6040 }
6041
6042 dst.ClearForWrite(vform);
6043 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6044
6045 for (int i = 0; i < input_lane_count; i++) {
6046 dst.SetFloat(i, FPToFloat(srctmp.Float<double>(i), FPRoundOdd, ReadDN()));
6047 }
6048 return dst;
6049 }
6050
6051
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6052 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
6053 LogicVRegister dst,
6054 const LogicVRegister& src) {
6055 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
6056 int lane_count = LaneCountFromFormat(vform) / 2;
6057 for (int i = lane_count - 1; i >= 0; i--) {
6058 dst.SetFloat(i + lane_count,
6059 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
6060 }
6061 return dst;
6062 }
6063
6064
6065 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)6066 double Simulator::recip_sqrt_estimate(double a) {
6067 int quot0, quot1, s;
6068 double r;
6069 if (a < 0.5) {
6070 quot0 = static_cast<int>(a * 512.0);
6071 r = 1.0 / sqrt((static_cast<double>(quot0) + 0.5) / 512.0);
6072 } else {
6073 quot1 = static_cast<int>(a * 256.0);
6074 r = 1.0 / sqrt((static_cast<double>(quot1) + 0.5) / 256.0);
6075 }
6076 s = static_cast<int>(256.0 * r + 0.5);
6077 return static_cast<double>(s) / 256.0;
6078 }
6079
6080
Bits(uint64_t val,int start_bit,int end_bit)6081 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
6082 return ExtractUnsignedBitfield64(start_bit, end_bit, val);
6083 }
6084
6085
6086 template <typename T>
FPRecipSqrtEstimate(T op)6087 T Simulator::FPRecipSqrtEstimate(T op) {
6088 if (IsNaN(op)) {
6089 return FPProcessNaN(op);
6090 } else if (op == 0.0) {
6091 if (copysign(1.0, op) < 0.0) {
6092 return kFP64NegativeInfinity;
6093 } else {
6094 return kFP64PositiveInfinity;
6095 }
6096 } else if (copysign(1.0, op) < 0.0) {
6097 FPProcessException();
6098 return FPDefaultNaN<T>();
6099 } else if (IsInf(op)) {
6100 return 0.0;
6101 } else {
6102 uint64_t fraction;
6103 int exp, result_exp;
6104
6105 if (IsFloat16<T>()) {
6106 exp = Float16Exp(op);
6107 fraction = Float16Mantissa(op);
6108 fraction <<= 42;
6109 } else if (IsFloat32<T>()) {
6110 exp = FloatExp(op);
6111 fraction = FloatMantissa(op);
6112 fraction <<= 29;
6113 } else {
6114 VIXL_ASSERT(IsFloat64<T>());
6115 exp = DoubleExp(op);
6116 fraction = DoubleMantissa(op);
6117 }
6118
6119 if (exp == 0) {
6120 while (Bits(fraction, 51, 51) == 0) {
6121 fraction = Bits(fraction, 50, 0) << 1;
6122 exp -= 1;
6123 }
6124 fraction = Bits(fraction, 50, 0) << 1;
6125 }
6126
6127 double scaled;
6128 if (Bits(exp, 0, 0) == 0) {
6129 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6130 } else {
6131 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
6132 }
6133
6134 if (IsFloat16<T>()) {
6135 result_exp = (44 - exp) / 2;
6136 } else if (IsFloat32<T>()) {
6137 result_exp = (380 - exp) / 2;
6138 } else {
6139 VIXL_ASSERT(IsFloat64<T>());
6140 result_exp = (3068 - exp) / 2;
6141 }
6142
6143 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
6144
6145 if (IsFloat16<T>()) {
6146 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6147 uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
6148 return Float16Pack(0, exp_bits, est_bits);
6149 } else if (IsFloat32<T>()) {
6150 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6151 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
6152 return FloatPack(0, exp_bits, est_bits);
6153 } else {
6154 VIXL_ASSERT(IsFloat64<T>());
6155 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
6156 }
6157 }
6158 }
6159
6160
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6161 LogicVRegister Simulator::frsqrte(VectorFormat vform,
6162 LogicVRegister dst,
6163 const LogicVRegister& src) {
6164 dst.ClearForWrite(vform);
6165 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6166 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6167 SimFloat16 input = src.Float<SimFloat16>(i);
6168 dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
6169 }
6170 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6171 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6172 float input = src.Float<float>(i);
6173 dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
6174 }
6175 } else {
6176 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6177 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6178 double input = src.Float<double>(i);
6179 dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
6180 }
6181 }
6182 return dst;
6183 }
6184
6185 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)6186 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
6187 uint32_t sign;
6188
6189 if (IsFloat16<T>()) {
6190 sign = Float16Sign(op);
6191 } else if (IsFloat32<T>()) {
6192 sign = FloatSign(op);
6193 } else {
6194 VIXL_ASSERT(IsFloat64<T>());
6195 sign = DoubleSign(op);
6196 }
6197
6198 if (IsNaN(op)) {
6199 return FPProcessNaN(op);
6200 } else if (IsInf(op)) {
6201 return (sign == 1) ? -0.0 : 0.0;
6202 } else if (op == 0.0) {
6203 FPProcessException(); // FPExc_DivideByZero exception.
6204 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6205 } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6206 (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6207 (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6208 bool overflow_to_inf = false;
6209 switch (rounding) {
6210 case FPTieEven:
6211 overflow_to_inf = true;
6212 break;
6213 case FPPositiveInfinity:
6214 overflow_to_inf = (sign == 0);
6215 break;
6216 case FPNegativeInfinity:
6217 overflow_to_inf = (sign == 1);
6218 break;
6219 case FPZero:
6220 overflow_to_inf = false;
6221 break;
6222 default:
6223 break;
6224 }
6225 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
6226 if (overflow_to_inf) {
6227 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6228 } else {
6229 // Return FPMaxNormal(sign).
6230 if (IsFloat16<T>()) {
6231 return Float16Pack(sign, 0x1f, 0x3ff);
6232 } else if (IsFloat32<T>()) {
6233 return FloatPack(sign, 0xfe, 0x07fffff);
6234 } else {
6235 VIXL_ASSERT(IsFloat64<T>());
6236 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6237 }
6238 }
6239 } else {
6240 uint64_t fraction;
6241 int exp, result_exp;
6242
6243 if (IsFloat16<T>()) {
6244 sign = Float16Sign(op);
6245 exp = Float16Exp(op);
6246 fraction = Float16Mantissa(op);
6247 fraction <<= 42;
6248 } else if (IsFloat32<T>()) {
6249 sign = FloatSign(op);
6250 exp = FloatExp(op);
6251 fraction = FloatMantissa(op);
6252 fraction <<= 29;
6253 } else {
6254 VIXL_ASSERT(IsFloat64<T>());
6255 sign = DoubleSign(op);
6256 exp = DoubleExp(op);
6257 fraction = DoubleMantissa(op);
6258 }
6259
6260 if (exp == 0) {
6261 if (Bits(fraction, 51, 51) == 0) {
6262 exp -= 1;
6263 fraction = Bits(fraction, 49, 0) << 2;
6264 } else {
6265 fraction = Bits(fraction, 50, 0) << 1;
6266 }
6267 }
6268
6269 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6270
6271 if (IsFloat16<T>()) {
6272 result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
6273 } else if (IsFloat32<T>()) {
6274 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
6275 } else {
6276 VIXL_ASSERT(IsFloat64<T>());
6277 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
6278 }
6279
6280 double estimate = recip_estimate(scaled);
6281
6282 fraction = DoubleMantissa(estimate);
6283 if (result_exp == 0) {
6284 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6285 } else if (result_exp == -1) {
6286 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6287 result_exp = 0;
6288 }
6289 if (IsFloat16<T>()) {
6290 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6291 uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6292 return Float16Pack(sign, exp_bits, frac_bits);
6293 } else if (IsFloat32<T>()) {
6294 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6295 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6296 return FloatPack(sign, exp_bits, frac_bits);
6297 } else {
6298 VIXL_ASSERT(IsFloat64<T>());
6299 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6300 }
6301 }
6302 }
6303
6304
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)6305 LogicVRegister Simulator::frecpe(VectorFormat vform,
6306 LogicVRegister dst,
6307 const LogicVRegister& src,
6308 FPRounding round) {
6309 dst.ClearForWrite(vform);
6310 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6311 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6312 SimFloat16 input = src.Float<SimFloat16>(i);
6313 dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6314 }
6315 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6316 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6317 float input = src.Float<float>(i);
6318 dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6319 }
6320 } else {
6321 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6322 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6323 double input = src.Float<double>(i);
6324 dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6325 }
6326 }
6327 return dst;
6328 }
6329
6330
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6331 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6332 LogicVRegister dst,
6333 const LogicVRegister& src) {
6334 dst.ClearForWrite(vform);
6335 uint64_t operand;
6336 uint32_t result;
6337 double dp_operand, dp_result;
6338 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6339 operand = src.Uint(vform, i);
6340 if (operand <= 0x3FFFFFFF) {
6341 result = 0xFFFFFFFF;
6342 } else {
6343 dp_operand = operand * std::pow(2.0, -32);
6344 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6345 result = static_cast<uint32_t>(dp_result);
6346 }
6347 dst.SetUint(vform, i, result);
6348 }
6349 return dst;
6350 }
6351
6352
6353 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)6354 double Simulator::recip_estimate(double a) {
6355 int q, s;
6356 double r;
6357 q = static_cast<int>(a * 512.0);
6358 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6359 s = static_cast<int>(256.0 * r + 0.5);
6360 return static_cast<double>(s) / 256.0;
6361 }
6362
6363
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6364 LogicVRegister Simulator::urecpe(VectorFormat vform,
6365 LogicVRegister dst,
6366 const LogicVRegister& src) {
6367 dst.ClearForWrite(vform);
6368 uint64_t operand;
6369 uint32_t result;
6370 double dp_operand, dp_result;
6371 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6372 operand = src.Uint(vform, i);
6373 if (operand <= 0x7FFFFFFF) {
6374 result = 0xFFFFFFFF;
6375 } else {
6376 dp_operand = operand * std::pow(2.0, -32);
6377 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6378 result = static_cast<uint32_t>(dp_result);
6379 }
6380 dst.SetUint(vform, i, result);
6381 }
6382 return dst;
6383 }
6384
pfalse(LogicPRegister dst)6385 LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6386 dst.Clear();
6387 return dst;
6388 }
6389
pfirst(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6390 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6391 const LogicPRegister& pg,
6392 const LogicPRegister& src) {
6393 int first_pg = GetFirstActive(kFormatVnB, pg);
6394 VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6395 mov(dst, src);
6396 if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6397 return dst;
6398 }
6399
ptrue(VectorFormat vform,LogicPRegister dst,int pattern)6400 LogicPRegister Simulator::ptrue(VectorFormat vform,
6401 LogicPRegister dst,
6402 int pattern) {
6403 int count = GetPredicateConstraintLaneCount(vform, pattern);
6404 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6405 dst.SetActive(vform, i, i < count);
6406 }
6407 return dst;
6408 }
6409
pnext(VectorFormat vform,LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6410 LogicPRegister Simulator::pnext(VectorFormat vform,
6411 LogicPRegister dst,
6412 const LogicPRegister& pg,
6413 const LogicPRegister& src) {
6414 int next = GetLastActive(vform, src) + 1;
6415 while (next < LaneCountFromFormat(vform)) {
6416 if (pg.IsActive(vform, next)) break;
6417 next++;
6418 }
6419
6420 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6421 dst.SetActive(vform, i, (i == next));
6422 }
6423 return dst;
6424 }
6425
6426 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6427 LogicVRegister Simulator::frecpx(VectorFormat vform,
6428 LogicVRegister dst,
6429 const LogicVRegister& src) {
6430 dst.ClearForWrite(vform);
6431 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6432 T op = src.Float<T>(i);
6433 T result;
6434 if (IsNaN(op)) {
6435 result = FPProcessNaN(op);
6436 } else {
6437 int exp;
6438 uint32_t sign;
6439 if (IsFloat16<T>()) {
6440 sign = Float16Sign(op);
6441 exp = Float16Exp(op);
6442 exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6443 result = Float16Pack(sign, exp, 0);
6444 } else if (IsFloat32<T>()) {
6445 sign = FloatSign(op);
6446 exp = FloatExp(op);
6447 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6448 result = FloatPack(sign, exp, 0);
6449 } else {
6450 VIXL_ASSERT(IsFloat64<T>());
6451 sign = DoubleSign(op);
6452 exp = DoubleExp(op);
6453 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6454 result = DoublePack(sign, exp, 0);
6455 }
6456 }
6457 dst.SetFloat(i, result);
6458 }
6459 return dst;
6460 }
6461
6462
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6463 LogicVRegister Simulator::frecpx(VectorFormat vform,
6464 LogicVRegister dst,
6465 const LogicVRegister& src) {
6466 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6467 frecpx<SimFloat16>(vform, dst, src);
6468 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6469 frecpx<float>(vform, dst, src);
6470 } else {
6471 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6472 frecpx<double>(vform, dst, src);
6473 }
6474 return dst;
6475 }
6476
flogb(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6477 LogicVRegister Simulator::flogb(VectorFormat vform,
6478 LogicVRegister dst,
6479 const LogicVRegister& src) {
6480 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6481 double op = 0.0;
6482 switch (vform) {
6483 case kFormatVnH:
6484 op = FPToDouble(src.Float<SimFloat16>(i), kIgnoreDefaultNaN);
6485 break;
6486 case kFormatVnS:
6487 op = src.Float<float>(i);
6488 break;
6489 case kFormatVnD:
6490 op = src.Float<double>(i);
6491 break;
6492 default:
6493 VIXL_UNREACHABLE();
6494 }
6495
6496 switch (std::fpclassify(op)) {
6497 case FP_INFINITE:
6498 dst.SetInt(vform, i, MaxIntFromFormat(vform));
6499 break;
6500 case FP_NAN:
6501 case FP_ZERO:
6502 dst.SetInt(vform, i, MinIntFromFormat(vform));
6503 break;
6504 case FP_SUBNORMAL: {
6505 // DoubleMantissa returns the mantissa of its input, leaving 12 zero
6506 // bits where the sign and exponent would be. We subtract 12 to
6507 // find the number of leading zero bits in the mantissa itself.
6508 int64_t mant_zero_count = CountLeadingZeros(DoubleMantissa(op)) - 12;
6509 // Log2 of a subnormal is the lowest exponent a normal number can
6510 // represent, together with the zeros in the mantissa.
6511 dst.SetInt(vform, i, -1023 - mant_zero_count);
6512 break;
6513 }
6514 case FP_NORMAL:
6515 // Log2 of a normal number is the exponent minus the bias.
6516 dst.SetInt(vform, i, static_cast<int64_t>(DoubleExp(op)) - 1023);
6517 break;
6518 }
6519 }
6520 return dst;
6521 }
6522
ftsmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6523 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6524 LogicVRegister dst,
6525 const LogicVRegister& src1,
6526 const LogicVRegister& src2) {
6527 SimVRegister maybe_neg_src1;
6528
6529 // The bottom bit of src2 controls the sign of the result. Use it to
6530 // conditionally invert the sign of one `fmul` operand.
6531 shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6532 eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6533
6534 // Multiply src1 by the modified neg_src1, which is potentially its negation.
6535 // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6536 // rather than neg_src1, must be the first source argument.
6537 fmul(vform, dst, src1, maybe_neg_src1);
6538
6539 return dst;
6540 }
6541
ftssel(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6542 LogicVRegister Simulator::ftssel(VectorFormat vform,
6543 LogicVRegister dst,
6544 const LogicVRegister& src1,
6545 const LogicVRegister& src2) {
6546 unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6547 uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6548 uint64_t one;
6549
6550 if (lane_bits == kHRegSize) {
6551 one = Float16ToRawbits(Float16(1.0));
6552 } else if (lane_bits == kSRegSize) {
6553 one = FloatToRawbits(1.0);
6554 } else {
6555 VIXL_ASSERT(lane_bits == kDRegSize);
6556 one = DoubleToRawbits(1.0);
6557 }
6558
6559 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6560 // Use integer accessors for this operation, as this is a data manipulation
6561 // task requiring no calculation.
6562 uint64_t op = src1.Uint(vform, i);
6563
6564 // Only the bottom two bits of the src2 register are significant, indicating
6565 // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6566 // determines the sign of the value written to dst.
6567 uint64_t q = src2.Uint(vform, i);
6568 if ((q & 1) == 1) op = one;
6569 if ((q & 2) == 2) op ^= sign_bit;
6570
6571 dst.SetUint(vform, i, op);
6572 }
6573
6574 return dst;
6575 }
6576
6577 template <typename T>
FTMaddHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,uint64_t coeff_pos,uint64_t coeff_neg)6578 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6579 LogicVRegister dst,
6580 const LogicVRegister& src1,
6581 const LogicVRegister& src2,
6582 uint64_t coeff_pos,
6583 uint64_t coeff_neg) {
6584 SimVRegister zero;
6585 dup_immediate(kFormatVnB, zero, 0);
6586
6587 SimVRegister cf;
6588 SimVRegister cfn;
6589 dup_immediate(vform, cf, coeff_pos);
6590 dup_immediate(vform, cfn, coeff_neg);
6591
6592 // The specification requires testing the top bit of the raw value, rather
6593 // than the sign of the floating point number, so use an integer comparison
6594 // here.
6595 SimPRegister is_neg;
6596 SVEIntCompareVectorsHelper(lt,
6597 vform,
6598 is_neg,
6599 GetPTrue(),
6600 src2,
6601 zero,
6602 false,
6603 LeaveFlags);
6604 mov_merging(vform, cf, is_neg, cfn);
6605
6606 SimVRegister temp;
6607 fabs_<T>(vform, temp, src2);
6608 fmla<T>(vform, cf, cf, src1, temp);
6609 mov(vform, dst, cf);
6610 return dst;
6611 }
6612
6613
ftmad(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,unsigned index)6614 LogicVRegister Simulator::ftmad(VectorFormat vform,
6615 LogicVRegister dst,
6616 const LogicVRegister& src1,
6617 const LogicVRegister& src2,
6618 unsigned index) {
6619 static const uint64_t ftmad_coeff16[] = {0x3c00,
6620 0xb155,
6621 0x2030,
6622 0x0000,
6623 0x0000,
6624 0x0000,
6625 0x0000,
6626 0x0000,
6627 0x3c00,
6628 0xb800,
6629 0x293a,
6630 0x0000,
6631 0x0000,
6632 0x0000,
6633 0x0000,
6634 0x0000};
6635
6636 static const uint64_t ftmad_coeff32[] = {0x3f800000,
6637 0xbe2aaaab,
6638 0x3c088886,
6639 0xb95008b9,
6640 0x36369d6d,
6641 0x00000000,
6642 0x00000000,
6643 0x00000000,
6644 0x3f800000,
6645 0xbf000000,
6646 0x3d2aaaa6,
6647 0xbab60705,
6648 0x37cd37cc,
6649 0x00000000,
6650 0x00000000,
6651 0x00000000};
6652
6653 static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6654 0xbfc5555555555543,
6655 0x3f8111111110f30c,
6656 0xbf2a01a019b92fc6,
6657 0x3ec71de351f3d22b,
6658 0xbe5ae5e2b60f7b91,
6659 0x3de5d8408868552f,
6660 0x0000000000000000,
6661 0x3ff0000000000000,
6662 0xbfe0000000000000,
6663 0x3fa5555555555536,
6664 0xbf56c16c16c13a0b,
6665 0x3efa01a019b1e8d8,
6666 0xbe927e4f7282f468,
6667 0x3e21ee96d2641b13,
6668 0xbda8f76380fbb401};
6669 VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6670 VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6671 VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6672
6673 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6674 FTMaddHelper<SimFloat16>(vform,
6675 dst,
6676 src1,
6677 src2,
6678 ftmad_coeff16[index],
6679 ftmad_coeff16[index + 8]);
6680 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6681 FTMaddHelper<float>(vform,
6682 dst,
6683 src1,
6684 src2,
6685 ftmad_coeff32[index],
6686 ftmad_coeff32[index + 8]);
6687 } else {
6688 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6689 FTMaddHelper<double>(vform,
6690 dst,
6691 src1,
6692 src2,
6693 ftmad_coeff64[index],
6694 ftmad_coeff64[index + 8]);
6695 }
6696 return dst;
6697 }
6698
fexpa(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6699 LogicVRegister Simulator::fexpa(VectorFormat vform,
6700 LogicVRegister dst,
6701 const LogicVRegister& src) {
6702 static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6703 0x005d, 0x0075, 0x008e, 0x00a8,
6704 0x00c2, 0x00dc, 0x00f8, 0x0114,
6705 0x0130, 0x014d, 0x016b, 0x0189,
6706 0x01a8, 0x01c8, 0x01e8, 0x0209,
6707 0x022b, 0x024e, 0x0271, 0x0295,
6708 0x02ba, 0x02e0, 0x0306, 0x032e,
6709 0x0356, 0x037f, 0x03a9, 0x03d4};
6710
6711 static const uint64_t fexpa_coeff32[] =
6712 {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6713 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6714 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6715 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6716 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6717 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6718 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6719 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6720 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6721 0x7d3e0c};
6722
6723 static const uint64_t fexpa_coeff64[] =
6724 {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6725 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6726 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6727 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6728 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6729 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6730 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6731 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6732 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6733 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6734 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6735 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6736 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6737 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6738 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6739 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6740
6741 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6742 int index_highbit = 5;
6743 int op_highbit, op_shift;
6744 const uint64_t* fexpa_coeff;
6745
6746 if (lane_size == kHRegSize) {
6747 index_highbit = 4;
6748 VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6749 fexpa_coeff = fexpa_coeff16;
6750 op_highbit = 9;
6751 op_shift = 10;
6752 } else if (lane_size == kSRegSize) {
6753 VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6754 fexpa_coeff = fexpa_coeff32;
6755 op_highbit = 13;
6756 op_shift = 23;
6757 } else {
6758 VIXL_ASSERT(lane_size == kDRegSize);
6759 VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6760 fexpa_coeff = fexpa_coeff64;
6761 op_highbit = 16;
6762 op_shift = 52;
6763 }
6764
6765 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6766 uint64_t op = src.Uint(vform, i);
6767 uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6768 result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6769 dst.SetUint(vform, i, result);
6770 }
6771 return dst;
6772 }
6773
6774 template <typename T>
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6775 LogicVRegister Simulator::fscale(VectorFormat vform,
6776 LogicVRegister dst,
6777 const LogicVRegister& src1,
6778 const LogicVRegister& src2) {
6779 T two = T(2.0);
6780 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6781 T src1_val = src1.Float<T>(i);
6782 if (!IsNaN(src1_val)) {
6783 int64_t scale = src2.Int(vform, i);
6784 // TODO: this is a low-performance implementation, but it's simple and
6785 // less likely to be buggy. Consider replacing it with something faster.
6786
6787 // Scales outside of these bounds become infinity or zero, so there's no
6788 // point iterating further.
6789 scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6790
6791 // Compute src1_val * 2 ^ scale. If scale is positive, multiply by two and
6792 // decrement scale until it's zero.
6793 while (scale-- > 0) {
6794 src1_val = FPMul(src1_val, two);
6795 }
6796
6797 // If scale is negative, divide by two and increment scale until it's
6798 // zero. Initially, scale is (src2 - 1), so we pre-increment.
6799 while (++scale < 0) {
6800 src1_val = FPDiv(src1_val, two);
6801 }
6802 }
6803 dst.SetFloat<T>(i, src1_val);
6804 }
6805 return dst;
6806 }
6807
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6808 LogicVRegister Simulator::fscale(VectorFormat vform,
6809 LogicVRegister dst,
6810 const LogicVRegister& src1,
6811 const LogicVRegister& src2) {
6812 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6813 fscale<SimFloat16>(vform, dst, src1, src2);
6814 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6815 fscale<float>(vform, dst, src1, src2);
6816 } else {
6817 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6818 fscale<double>(vform, dst, src1, src2);
6819 }
6820 return dst;
6821 }
6822
scvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6823 LogicVRegister Simulator::scvtf(VectorFormat vform,
6824 unsigned dst_data_size_in_bits,
6825 unsigned src_data_size_in_bits,
6826 LogicVRegister dst,
6827 const LogicPRegister& pg,
6828 const LogicVRegister& src,
6829 FPRounding round,
6830 int fbits) {
6831 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6832 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6833 dst.ClearForWrite(vform);
6834
6835 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6836 if (!pg.IsActive(vform, i)) continue;
6837
6838 int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6839 0,
6840 src.Uint(vform, i));
6841
6842 switch (dst_data_size_in_bits) {
6843 case kHRegSize: {
6844 SimFloat16 result = FixedToFloat16(value, fbits, round);
6845 dst.SetUint(vform, i, Float16ToRawbits(result));
6846 break;
6847 }
6848 case kSRegSize: {
6849 float result = FixedToFloat(value, fbits, round);
6850 dst.SetUint(vform, i, FloatToRawbits(result));
6851 break;
6852 }
6853 case kDRegSize: {
6854 double result = FixedToDouble(value, fbits, round);
6855 dst.SetUint(vform, i, DoubleToRawbits(result));
6856 break;
6857 }
6858 default:
6859 VIXL_UNIMPLEMENTED();
6860 break;
6861 }
6862 }
6863
6864 return dst;
6865 }
6866
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6867 LogicVRegister Simulator::scvtf(VectorFormat vform,
6868 LogicVRegister dst,
6869 const LogicVRegister& src,
6870 int fbits,
6871 FPRounding round) {
6872 return scvtf(vform,
6873 LaneSizeInBitsFromFormat(vform),
6874 LaneSizeInBitsFromFormat(vform),
6875 dst,
6876 GetPTrue(),
6877 src,
6878 round,
6879 fbits);
6880 }
6881
ucvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6882 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6883 unsigned dst_data_size_in_bits,
6884 unsigned src_data_size_in_bits,
6885 LogicVRegister dst,
6886 const LogicPRegister& pg,
6887 const LogicVRegister& src,
6888 FPRounding round,
6889 int fbits) {
6890 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6891 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6892 dst.ClearForWrite(vform);
6893
6894 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6895 if (!pg.IsActive(vform, i)) continue;
6896
6897 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6898 0,
6899 src.Uint(vform, i));
6900
6901 switch (dst_data_size_in_bits) {
6902 case kHRegSize: {
6903 SimFloat16 result = UFixedToFloat16(value, fbits, round);
6904 dst.SetUint(vform, i, Float16ToRawbits(result));
6905 break;
6906 }
6907 case kSRegSize: {
6908 float result = UFixedToFloat(value, fbits, round);
6909 dst.SetUint(vform, i, FloatToRawbits(result));
6910 break;
6911 }
6912 case kDRegSize: {
6913 double result = UFixedToDouble(value, fbits, round);
6914 dst.SetUint(vform, i, DoubleToRawbits(result));
6915 break;
6916 }
6917 default:
6918 VIXL_UNIMPLEMENTED();
6919 break;
6920 }
6921 }
6922
6923 return dst;
6924 }
6925
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6926 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6927 LogicVRegister dst,
6928 const LogicVRegister& src,
6929 int fbits,
6930 FPRounding round) {
6931 return ucvtf(vform,
6932 LaneSizeInBitsFromFormat(vform),
6933 LaneSizeInBitsFromFormat(vform),
6934 dst,
6935 GetPTrue(),
6936 src,
6937 round,
6938 fbits);
6939 }
6940
unpk(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,UnpackType unpack_type,ExtendType extend_type)6941 LogicVRegister Simulator::unpk(VectorFormat vform,
6942 LogicVRegister dst,
6943 const LogicVRegister& src,
6944 UnpackType unpack_type,
6945 ExtendType extend_type) {
6946 VectorFormat vform_half = VectorFormatHalfWidth(vform);
6947 const int lane_count = LaneCountFromFormat(vform);
6948 const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6949
6950 switch (extend_type) {
6951 case kSignedExtend: {
6952 int64_t result[kZRegMaxSizeInBytes];
6953 for (int i = 0; i < lane_count; ++i) {
6954 result[i] = src.Int(vform_half, i + src_start_lane);
6955 }
6956 for (int i = 0; i < lane_count; ++i) {
6957 dst.SetInt(vform, i, result[i]);
6958 }
6959 break;
6960 }
6961 case kUnsignedExtend: {
6962 uint64_t result[kZRegMaxSizeInBytes];
6963 for (int i = 0; i < lane_count; ++i) {
6964 result[i] = src.Uint(vform_half, i + src_start_lane);
6965 }
6966 for (int i = 0; i < lane_count; ++i) {
6967 dst.SetUint(vform, i, result[i]);
6968 }
6969 break;
6970 }
6971 default:
6972 VIXL_UNREACHABLE();
6973 }
6974 return dst;
6975 }
6976
SVEIntCompareVectorsHelper(Condition cond,VectorFormat vform,LogicPRegister dst,const LogicPRegister & mask,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements,FlagsUpdate flags)6977 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
6978 VectorFormat vform,
6979 LogicPRegister dst,
6980 const LogicPRegister& mask,
6981 const LogicVRegister& src1,
6982 const LogicVRegister& src2,
6983 bool is_wide_elements,
6984 FlagsUpdate flags) {
6985 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6986 bool result = false;
6987 if (mask.IsActive(vform, lane)) {
6988 int64_t op1 = 0xbadbeef;
6989 int64_t op2 = 0xbadbeef;
6990 int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
6991 switch (cond) {
6992 case eq:
6993 case ge:
6994 case gt:
6995 case lt:
6996 case le:
6997 case ne:
6998 op1 = src1.Int(vform, lane);
6999 op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
7000 : src2.Int(vform, lane);
7001 break;
7002 case hi:
7003 case hs:
7004 case ls:
7005 case lo:
7006 op1 = src1.Uint(vform, lane);
7007 op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
7008 : src2.Uint(vform, lane);
7009 break;
7010 default:
7011 VIXL_UNREACHABLE();
7012 }
7013
7014 switch (cond) {
7015 case eq:
7016 result = (op1 == op2);
7017 break;
7018 case ne:
7019 result = (op1 != op2);
7020 break;
7021 case ge:
7022 result = (op1 >= op2);
7023 break;
7024 case gt:
7025 result = (op1 > op2);
7026 break;
7027 case le:
7028 result = (op1 <= op2);
7029 break;
7030 case lt:
7031 result = (op1 < op2);
7032 break;
7033 case hs:
7034 result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
7035 break;
7036 case hi:
7037 result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
7038 break;
7039 case ls:
7040 result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
7041 break;
7042 case lo:
7043 result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
7044 break;
7045 default:
7046 VIXL_UNREACHABLE();
7047 }
7048 }
7049 dst.SetActive(vform, lane, result);
7050 }
7051
7052 if (flags == SetFlags) PredTest(vform, mask, dst);
7053
7054 return dst;
7055 }
7056
SVEBitwiseShiftHelper(Shift shift_op,VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements)7057 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
7058 VectorFormat vform,
7059 LogicVRegister dst,
7060 const LogicVRegister& src1,
7061 const LogicVRegister& src2,
7062 bool is_wide_elements) {
7063 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
7064 VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
7065
7066 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
7067 int shift_src_lane = lane;
7068 if (is_wide_elements) {
7069 // If the shift amount comes from wide elements, select the D-sized lane
7070 // which occupies the corresponding lanes of the value to be shifted.
7071 shift_src_lane = (lane * lane_size) / kDRegSize;
7072 }
7073 uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
7074
7075 // Saturate shift_amount to the size of the lane that will be shifted.
7076 if (shift_amount > lane_size) shift_amount = lane_size;
7077
7078 uint64_t value = src1.Uint(vform, lane);
7079 int64_t result = ShiftOperand(lane_size,
7080 value,
7081 shift_op,
7082 static_cast<unsigned>(shift_amount));
7083 dst.SetUint(vform, lane, result);
7084 }
7085
7086 return dst;
7087 }
7088
asrd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int shift)7089 LogicVRegister Simulator::asrd(VectorFormat vform,
7090 LogicVRegister dst,
7091 const LogicVRegister& src1,
7092 int shift) {
7093 VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
7094 LaneSizeInBitsFromFormat(vform)));
7095
7096 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7097 int64_t value = src1.Int(vform, i);
7098 if (shift <= 63) {
7099 if (value < 0) {
7100 // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
7101 // cast to int64_t, and cannot cause signed overflow in the result.
7102 value = value + GetUintMask(shift);
7103 }
7104 value = ShiftOperand(kDRegSize, value, ASR, shift);
7105 } else {
7106 value = 0;
7107 }
7108 dst.SetInt(vform, i, value);
7109 }
7110 return dst;
7111 }
7112
SVEBitwiseLogicalUnpredicatedHelper(LogicalOp logical_op,VectorFormat vform,LogicVRegister zd,const LogicVRegister & zn,const LogicVRegister & zm)7113 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
7114 LogicalOp logical_op,
7115 VectorFormat vform,
7116 LogicVRegister zd,
7117 const LogicVRegister& zn,
7118 const LogicVRegister& zm) {
7119 VIXL_ASSERT(IsSVEFormat(vform));
7120 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7121 uint64_t op1 = zn.Uint(vform, i);
7122 uint64_t op2 = zm.Uint(vform, i);
7123 uint64_t result = 0;
7124 switch (logical_op) {
7125 case AND:
7126 result = op1 & op2;
7127 break;
7128 case BIC:
7129 result = op1 & ~op2;
7130 break;
7131 case EOR:
7132 result = op1 ^ op2;
7133 break;
7134 case ORR:
7135 result = op1 | op2;
7136 break;
7137 default:
7138 VIXL_UNIMPLEMENTED();
7139 }
7140 zd.SetUint(vform, i, result);
7141 }
7142
7143 return zd;
7144 }
7145
SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,LogicPRegister pd,const LogicPRegister & pn,const LogicPRegister & pm)7146 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
7147 LogicPRegister pd,
7148 const LogicPRegister& pn,
7149 const LogicPRegister& pm) {
7150 for (int i = 0; i < pn.GetChunkCount(); i++) {
7151 LogicPRegister::ChunkType op1 = pn.GetChunk(i);
7152 LogicPRegister::ChunkType op2 = pm.GetChunk(i);
7153 LogicPRegister::ChunkType result = 0;
7154 switch (op) {
7155 case ANDS_p_p_pp_z:
7156 case AND_p_p_pp_z:
7157 result = op1 & op2;
7158 break;
7159 case BICS_p_p_pp_z:
7160 case BIC_p_p_pp_z:
7161 result = op1 & ~op2;
7162 break;
7163 case EORS_p_p_pp_z:
7164 case EOR_p_p_pp_z:
7165 result = op1 ^ op2;
7166 break;
7167 case NANDS_p_p_pp_z:
7168 case NAND_p_p_pp_z:
7169 result = ~(op1 & op2);
7170 break;
7171 case NORS_p_p_pp_z:
7172 case NOR_p_p_pp_z:
7173 result = ~(op1 | op2);
7174 break;
7175 case ORNS_p_p_pp_z:
7176 case ORN_p_p_pp_z:
7177 result = op1 | ~op2;
7178 break;
7179 case ORRS_p_p_pp_z:
7180 case ORR_p_p_pp_z:
7181 result = op1 | op2;
7182 break;
7183 default:
7184 VIXL_UNIMPLEMENTED();
7185 }
7186 pd.SetChunk(i, result);
7187 }
7188 return pd;
7189 }
7190
SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,VectorFormat vform,LogicVRegister zd,uint64_t imm)7191 LogicVRegister Simulator::SVEBitwiseImmHelper(
7192 SVEBitwiseLogicalWithImm_UnpredicatedOp op,
7193 VectorFormat vform,
7194 LogicVRegister zd,
7195 uint64_t imm) {
7196 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7197 uint64_t op1 = zd.Uint(vform, i);
7198 uint64_t result = 0;
7199 switch (op) {
7200 case AND_z_zi:
7201 result = op1 & imm;
7202 break;
7203 case EOR_z_zi:
7204 result = op1 ^ imm;
7205 break;
7206 case ORR_z_zi:
7207 result = op1 | imm;
7208 break;
7209 default:
7210 VIXL_UNIMPLEMENTED();
7211 }
7212 zd.SetUint(vform, i, result);
7213 }
7214
7215 return zd;
7216 }
7217
SVEStructuredStoreHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr)7218 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
7219 const LogicPRegister& pg,
7220 unsigned zt_code,
7221 const LogicSVEAddressVector& addr) {
7222 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7223
7224 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7225 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7226 int msize_in_bytes = addr.GetMsizeInBytes();
7227 int reg_count = addr.GetRegCount();
7228
7229 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7230 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7231
7232 unsigned zt_codes[4] = {zt_code,
7233 (zt_code + 1) % kNumberOfZRegisters,
7234 (zt_code + 2) % kNumberOfZRegisters,
7235 (zt_code + 3) % kNumberOfZRegisters};
7236
7237 LogicVRegister zt[4] = {
7238 ReadVRegister(zt_codes[0]),
7239 ReadVRegister(zt_codes[1]),
7240 ReadVRegister(zt_codes[2]),
7241 ReadVRegister(zt_codes[3]),
7242 };
7243
7244 // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7245 // are ignored, so read the source register using the VectorFormat that
7246 // corresponds with the storage format, and multiply the index accordingly.
7247 VectorFormat unpack_vform =
7248 SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7249 int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7250
7251 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7252 if (!pg.IsActive(vform, i)) continue;
7253
7254 for (int r = 0; r < reg_count; r++) {
7255 uint64_t element_address = addr.GetElementAddress(i, r);
7256 StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
7257 }
7258 }
7259
7260 if (ShouldTraceWrites()) {
7261 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7262 if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7263 // Use an FP format where it's likely that we're accessing FP data.
7264 format = GetPrintRegisterFormatTryFP(format);
7265 }
7266 // Stores don't represent a change to the source register's value, so only
7267 // print the relevant part of the value.
7268 format = GetPrintRegPartial(format);
7269
7270 PrintZStructAccess(zt_code,
7271 reg_count,
7272 pg,
7273 format,
7274 msize_in_bytes,
7275 "->",
7276 addr);
7277 }
7278 }
7279
SVEStructuredLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,bool is_signed)7280 void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7281 const LogicPRegister& pg,
7282 unsigned zt_code,
7283 const LogicSVEAddressVector& addr,
7284 bool is_signed) {
7285 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7286 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7287 int msize_in_bytes = addr.GetMsizeInBytes();
7288 int reg_count = addr.GetRegCount();
7289
7290 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7291 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7292 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7293
7294 unsigned zt_codes[4] = {zt_code,
7295 (zt_code + 1) % kNumberOfZRegisters,
7296 (zt_code + 2) % kNumberOfZRegisters,
7297 (zt_code + 3) % kNumberOfZRegisters};
7298 LogicVRegister zt[4] = {
7299 ReadVRegister(zt_codes[0]),
7300 ReadVRegister(zt_codes[1]),
7301 ReadVRegister(zt_codes[2]),
7302 ReadVRegister(zt_codes[3]),
7303 };
7304
7305 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7306 for (int r = 0; r < reg_count; r++) {
7307 uint64_t element_address = addr.GetElementAddress(i, r);
7308
7309 if (!pg.IsActive(vform, i)) {
7310 zt[r].SetUint(vform, i, 0);
7311 continue;
7312 }
7313
7314 if (is_signed) {
7315 LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
7316 } else {
7317 LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
7318 }
7319 }
7320 }
7321
7322 if (ShouldTraceVRegs()) {
7323 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7324 if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7325 // Use an FP format where it's likely that we're accessing FP data.
7326 format = GetPrintRegisterFormatTryFP(format);
7327 }
7328 PrintZStructAccess(zt_code,
7329 reg_count,
7330 pg,
7331 format,
7332 msize_in_bytes,
7333 "<-",
7334 addr);
7335 }
7336 }
7337
brka(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7338 LogicPRegister Simulator::brka(LogicPRegister pd,
7339 const LogicPRegister& pg,
7340 const LogicPRegister& pn) {
7341 bool break_ = false;
7342 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7343 if (pg.IsActive(kFormatVnB, i)) {
7344 pd.SetActive(kFormatVnB, i, !break_);
7345 break_ |= pn.IsActive(kFormatVnB, i);
7346 }
7347 }
7348
7349 return pd;
7350 }
7351
brkb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7352 LogicPRegister Simulator::brkb(LogicPRegister pd,
7353 const LogicPRegister& pg,
7354 const LogicPRegister& pn) {
7355 bool break_ = false;
7356 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7357 if (pg.IsActive(kFormatVnB, i)) {
7358 break_ |= pn.IsActive(kFormatVnB, i);
7359 pd.SetActive(kFormatVnB, i, !break_);
7360 }
7361 }
7362
7363 return pd;
7364 }
7365
brkn(LogicPRegister pdm,const LogicPRegister & pg,const LogicPRegister & pn)7366 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7367 const LogicPRegister& pg,
7368 const LogicPRegister& pn) {
7369 if (!IsLastActive(kFormatVnB, pg, pn)) {
7370 pfalse(pdm);
7371 }
7372 return pdm;
7373 }
7374
brkpa(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7375 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7376 const LogicPRegister& pg,
7377 const LogicPRegister& pn,
7378 const LogicPRegister& pm) {
7379 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7380
7381 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7382 bool active = false;
7383 if (pg.IsActive(kFormatVnB, i)) {
7384 active = last_active;
7385 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7386 }
7387 pd.SetActive(kFormatVnB, i, active);
7388 }
7389
7390 return pd;
7391 }
7392
brkpb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7393 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7394 const LogicPRegister& pg,
7395 const LogicPRegister& pn,
7396 const LogicPRegister& pm) {
7397 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7398
7399 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7400 bool active = false;
7401 if (pg.IsActive(kFormatVnB, i)) {
7402 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7403 active = last_active;
7404 }
7405 pd.SetActive(kFormatVnB, i, active);
7406 }
7407
7408 return pd;
7409 }
7410
SVEFaultTolerantLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,SVEFaultTolerantLoadType type,bool is_signed)7411 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7412 const LogicPRegister& pg,
7413 unsigned zt_code,
7414 const LogicSVEAddressVector& addr,
7415 SVEFaultTolerantLoadType type,
7416 bool is_signed) {
7417 int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7418 int msize_in_bits = addr.GetMsizeInBits();
7419 int msize_in_bytes = addr.GetMsizeInBytes();
7420
7421 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7422 VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7423 VIXL_ASSERT(addr.GetRegCount() == 1);
7424
7425 LogicVRegister zt = ReadVRegister(zt_code);
7426 LogicPRegister ffr = ReadFFR();
7427
7428 // Non-faulting loads are allowed to fail arbitrarily. To stress user
7429 // code, fail a random element in roughly one in eight full-vector loads.
7430 uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7431 int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7432
7433 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7434 uint64_t value = 0;
7435
7436 if (pg.IsActive(vform, i)) {
7437 uint64_t element_address = addr.GetElementAddress(i, 0);
7438
7439 if (type == kSVEFirstFaultLoad) {
7440 // First-faulting loads always load the first active element, regardless
7441 // of FFR. The result will be discarded if its FFR lane is inactive, but
7442 // it could still generate a fault.
7443 value = MemReadUint(msize_in_bytes, element_address);
7444 // All subsequent elements have non-fault semantics.
7445 type = kSVENonFaultLoad;
7446
7447 } else if (ffr.IsActive(vform, i)) {
7448 // Simulation of fault-tolerant loads relies on system calls, and is
7449 // likely to be relatively slow, so we only actually perform the load if
7450 // its FFR lane is active.
7451
7452 bool can_read = (i < fake_fault_at_lane) &&
7453 CanReadMemory(element_address, msize_in_bytes);
7454 if (can_read) {
7455 value = MemReadUint(msize_in_bytes, element_address);
7456 } else {
7457 // Propagate the fault to the end of FFR.
7458 for (int j = i; j < LaneCountFromFormat(vform); j++) {
7459 ffr.SetActive(vform, j, false);
7460 }
7461 }
7462 }
7463 }
7464
7465 // The architecture permits a few possible results for inactive FFR lanes
7466 // (including those caused by a fault in this instruction). We choose to
7467 // leave the register value unchanged (like merging predication) because
7468 // no other input to this instruction can have the same behaviour.
7469 //
7470 // Note that this behaviour takes precedence over pg's zeroing predication.
7471
7472 if (ffr.IsActive(vform, i)) {
7473 int msb = msize_in_bits - 1;
7474 if (is_signed) {
7475 zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7476 } else {
7477 zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7478 }
7479 }
7480 }
7481
7482 if (ShouldTraceVRegs()) {
7483 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7484 if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7485 // Use an FP format where it's likely that we're accessing FP data.
7486 format = GetPrintRegisterFormatTryFP(format);
7487 }
7488 // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7489 // expects a single mask, so combine the two predicates.
7490 SimPRegister mask;
7491 SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7492 PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7493 }
7494 }
7495
SVEGatherLoadScalarPlusVectorHelper(const Instruction * instr,VectorFormat vform,SVEOffsetModifier mod)7496 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7497 VectorFormat vform,
7498 SVEOffsetModifier mod) {
7499 bool is_signed = instr->ExtractBit(14) == 0;
7500 bool is_ff = instr->ExtractBit(13) == 1;
7501 // Note that these instructions don't use the Dtype encoding.
7502 int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7503 int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7504 uint64_t base = ReadXRegister(instr->GetRn());
7505 LogicSVEAddressVector addr(base,
7506 &ReadVRegister(instr->GetRm()),
7507 vform,
7508 mod,
7509 scale);
7510 addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7511 if (is_ff) {
7512 SVEFaultTolerantLoadHelper(vform,
7513 ReadPRegister(instr->GetPgLow8()),
7514 instr->GetRt(),
7515 addr,
7516 kSVEFirstFaultLoad,
7517 is_signed);
7518 } else {
7519 SVEStructuredLoadHelper(vform,
7520 ReadPRegister(instr->GetPgLow8()),
7521 instr->GetRt(),
7522 addr,
7523 is_signed);
7524 }
7525 }
7526
GetFirstActive(VectorFormat vform,const LogicPRegister & pg) const7527 int Simulator::GetFirstActive(VectorFormat vform,
7528 const LogicPRegister& pg) const {
7529 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7530 if (pg.IsActive(vform, i)) return i;
7531 }
7532 return -1;
7533 }
7534
GetLastActive(VectorFormat vform,const LogicPRegister & pg) const7535 int Simulator::GetLastActive(VectorFormat vform,
7536 const LogicPRegister& pg) const {
7537 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7538 if (pg.IsActive(vform, i)) return i;
7539 }
7540 return -1;
7541 }
7542
CountActiveLanes(VectorFormat vform,const LogicPRegister & pg) const7543 int Simulator::CountActiveLanes(VectorFormat vform,
7544 const LogicPRegister& pg) const {
7545 int count = 0;
7546 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7547 count += pg.IsActive(vform, i) ? 1 : 0;
7548 }
7549 return count;
7550 }
7551
CountActiveAndTrueLanes(VectorFormat vform,const LogicPRegister & pg,const LogicPRegister & pn) const7552 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7553 const LogicPRegister& pg,
7554 const LogicPRegister& pn) const {
7555 int count = 0;
7556 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7557 count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7558 }
7559 return count;
7560 }
7561
GetPredicateConstraintLaneCount(VectorFormat vform,int pattern) const7562 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7563 int pattern) const {
7564 VIXL_ASSERT(IsSVEFormat(vform));
7565 int all = LaneCountFromFormat(vform);
7566 VIXL_ASSERT(all > 0);
7567
7568 switch (pattern) {
7569 case SVE_VL1:
7570 case SVE_VL2:
7571 case SVE_VL3:
7572 case SVE_VL4:
7573 case SVE_VL5:
7574 case SVE_VL6:
7575 case SVE_VL7:
7576 case SVE_VL8:
7577 // VL1-VL8 are encoded directly.
7578 VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7579 VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7580 return (pattern <= all) ? pattern : 0;
7581 case SVE_VL16:
7582 case SVE_VL32:
7583 case SVE_VL64:
7584 case SVE_VL128:
7585 case SVE_VL256: {
7586 // VL16-VL256 are encoded as log2(N) + c.
7587 int min = 16 << (pattern - SVE_VL16);
7588 return (min <= all) ? min : 0;
7589 }
7590 // Special cases.
7591 case SVE_POW2:
7592 return 1 << HighestSetBitPosition(all);
7593 case SVE_MUL4:
7594 return all - (all % 4);
7595 case SVE_MUL3:
7596 return all - (all % 3);
7597 case SVE_ALL:
7598 return all;
7599 }
7600 // Unnamed cases architecturally return 0.
7601 return 0;
7602 }
7603
match(VectorFormat vform,LogicPRegister dst,const LogicVRegister & haystack,const LogicVRegister & needles,bool negate_match)7604 LogicPRegister Simulator::match(VectorFormat vform,
7605 LogicPRegister dst,
7606 const LogicVRegister& haystack,
7607 const LogicVRegister& needles,
7608 bool negate_match) {
7609 SimVRegister ztemp;
7610 SimPRegister ptemp;
7611
7612 pfalse(dst);
7613 int lanes_per_segment = kQRegSize / LaneSizeInBitsFromFormat(vform);
7614 for (int i = 0; i < lanes_per_segment; i++) {
7615 dup_elements_to_segments(vform, ztemp, needles, i);
7616 SVEIntCompareVectorsHelper(eq,
7617 vform,
7618 ptemp,
7619 GetPTrue(),
7620 haystack,
7621 ztemp,
7622 false,
7623 LeaveFlags);
7624 SVEPredicateLogicalHelper(ORR_p_p_pp_z, dst, dst, ptemp);
7625 }
7626 if (negate_match) {
7627 ptrue(vform, ptemp, SVE_ALL);
7628 SVEPredicateLogicalHelper(EOR_p_p_pp_z, dst, dst, ptemp);
7629 }
7630 return dst;
7631 }
7632
GetStructAddress(int lane) const7633 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7634 if (IsContiguous()) {
7635 return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7636 }
7637
7638 VIXL_ASSERT(IsScatterGather());
7639 VIXL_ASSERT(vector_ != NULL);
7640
7641 // For scatter-gather accesses, we need to extract the offset from vector_,
7642 // and apply modifiers.
7643
7644 uint64_t offset = 0;
7645 switch (vector_form_) {
7646 case kFormatVnS:
7647 offset = vector_->GetLane<uint32_t>(lane);
7648 break;
7649 case kFormatVnD:
7650 offset = vector_->GetLane<uint64_t>(lane);
7651 break;
7652 default:
7653 VIXL_UNIMPLEMENTED();
7654 break;
7655 }
7656
7657 switch (vector_mod_) {
7658 case SVE_MUL_VL:
7659 VIXL_UNIMPLEMENTED();
7660 break;
7661 case SVE_LSL:
7662 // We apply the shift below. There's nothing to do here.
7663 break;
7664 case NO_SVE_OFFSET_MODIFIER:
7665 VIXL_ASSERT(vector_shift_ == 0);
7666 break;
7667 case SVE_UXTW:
7668 offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7669 break;
7670 case SVE_SXTW:
7671 offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7672 break;
7673 }
7674
7675 return base_ + (offset << vector_shift_);
7676 }
7677
pack_odd_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7678 LogicVRegister Simulator::pack_odd_elements(VectorFormat vform,
7679 LogicVRegister dst,
7680 const LogicVRegister& src) {
7681 SimVRegister zero;
7682 zero.Clear();
7683 return uzp2(vform, dst, src, zero);
7684 }
7685
pack_even_elements(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)7686 LogicVRegister Simulator::pack_even_elements(VectorFormat vform,
7687 LogicVRegister dst,
7688 const LogicVRegister& src) {
7689 SimVRegister zero;
7690 zero.Clear();
7691 return uzp1(vform, dst, src, zero);
7692 }
7693
adcl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool top)7694 LogicVRegister Simulator::adcl(VectorFormat vform,
7695 LogicVRegister dst,
7696 const LogicVRegister& src1,
7697 const LogicVRegister& src2,
7698 bool top) {
7699 unsigned reg_size = LaneSizeInBitsFromFormat(vform);
7700 VIXL_ASSERT((reg_size == kSRegSize) || (reg_size == kDRegSize));
7701
7702 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
7703 uint64_t left = src1.Uint(vform, i + (top ? 1 : 0));
7704 uint64_t right = dst.Uint(vform, i);
7705 unsigned carry_in = src2.Uint(vform, i + 1) & 1;
7706 std::pair<uint64_t, uint8_t> val_and_flags =
7707 AddWithCarry(reg_size, left, right, carry_in);
7708
7709 // Set even lanes to the result of the addition.
7710 dst.SetUint(vform, i, val_and_flags.first);
7711
7712 // Set odd lanes to the carry flag from the addition.
7713 uint64_t carry_out = (val_and_flags.second >> 1) & 1;
7714 dst.SetUint(vform, i + 1, carry_out);
7715 }
7716 return dst;
7717 }
7718
7719 // Multiply the 2x8 8-bit matrix in src1 by the 8x2 8-bit matrix in src2, add
7720 // the 2x2 32-bit result to the matrix in srcdst, and write back to srcdst.
7721 //
7722 // Matrices of the form:
7723 //
7724 // src1 = ( a b c d e f g h ) src2 = ( A B )
7725 // ( i j k l m n o p ) ( C D )
7726 // ( E F )
7727 // ( G H )
7728 // ( I J )
7729 // ( K L )
7730 // ( M N )
7731 // ( O P )
7732 //
7733 // Are stored in the input vector registers as:
7734 //
7735 // 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
7736 // src1 = [ p | o | n | m | l | k | j | i | h | g | f | e | d | c | b | a ]
7737 // src2 = [ P | N | L | J | H | F | D | B | O | M | K | I | G | E | C | A ]
7738 //
matmul(VectorFormat vform_dst,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2,bool src1_signed,bool src2_signed)7739 LogicVRegister Simulator::matmul(VectorFormat vform_dst,
7740 LogicVRegister srcdst,
7741 const LogicVRegister& src1,
7742 const LogicVRegister& src2,
7743 bool src1_signed,
7744 bool src2_signed) {
7745 // Two destination forms are supported: Q register containing four S-sized
7746 // elements (4S) and Z register containing n S-sized elements (VnS).
7747 VIXL_ASSERT((vform_dst == kFormat4S) || (vform_dst == kFormatVnS));
7748 VectorFormat vform_src = kFormatVnB;
7749 int b_per_segment = kQRegSize / kBRegSize;
7750 int s_per_segment = kQRegSize / kSRegSize;
7751 int64_t result[kZRegMaxSizeInBytes / kSRegSizeInBytes] = {};
7752 int segment_count = LaneCountFromFormat(vform_dst) / 4;
7753 for (int seg = 0; seg < segment_count; seg++) {
7754 for (int i = 0; i < 2; i++) {
7755 for (int j = 0; j < 2; j++) {
7756 int dstidx = (2 * i) + j + (seg * s_per_segment);
7757 int64_t sum = srcdst.Int(vform_dst, dstidx);
7758 for (int k = 0; k < 8; k++) {
7759 int idx1 = (8 * i) + k + (seg * b_per_segment);
7760 int idx2 = (8 * j) + k + (seg * b_per_segment);
7761 int64_t e1 = src1_signed ? src1.Int(vform_src, idx1)
7762 : src1.Uint(vform_src, idx1);
7763 int64_t e2 = src2_signed ? src2.Int(vform_src, idx2)
7764 : src2.Uint(vform_src, idx2);
7765 sum += e1 * e2;
7766 }
7767 result[dstidx] = sum;
7768 }
7769 }
7770 }
7771 srcdst.SetIntArray(vform_dst, result);
7772 return srcdst;
7773 }
7774
7775 // Multiply the 2x2 FP matrix in src1 by the 2x2 FP matrix in src2, add the 2x2
7776 // result to the matrix in srcdst, and write back to srcdst.
7777 //
7778 // Matrices of the form:
7779 //
7780 // src1 = ( a b ) src2 = ( A B )
7781 // ( c d ) ( C D )
7782 //
7783 // Are stored in the input vector registers as:
7784 //
7785 // 3 2 1 0
7786 // src1 = [ d | c | b | a ]
7787 // src2 = [ D | B | C | A ]
7788 //
7789 template <typename T>
fmatmul(VectorFormat vform,LogicVRegister srcdst,const LogicVRegister & src1,const LogicVRegister & src2)7790 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7791 LogicVRegister srcdst,
7792 const LogicVRegister& src1,
7793 const LogicVRegister& src2) {
7794 T result[kZRegMaxSizeInBytes / sizeof(T)];
7795 int T_per_segment = 4;
7796 int segment_count = GetVectorLengthInBytes() / (T_per_segment * sizeof(T));
7797 for (int seg = 0; seg < segment_count; seg++) {
7798 int segoff = seg * T_per_segment;
7799 for (int i = 0; i < 2; i++) {
7800 for (int j = 0; j < 2; j++) {
7801 T prod0 = FPMulNaNs(src1.Float<T>(2 * i + 0 + segoff),
7802 src2.Float<T>(2 * j + 0 + segoff));
7803 T prod1 = FPMulNaNs(src1.Float<T>(2 * i + 1 + segoff),
7804 src2.Float<T>(2 * j + 1 + segoff));
7805 T sum = FPAdd(srcdst.Float<T>(2 * i + j + segoff), prod0);
7806 result[2 * i + j + segoff] = FPAdd(sum, prod1);
7807 }
7808 }
7809 }
7810 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7811 // Elements outside a multiple of 4T are set to zero. This happens only
7812 // for double precision operations, when the VL is a multiple of 128 bits,
7813 // but not a mutiple of 256 bits.
7814 T value = (i < (T_per_segment * segment_count)) ? result[i] : 0;
7815 srcdst.SetFloat<T>(vform, i, value);
7816 }
7817 return srcdst;
7818 }
7819
fmatmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)7820 LogicVRegister Simulator::fmatmul(VectorFormat vform,
7821 LogicVRegister dst,
7822 const LogicVRegister& src1,
7823 const LogicVRegister& src2) {
7824 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
7825 fmatmul<float>(vform, dst, src1, src2);
7826 } else {
7827 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
7828 fmatmul<double>(vform, dst, src1, src2);
7829 }
7830 return dst;
7831 }
7832
7833 } // namespace aarch64
7834 } // namespace vixl
7835
7836 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
7837