1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29 #include <cmath>
30
31 #include "simulator-aarch64.h"
32
33 namespace vixl {
34 namespace aarch64 {
35
36 using vixl::internal::SimFloat16;
37
// Type predicate: reports whether T is the 64-bit floating-point type.
// The generic template answers "no"; the explicit specialisation that follows
// overrides that answer for `double` only.
template <typename T>
bool IsFloat64() {
  const bool matches_double = false;
  return matches_double;
}
template <>
bool IsFloat64<double>() {
  const bool matches_double = true;
  return matches_double;
}
46
// Type predicate: reports whether T is the 32-bit floating-point type.
// Every type gets "false" from the generic template, except `float`, which is
// covered by the explicit specialisation.
template <typename T>
bool IsFloat32() {
  const bool matches_float = false;
  return matches_float;
}
template <>
bool IsFloat32<float>() {
  const bool matches_float = true;
  return matches_float;
}
55
56 template <typename T>
IsFloat16()57 bool IsFloat16() {
58 return false;
59 }
60 template <>
IsFloat16()61 bool IsFloat16<Float16>() {
62 return true;
63 }
64 template <>
IsFloat16()65 bool IsFloat16<SimFloat16>() {
66 return true;
67 }
68
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71 return kFP64DefaultNaN;
72 }
73
74
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77 return kFP32DefaultNaN;
78 }
79
80
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83 return SimFloat16(kFP16DefaultNaN);
84 }
85
86
FixedToDouble(int64_t src,int fbits,FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88 if (src >= 0) {
89 return UFixedToDouble(src, fbits, round);
90 } else if (src == INT64_MIN) {
91 return -UFixedToDouble(src, fbits, round);
92 } else {
93 return -UFixedToDouble(-src, fbits, round);
94 }
95 }
96
97
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99 // An input of 0 is a special case because the result is effectively
100 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101 if (src == 0) {
102 return 0.0;
103 }
104
105 // Calculate the exponent. The highest significant bit will have the value
106 // 2^exponent.
107 const int highest_significant_bit = 63 - CountLeadingZeros(src);
108 const int64_t exponent = highest_significant_bit - fbits;
109
110 return FPRoundToDouble(0, exponent, src, round);
111 }
112
113
FixedToFloat(int64_t src,int fbits,FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115 if (src >= 0) {
116 return UFixedToFloat(src, fbits, round);
117 } else if (src == INT64_MIN) {
118 return -UFixedToFloat(src, fbits, round);
119 } else {
120 return -UFixedToFloat(-src, fbits, round);
121 }
122 }
123
124
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126 // An input of 0 is a special case because the result is effectively
127 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128 if (src == 0) {
129 return 0.0f;
130 }
131
132 // Calculate the exponent. The highest significant bit will have the value
133 // 2^exponent.
134 const int highest_significant_bit = 63 - CountLeadingZeros(src);
135 const int32_t exponent = highest_significant_bit - fbits;
136
137 return FPRoundToFloat(0, exponent, src, round);
138 }
139
140
FixedToFloat16(int64_t src,int fbits,FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142 if (src >= 0) {
143 return UFixedToFloat16(src, fbits, round);
144 } else if (src == INT64_MIN) {
145 return -UFixedToFloat16(src, fbits, round);
146 } else {
147 return -UFixedToFloat16(-src, fbits, round);
148 }
149 }
150
151
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153 int fbits,
154 FPRounding round) {
155 // An input of 0 is a special case because the result is effectively
156 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157 if (src == 0) {
158 return 0.0f;
159 }
160
161 // Calculate the exponent. The highest significant bit will have the value
162 // 2^exponent.
163 const int highest_significant_bit = 63 - CountLeadingZeros(src);
164 const int16_t exponent = highest_significant_bit - fbits;
165
166 return FPRoundToFloat16(0, exponent, src, round);
167 }
168
169
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)170 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
171 dst.ClearForWrite(vform);
172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
173 LoadLane(dst, vform, i, addr);
174 addr += LaneSizeInBytesFromFormat(vform);
175 }
176 }
177
178
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)179 void Simulator::ld1(VectorFormat vform,
180 LogicVRegister dst,
181 int index,
182 uint64_t addr) {
183 LoadLane(dst, vform, index, addr);
184 }
185
186
ld1r(VectorFormat vform,VectorFormat unpack_vform,LogicVRegister dst,uint64_t addr,bool is_signed)187 void Simulator::ld1r(VectorFormat vform,
188 VectorFormat unpack_vform,
189 LogicVRegister dst,
190 uint64_t addr,
191 bool is_signed) {
192 unsigned unpack_size = LaneSizeInBytesFromFormat(unpack_vform);
193 dst.ClearForWrite(vform);
194 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
195 if (is_signed) {
196 LoadIntToLane(dst, vform, unpack_size, i, addr);
197 } else {
198 LoadUintToLane(dst, vform, unpack_size, i, addr);
199 }
200 }
201 }
202
203
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)204 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
205 ld1r(vform, vform, dst, addr);
206 }
207
208
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)209 void Simulator::ld2(VectorFormat vform,
210 LogicVRegister dst1,
211 LogicVRegister dst2,
212 uint64_t addr1) {
213 dst1.ClearForWrite(vform);
214 dst2.ClearForWrite(vform);
215 int esize = LaneSizeInBytesFromFormat(vform);
216 uint64_t addr2 = addr1 + esize;
217 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
218 LoadLane(dst1, vform, i, addr1);
219 LoadLane(dst2, vform, i, addr2);
220 addr1 += 2 * esize;
221 addr2 += 2 * esize;
222 }
223 }
224
225
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)226 void Simulator::ld2(VectorFormat vform,
227 LogicVRegister dst1,
228 LogicVRegister dst2,
229 int index,
230 uint64_t addr1) {
231 dst1.ClearForWrite(vform);
232 dst2.ClearForWrite(vform);
233 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
234 LoadLane(dst1, vform, index, addr1);
235 LoadLane(dst2, vform, index, addr2);
236 }
237
238
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)239 void Simulator::ld2r(VectorFormat vform,
240 LogicVRegister dst1,
241 LogicVRegister dst2,
242 uint64_t addr) {
243 dst1.ClearForWrite(vform);
244 dst2.ClearForWrite(vform);
245 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
246 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
247 LoadLane(dst1, vform, i, addr);
248 LoadLane(dst2, vform, i, addr2);
249 }
250 }
251
252
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)253 void Simulator::ld3(VectorFormat vform,
254 LogicVRegister dst1,
255 LogicVRegister dst2,
256 LogicVRegister dst3,
257 uint64_t addr1) {
258 dst1.ClearForWrite(vform);
259 dst2.ClearForWrite(vform);
260 dst3.ClearForWrite(vform);
261 int esize = LaneSizeInBytesFromFormat(vform);
262 uint64_t addr2 = addr1 + esize;
263 uint64_t addr3 = addr2 + esize;
264 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
265 LoadLane(dst1, vform, i, addr1);
266 LoadLane(dst2, vform, i, addr2);
267 LoadLane(dst3, vform, i, addr3);
268 addr1 += 3 * esize;
269 addr2 += 3 * esize;
270 addr3 += 3 * esize;
271 }
272 }
273
274
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)275 void Simulator::ld3(VectorFormat vform,
276 LogicVRegister dst1,
277 LogicVRegister dst2,
278 LogicVRegister dst3,
279 int index,
280 uint64_t addr1) {
281 dst1.ClearForWrite(vform);
282 dst2.ClearForWrite(vform);
283 dst3.ClearForWrite(vform);
284 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
285 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
286 LoadLane(dst1, vform, index, addr1);
287 LoadLane(dst2, vform, index, addr2);
288 LoadLane(dst3, vform, index, addr3);
289 }
290
291
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)292 void Simulator::ld3r(VectorFormat vform,
293 LogicVRegister dst1,
294 LogicVRegister dst2,
295 LogicVRegister dst3,
296 uint64_t addr) {
297 dst1.ClearForWrite(vform);
298 dst2.ClearForWrite(vform);
299 dst3.ClearForWrite(vform);
300 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
301 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
302 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
303 LoadLane(dst1, vform, i, addr);
304 LoadLane(dst2, vform, i, addr2);
305 LoadLane(dst3, vform, i, addr3);
306 }
307 }
308
309
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)310 void Simulator::ld4(VectorFormat vform,
311 LogicVRegister dst1,
312 LogicVRegister dst2,
313 LogicVRegister dst3,
314 LogicVRegister dst4,
315 uint64_t addr1) {
316 dst1.ClearForWrite(vform);
317 dst2.ClearForWrite(vform);
318 dst3.ClearForWrite(vform);
319 dst4.ClearForWrite(vform);
320 int esize = LaneSizeInBytesFromFormat(vform);
321 uint64_t addr2 = addr1 + esize;
322 uint64_t addr3 = addr2 + esize;
323 uint64_t addr4 = addr3 + esize;
324 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
325 LoadLane(dst1, vform, i, addr1);
326 LoadLane(dst2, vform, i, addr2);
327 LoadLane(dst3, vform, i, addr3);
328 LoadLane(dst4, vform, i, addr4);
329 addr1 += 4 * esize;
330 addr2 += 4 * esize;
331 addr3 += 4 * esize;
332 addr4 += 4 * esize;
333 }
334 }
335
336
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)337 void Simulator::ld4(VectorFormat vform,
338 LogicVRegister dst1,
339 LogicVRegister dst2,
340 LogicVRegister dst3,
341 LogicVRegister dst4,
342 int index,
343 uint64_t addr1) {
344 dst1.ClearForWrite(vform);
345 dst2.ClearForWrite(vform);
346 dst3.ClearForWrite(vform);
347 dst4.ClearForWrite(vform);
348 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
349 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
350 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
351 LoadLane(dst1, vform, index, addr1);
352 LoadLane(dst2, vform, index, addr2);
353 LoadLane(dst3, vform, index, addr3);
354 LoadLane(dst4, vform, index, addr4);
355 }
356
357
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)358 void Simulator::ld4r(VectorFormat vform,
359 LogicVRegister dst1,
360 LogicVRegister dst2,
361 LogicVRegister dst3,
362 LogicVRegister dst4,
363 uint64_t addr) {
364 dst1.ClearForWrite(vform);
365 dst2.ClearForWrite(vform);
366 dst3.ClearForWrite(vform);
367 dst4.ClearForWrite(vform);
368 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
369 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
370 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
371 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
372 LoadLane(dst1, vform, i, addr);
373 LoadLane(dst2, vform, i, addr2);
374 LoadLane(dst3, vform, i, addr3);
375 LoadLane(dst4, vform, i, addr4);
376 }
377 }
378
379
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)380 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
381 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
382 StoreLane(src, vform, i, addr);
383 addr += LaneSizeInBytesFromFormat(vform);
384 }
385 }
386
387
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)388 void Simulator::st1(VectorFormat vform,
389 LogicVRegister src,
390 int index,
391 uint64_t addr) {
392 StoreLane(src, vform, index, addr);
393 }
394
395
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,uint64_t addr)396 void Simulator::st2(VectorFormat vform,
397 LogicVRegister src,
398 LogicVRegister src2,
399 uint64_t addr) {
400 int esize = LaneSizeInBytesFromFormat(vform);
401 uint64_t addr2 = addr + esize;
402 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
403 StoreLane(src, vform, i, addr);
404 StoreLane(src2, vform, i, addr2);
405 addr += 2 * esize;
406 addr2 += 2 * esize;
407 }
408 }
409
410
st2(VectorFormat vform,LogicVRegister src,LogicVRegister src2,int index,uint64_t addr)411 void Simulator::st2(VectorFormat vform,
412 LogicVRegister src,
413 LogicVRegister src2,
414 int index,
415 uint64_t addr) {
416 int esize = LaneSizeInBytesFromFormat(vform);
417 StoreLane(src, vform, index, addr);
418 StoreLane(src2, vform, index, addr + 1 * esize);
419 }
420
421
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,uint64_t addr)422 void Simulator::st3(VectorFormat vform,
423 LogicVRegister src,
424 LogicVRegister src2,
425 LogicVRegister src3,
426 uint64_t addr) {
427 int esize = LaneSizeInBytesFromFormat(vform);
428 uint64_t addr2 = addr + esize;
429 uint64_t addr3 = addr2 + esize;
430 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
431 StoreLane(src, vform, i, addr);
432 StoreLane(src2, vform, i, addr2);
433 StoreLane(src3, vform, i, addr3);
434 addr += 3 * esize;
435 addr2 += 3 * esize;
436 addr3 += 3 * esize;
437 }
438 }
439
440
st3(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,int index,uint64_t addr)441 void Simulator::st3(VectorFormat vform,
442 LogicVRegister src,
443 LogicVRegister src2,
444 LogicVRegister src3,
445 int index,
446 uint64_t addr) {
447 int esize = LaneSizeInBytesFromFormat(vform);
448 StoreLane(src, vform, index, addr);
449 StoreLane(src2, vform, index, addr + 1 * esize);
450 StoreLane(src3, vform, index, addr + 2 * esize);
451 }
452
453
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,uint64_t addr)454 void Simulator::st4(VectorFormat vform,
455 LogicVRegister src,
456 LogicVRegister src2,
457 LogicVRegister src3,
458 LogicVRegister src4,
459 uint64_t addr) {
460 int esize = LaneSizeInBytesFromFormat(vform);
461 uint64_t addr2 = addr + esize;
462 uint64_t addr3 = addr2 + esize;
463 uint64_t addr4 = addr3 + esize;
464 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
465 StoreLane(src, vform, i, addr);
466 StoreLane(src2, vform, i, addr2);
467 StoreLane(src3, vform, i, addr3);
468 StoreLane(src4, vform, i, addr4);
469 addr += 4 * esize;
470 addr2 += 4 * esize;
471 addr3 += 4 * esize;
472 addr4 += 4 * esize;
473 }
474 }
475
476
st4(VectorFormat vform,LogicVRegister src,LogicVRegister src2,LogicVRegister src3,LogicVRegister src4,int index,uint64_t addr)477 void Simulator::st4(VectorFormat vform,
478 LogicVRegister src,
479 LogicVRegister src2,
480 LogicVRegister src3,
481 LogicVRegister src4,
482 int index,
483 uint64_t addr) {
484 int esize = LaneSizeInBytesFromFormat(vform);
485 StoreLane(src, vform, index, addr);
486 StoreLane(src2, vform, index, addr + 1 * esize);
487 StoreLane(src3, vform, index, addr + 2 * esize);
488 StoreLane(src4, vform, index, addr + 3 * esize);
489 }
490
491
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)492 LogicVRegister Simulator::cmp(VectorFormat vform,
493 LogicVRegister dst,
494 const LogicVRegister& src1,
495 const LogicVRegister& src2,
496 Condition cond) {
497 dst.ClearForWrite(vform);
498 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
499 int64_t sa = src1.Int(vform, i);
500 int64_t sb = src2.Int(vform, i);
501 uint64_t ua = src1.Uint(vform, i);
502 uint64_t ub = src2.Uint(vform, i);
503 bool result = false;
504 switch (cond) {
505 case eq:
506 result = (ua == ub);
507 break;
508 case ge:
509 result = (sa >= sb);
510 break;
511 case gt:
512 result = (sa > sb);
513 break;
514 case hi:
515 result = (ua > ub);
516 break;
517 case hs:
518 result = (ua >= ub);
519 break;
520 case lt:
521 result = (sa < sb);
522 break;
523 case le:
524 result = (sa <= sb);
525 break;
526 default:
527 VIXL_UNREACHABLE();
528 break;
529 }
530 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
531 }
532 return dst;
533 }
534
535
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)536 LogicVRegister Simulator::cmp(VectorFormat vform,
537 LogicVRegister dst,
538 const LogicVRegister& src1,
539 int imm,
540 Condition cond) {
541 SimVRegister temp;
542 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
543 return cmp(vform, dst, src1, imm_reg, cond);
544 }
545
546
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)547 LogicVRegister Simulator::cmptst(VectorFormat vform,
548 LogicVRegister dst,
549 const LogicVRegister& src1,
550 const LogicVRegister& src2) {
551 dst.ClearForWrite(vform);
552 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
553 uint64_t ua = src1.Uint(vform, i);
554 uint64_t ub = src2.Uint(vform, i);
555 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
556 }
557 return dst;
558 }
559
560
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)561 LogicVRegister Simulator::add(VectorFormat vform,
562 LogicVRegister dst,
563 const LogicVRegister& src1,
564 const LogicVRegister& src2) {
565 int lane_size = LaneSizeInBitsFromFormat(vform);
566 dst.ClearForWrite(vform);
567
568 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
569 // Test for unsigned saturation.
570 uint64_t ua = src1.UintLeftJustified(vform, i);
571 uint64_t ub = src2.UintLeftJustified(vform, i);
572 uint64_t ur = ua + ub;
573 if (ur < ua) {
574 dst.SetUnsignedSat(i, true);
575 }
576
577 // Test for signed saturation.
578 bool pos_a = (ua >> 63) == 0;
579 bool pos_b = (ub >> 63) == 0;
580 bool pos_r = (ur >> 63) == 0;
581 // If the signs of the operands are the same, but different from the result,
582 // there was an overflow.
583 if ((pos_a == pos_b) && (pos_a != pos_r)) {
584 dst.SetSignedSat(i, pos_a);
585 }
586 dst.SetInt(vform, i, ur >> (64 - lane_size));
587 }
588 return dst;
589 }
590
add_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)591 LogicVRegister Simulator::add_uint(VectorFormat vform,
592 LogicVRegister dst,
593 const LogicVRegister& src1,
594 uint64_t value) {
595 int lane_size = LaneSizeInBitsFromFormat(vform);
596 VIXL_ASSERT(IsUintN(lane_size, value));
597 dst.ClearForWrite(vform);
598 // Left-justify `value`.
599 uint64_t ub = value << (64 - lane_size);
600 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
601 // Test for unsigned saturation.
602 uint64_t ua = src1.UintLeftJustified(vform, i);
603 uint64_t ur = ua + ub;
604 if (ur < ua) {
605 dst.SetUnsignedSat(i, true);
606 }
607
608 // Test for signed saturation.
609 // `value` is always positive, so we have an overflow if the (signed) result
610 // is smaller than the first operand.
611 if (RawbitsToInt64(ur) < RawbitsToInt64(ua)) {
612 dst.SetSignedSat(i, true);
613 }
614
615 dst.SetInt(vform, i, ur >> (64 - lane_size));
616 }
617 return dst;
618 }
619
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)620 LogicVRegister Simulator::addp(VectorFormat vform,
621 LogicVRegister dst,
622 const LogicVRegister& src1,
623 const LogicVRegister& src2) {
624 SimVRegister temp1, temp2;
625 uzp1(vform, temp1, src1, src2);
626 uzp2(vform, temp2, src1, src2);
627 add(vform, dst, temp1, temp2);
628 return dst;
629 }
630
sdiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)631 LogicVRegister Simulator::sdiv(VectorFormat vform,
632 LogicVRegister dst,
633 const LogicVRegister& src1,
634 const LogicVRegister& src2) {
635 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
636
637 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
638 int64_t val1 = src1.Int(vform, i);
639 int64_t val2 = src2.Int(vform, i);
640 int64_t min_int = (vform == kFormatVnD) ? kXMinInt : kWMinInt;
641 int64_t quotient = 0;
642 if ((val1 == min_int) && (val2 == -1)) {
643 quotient = min_int;
644 } else if (val2 != 0) {
645 quotient = val1 / val2;
646 }
647 dst.SetInt(vform, i, quotient);
648 }
649
650 return dst;
651 }
652
udiv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)653 LogicVRegister Simulator::udiv(VectorFormat vform,
654 LogicVRegister dst,
655 const LogicVRegister& src1,
656 const LogicVRegister& src2) {
657 VIXL_ASSERT((vform == kFormatVnS) || (vform == kFormatVnD));
658
659 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
660 uint64_t val1 = src1.Uint(vform, i);
661 uint64_t val2 = src2.Uint(vform, i);
662 uint64_t quotient = 0;
663 if (val2 != 0) {
664 quotient = val1 / val2;
665 }
666 dst.SetUint(vform, i, quotient);
667 }
668
669 return dst;
670 }
671
672
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)673 LogicVRegister Simulator::mla(VectorFormat vform,
674 LogicVRegister dst,
675 const LogicVRegister& srca,
676 const LogicVRegister& src1,
677 const LogicVRegister& src2) {
678 SimVRegister temp;
679 mul(vform, temp, src1, src2);
680 add(vform, dst, srca, temp);
681 return dst;
682 }
683
684
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)685 LogicVRegister Simulator::mls(VectorFormat vform,
686 LogicVRegister dst,
687 const LogicVRegister& srca,
688 const LogicVRegister& src1,
689 const LogicVRegister& src2) {
690 SimVRegister temp;
691 mul(vform, temp, src1, src2);
692 sub(vform, dst, srca, temp);
693 return dst;
694 }
695
696
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)697 LogicVRegister Simulator::mul(VectorFormat vform,
698 LogicVRegister dst,
699 const LogicVRegister& src1,
700 const LogicVRegister& src2) {
701 dst.ClearForWrite(vform);
702
703 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
704 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
705 }
706 return dst;
707 }
708
709
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)710 LogicVRegister Simulator::mul(VectorFormat vform,
711 LogicVRegister dst,
712 const LogicVRegister& src1,
713 const LogicVRegister& src2,
714 int index) {
715 SimVRegister temp;
716 VectorFormat indexform = VectorFormatFillQ(vform);
717 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
718 }
719
720
smulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)721 LogicVRegister Simulator::smulh(VectorFormat vform,
722 LogicVRegister dst,
723 const LogicVRegister& src1,
724 const LogicVRegister& src2) {
725 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
726 int64_t dst_val;
727 int64_t val1 = src1.Int(vform, i);
728 int64_t val2 = src2.Int(vform, i);
729 switch (LaneSizeInBitsFromFormat(vform)) {
730 case 8:
731 dst_val = internal::MultiplyHigh<8>(val1, val2);
732 break;
733 case 16:
734 dst_val = internal::MultiplyHigh<16>(val1, val2);
735 break;
736 case 32:
737 dst_val = internal::MultiplyHigh<32>(val1, val2);
738 break;
739 case 64:
740 dst_val = internal::MultiplyHigh<64>(val1, val2);
741 break;
742 default:
743 dst_val = 0xbadbeef;
744 VIXL_UNREACHABLE();
745 break;
746 }
747 dst.SetInt(vform, i, dst_val);
748 }
749 return dst;
750 }
751
752
umulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)753 LogicVRegister Simulator::umulh(VectorFormat vform,
754 LogicVRegister dst,
755 const LogicVRegister& src1,
756 const LogicVRegister& src2) {
757 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
758 uint64_t dst_val;
759 uint64_t val1 = src1.Uint(vform, i);
760 uint64_t val2 = src2.Uint(vform, i);
761 switch (LaneSizeInBitsFromFormat(vform)) {
762 case 8:
763 dst_val = internal::MultiplyHigh<8>(val1, val2);
764 break;
765 case 16:
766 dst_val = internal::MultiplyHigh<16>(val1, val2);
767 break;
768 case 32:
769 dst_val = internal::MultiplyHigh<32>(val1, val2);
770 break;
771 case 64:
772 dst_val = internal::MultiplyHigh<64>(val1, val2);
773 break;
774 default:
775 dst_val = 0xbadbeef;
776 VIXL_UNREACHABLE();
777 break;
778 }
779 dst.SetUint(vform, i, dst_val);
780 }
781 return dst;
782 }
783
784
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)785 LogicVRegister Simulator::mla(VectorFormat vform,
786 LogicVRegister dst,
787 const LogicVRegister& src1,
788 const LogicVRegister& src2,
789 int index) {
790 SimVRegister temp;
791 VectorFormat indexform = VectorFormatFillQ(vform);
792 return mla(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
793 }
794
795
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)796 LogicVRegister Simulator::mls(VectorFormat vform,
797 LogicVRegister dst,
798 const LogicVRegister& src1,
799 const LogicVRegister& src2,
800 int index) {
801 SimVRegister temp;
802 VectorFormat indexform = VectorFormatFillQ(vform);
803 return mls(vform, dst, dst, src1, dup_element(indexform, temp, src2, index));
804 }
805
806
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)807 LogicVRegister Simulator::smull(VectorFormat vform,
808 LogicVRegister dst,
809 const LogicVRegister& src1,
810 const LogicVRegister& src2,
811 int index) {
812 SimVRegister temp;
813 VectorFormat indexform =
814 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
815 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
816 }
817
818
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)819 LogicVRegister Simulator::smull2(VectorFormat vform,
820 LogicVRegister dst,
821 const LogicVRegister& src1,
822 const LogicVRegister& src2,
823 int index) {
824 SimVRegister temp;
825 VectorFormat indexform =
826 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
827 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
828 }
829
830
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)831 LogicVRegister Simulator::umull(VectorFormat vform,
832 LogicVRegister dst,
833 const LogicVRegister& src1,
834 const LogicVRegister& src2,
835 int index) {
836 SimVRegister temp;
837 VectorFormat indexform =
838 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
839 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
840 }
841
842
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)843 LogicVRegister Simulator::umull2(VectorFormat vform,
844 LogicVRegister dst,
845 const LogicVRegister& src1,
846 const LogicVRegister& src2,
847 int index) {
848 SimVRegister temp;
849 VectorFormat indexform =
850 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
851 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
852 }
853
854
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)855 LogicVRegister Simulator::smlal(VectorFormat vform,
856 LogicVRegister dst,
857 const LogicVRegister& src1,
858 const LogicVRegister& src2,
859 int index) {
860 SimVRegister temp;
861 VectorFormat indexform =
862 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
863 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
864 }
865
866
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)867 LogicVRegister Simulator::smlal2(VectorFormat vform,
868 LogicVRegister dst,
869 const LogicVRegister& src1,
870 const LogicVRegister& src2,
871 int index) {
872 SimVRegister temp;
873 VectorFormat indexform =
874 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
875 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
876 }
877
878
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)879 LogicVRegister Simulator::umlal(VectorFormat vform,
880 LogicVRegister dst,
881 const LogicVRegister& src1,
882 const LogicVRegister& src2,
883 int index) {
884 SimVRegister temp;
885 VectorFormat indexform =
886 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
887 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
888 }
889
890
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)891 LogicVRegister Simulator::umlal2(VectorFormat vform,
892 LogicVRegister dst,
893 const LogicVRegister& src1,
894 const LogicVRegister& src2,
895 int index) {
896 SimVRegister temp;
897 VectorFormat indexform =
898 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
899 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
900 }
901
902
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)903 LogicVRegister Simulator::smlsl(VectorFormat vform,
904 LogicVRegister dst,
905 const LogicVRegister& src1,
906 const LogicVRegister& src2,
907 int index) {
908 SimVRegister temp;
909 VectorFormat indexform =
910 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
911 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
912 }
913
914
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)915 LogicVRegister Simulator::smlsl2(VectorFormat vform,
916 LogicVRegister dst,
917 const LogicVRegister& src1,
918 const LogicVRegister& src2,
919 int index) {
920 SimVRegister temp;
921 VectorFormat indexform =
922 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
923 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
924 }
925
926
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)927 LogicVRegister Simulator::umlsl(VectorFormat vform,
928 LogicVRegister dst,
929 const LogicVRegister& src1,
930 const LogicVRegister& src2,
931 int index) {
932 SimVRegister temp;
933 VectorFormat indexform =
934 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
935 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
936 }
937
938
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)939 LogicVRegister Simulator::umlsl2(VectorFormat vform,
940 LogicVRegister dst,
941 const LogicVRegister& src1,
942 const LogicVRegister& src2,
943 int index) {
944 SimVRegister temp;
945 VectorFormat indexform =
946 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
947 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
948 }
949
950
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)951 LogicVRegister Simulator::sqdmull(VectorFormat vform,
952 LogicVRegister dst,
953 const LogicVRegister& src1,
954 const LogicVRegister& src2,
955 int index) {
956 SimVRegister temp;
957 VectorFormat indexform =
958 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
959 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
960 }
961
962
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)963 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
964 LogicVRegister dst,
965 const LogicVRegister& src1,
966 const LogicVRegister& src2,
967 int index) {
968 SimVRegister temp;
969 VectorFormat indexform =
970 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
971 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
972 }
973
974
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)975 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
976 LogicVRegister dst,
977 const LogicVRegister& src1,
978 const LogicVRegister& src2,
979 int index) {
980 SimVRegister temp;
981 VectorFormat indexform =
982 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
983 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
984 }
985
986
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)987 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
988 LogicVRegister dst,
989 const LogicVRegister& src1,
990 const LogicVRegister& src2,
991 int index) {
992 SimVRegister temp;
993 VectorFormat indexform =
994 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
995 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
996 }
997
998
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)999 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
1000 LogicVRegister dst,
1001 const LogicVRegister& src1,
1002 const LogicVRegister& src2,
1003 int index) {
1004 SimVRegister temp;
1005 VectorFormat indexform =
1006 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1007 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
1008 }
1009
1010
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1011 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
1012 LogicVRegister dst,
1013 const LogicVRegister& src1,
1014 const LogicVRegister& src2,
1015 int index) {
1016 SimVRegister temp;
1017 VectorFormat indexform =
1018 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
1019 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
1020 }
1021
1022
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1023 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
1024 LogicVRegister dst,
1025 const LogicVRegister& src1,
1026 const LogicVRegister& src2,
1027 int index) {
1028 SimVRegister temp;
1029 VectorFormat indexform = VectorFormatFillQ(vform);
1030 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1031 }
1032
1033
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1034 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
1035 LogicVRegister dst,
1036 const LogicVRegister& src1,
1037 const LogicVRegister& src2,
1038 int index) {
1039 SimVRegister temp;
1040 VectorFormat indexform = VectorFormatFillQ(vform);
1041 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1042 }
1043
1044
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1045 LogicVRegister Simulator::sdot(VectorFormat vform,
1046 LogicVRegister dst,
1047 const LogicVRegister& src1,
1048 const LogicVRegister& src2,
1049 int index) {
1050 SimVRegister temp;
1051 // NEON indexed `dot` allows the index value exceed the register size.
1052 // Promote the format to Q-sized vector format before the duplication.
1053 dup_elements_to_segments(IsSVEFormat(vform) ? vform
1054 : VectorFormatFillQ(vform),
1055 temp,
1056 src2,
1057 index);
1058 return sdot(vform, dst, src1, temp);
1059 }
1060
1061
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1062 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
1063 LogicVRegister dst,
1064 const LogicVRegister& src1,
1065 const LogicVRegister& src2,
1066 int index) {
1067 SimVRegister temp;
1068 VectorFormat indexform = VectorFormatFillQ(vform);
1069 return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
1070 }
1071
1072
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1073 LogicVRegister Simulator::udot(VectorFormat vform,
1074 LogicVRegister dst,
1075 const LogicVRegister& src1,
1076 const LogicVRegister& src2,
1077 int index) {
1078 SimVRegister temp;
1079 // NEON indexed `dot` allows the index value exceed the register size.
1080 // Promote the format to Q-sized vector format before the duplication.
1081 dup_elements_to_segments(IsSVEFormat(vform) ? vform
1082 : VectorFormatFillQ(vform),
1083 temp,
1084 src2,
1085 index);
1086 return udot(vform, dst, src1, temp);
1087 }
1088
1089
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)1090 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
1091 LogicVRegister dst,
1092 const LogicVRegister& src1,
1093 const LogicVRegister& src2,
1094 int index) {
1095 SimVRegister temp;
1096 VectorFormat indexform = VectorFormatFillQ(vform);
1097 return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
1098 }
1099
1100
PolynomialMult(uint8_t op1,uint8_t op2) const1101 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
1102 uint16_t result = 0;
1103 uint16_t extended_op2 = op2;
1104 for (int i = 0; i < 8; ++i) {
1105 if ((op1 >> i) & 1) {
1106 result = result ^ (extended_op2 << i);
1107 }
1108 }
1109 return result;
1110 }
1111
1112
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1113 LogicVRegister Simulator::pmul(VectorFormat vform,
1114 LogicVRegister dst,
1115 const LogicVRegister& src1,
1116 const LogicVRegister& src2) {
1117 dst.ClearForWrite(vform);
1118 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1119 dst.SetUint(vform,
1120 i,
1121 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
1122 }
1123 return dst;
1124 }
1125
1126
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1127 LogicVRegister Simulator::pmull(VectorFormat vform,
1128 LogicVRegister dst,
1129 const LogicVRegister& src1,
1130 const LogicVRegister& src2) {
1131 VectorFormat vform_src = VectorFormatHalfWidth(vform);
1132 dst.ClearForWrite(vform);
1133 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1134 dst.SetUint(vform,
1135 i,
1136 PolynomialMult(src1.Uint(vform_src, i),
1137 src2.Uint(vform_src, i)));
1138 }
1139 return dst;
1140 }
1141
1142
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1143 LogicVRegister Simulator::pmull2(VectorFormat vform,
1144 LogicVRegister dst,
1145 const LogicVRegister& src1,
1146 const LogicVRegister& src2) {
1147 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
1148 dst.ClearForWrite(vform);
1149 int lane_count = LaneCountFromFormat(vform);
1150 for (int i = 0; i < lane_count; i++) {
1151 dst.SetUint(vform,
1152 i,
1153 PolynomialMult(src1.Uint(vform_src, lane_count + i),
1154 src2.Uint(vform_src, lane_count + i)));
1155 }
1156 return dst;
1157 }
1158
1159
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1160 LogicVRegister Simulator::sub(VectorFormat vform,
1161 LogicVRegister dst,
1162 const LogicVRegister& src1,
1163 const LogicVRegister& src2) {
1164 int lane_size = LaneSizeInBitsFromFormat(vform);
1165 dst.ClearForWrite(vform);
1166 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1167 // Test for unsigned saturation.
1168 uint64_t ua = src1.UintLeftJustified(vform, i);
1169 uint64_t ub = src2.UintLeftJustified(vform, i);
1170 uint64_t ur = ua - ub;
1171 if (ub > ua) {
1172 dst.SetUnsignedSat(i, false);
1173 }
1174
1175 // Test for signed saturation.
1176 bool pos_a = (ua >> 63) == 0;
1177 bool pos_b = (ub >> 63) == 0;
1178 bool pos_r = (ur >> 63) == 0;
1179 // If the signs of the operands are different, and the sign of the first
1180 // operand doesn't match the result, there was an overflow.
1181 if ((pos_a != pos_b) && (pos_a != pos_r)) {
1182 dst.SetSignedSat(i, pos_a);
1183 }
1184
1185 dst.SetInt(vform, i, ur >> (64 - lane_size));
1186 }
1187 return dst;
1188 }
1189
sub_uint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,uint64_t value)1190 LogicVRegister Simulator::sub_uint(VectorFormat vform,
1191 LogicVRegister dst,
1192 const LogicVRegister& src1,
1193 uint64_t value) {
1194 int lane_size = LaneSizeInBitsFromFormat(vform);
1195 VIXL_ASSERT(IsUintN(lane_size, value));
1196 dst.ClearForWrite(vform);
1197 // Left-justify `value`.
1198 uint64_t ub = value << (64 - lane_size);
1199 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1200 // Test for unsigned saturation.
1201 uint64_t ua = src1.UintLeftJustified(vform, i);
1202 uint64_t ur = ua - ub;
1203 if (ub > ua) {
1204 dst.SetUnsignedSat(i, false);
1205 }
1206
1207 // Test for signed saturation.
1208 // `value` is always positive, so we have an overflow if the (signed) result
1209 // is greater than the first operand.
1210 if (RawbitsToInt64(ur) > RawbitsToInt64(ua)) {
1211 dst.SetSignedSat(i, false);
1212 }
1213
1214 dst.SetInt(vform, i, ur >> (64 - lane_size));
1215 }
1216 return dst;
1217 }
1218
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1219 LogicVRegister Simulator::and_(VectorFormat vform,
1220 LogicVRegister dst,
1221 const LogicVRegister& src1,
1222 const LogicVRegister& src2) {
1223 dst.ClearForWrite(vform);
1224 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1225 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1226 }
1227 return dst;
1228 }
1229
1230
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1231 LogicVRegister Simulator::orr(VectorFormat vform,
1232 LogicVRegister dst,
1233 const LogicVRegister& src1,
1234 const LogicVRegister& src2) {
1235 dst.ClearForWrite(vform);
1236 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1237 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1238 }
1239 return dst;
1240 }
1241
1242
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1243 LogicVRegister Simulator::orn(VectorFormat vform,
1244 LogicVRegister dst,
1245 const LogicVRegister& src1,
1246 const LogicVRegister& src2) {
1247 dst.ClearForWrite(vform);
1248 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1249 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1250 }
1251 return dst;
1252 }
1253
1254
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1255 LogicVRegister Simulator::eor(VectorFormat vform,
1256 LogicVRegister dst,
1257 const LogicVRegister& src1,
1258 const LogicVRegister& src2) {
1259 dst.ClearForWrite(vform);
1260 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1261 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1262 }
1263 return dst;
1264 }
1265
1266
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1267 LogicVRegister Simulator::bic(VectorFormat vform,
1268 LogicVRegister dst,
1269 const LogicVRegister& src1,
1270 const LogicVRegister& src2) {
1271 dst.ClearForWrite(vform);
1272 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1273 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1274 }
1275 return dst;
1276 }
1277
1278
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1279 LogicVRegister Simulator::bic(VectorFormat vform,
1280 LogicVRegister dst,
1281 const LogicVRegister& src,
1282 uint64_t imm) {
1283 uint64_t result[16];
1284 int lane_count = LaneCountFromFormat(vform);
1285 for (int i = 0; i < lane_count; ++i) {
1286 result[i] = src.Uint(vform, i) & ~imm;
1287 }
1288 dst.ClearForWrite(vform);
1289 for (int i = 0; i < lane_count; ++i) {
1290 dst.SetUint(vform, i, result[i]);
1291 }
1292 return dst;
1293 }
1294
1295
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1296 LogicVRegister Simulator::bif(VectorFormat vform,
1297 LogicVRegister dst,
1298 const LogicVRegister& src1,
1299 const LogicVRegister& src2) {
1300 dst.ClearForWrite(vform);
1301 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1302 uint64_t operand1 = dst.Uint(vform, i);
1303 uint64_t operand2 = ~src2.Uint(vform, i);
1304 uint64_t operand3 = src1.Uint(vform, i);
1305 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1306 dst.SetUint(vform, i, result);
1307 }
1308 return dst;
1309 }
1310
1311
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1312 LogicVRegister Simulator::bit(VectorFormat vform,
1313 LogicVRegister dst,
1314 const LogicVRegister& src1,
1315 const LogicVRegister& src2) {
1316 dst.ClearForWrite(vform);
1317 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1318 uint64_t operand1 = dst.Uint(vform, i);
1319 uint64_t operand2 = src2.Uint(vform, i);
1320 uint64_t operand3 = src1.Uint(vform, i);
1321 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1322 dst.SetUint(vform, i, result);
1323 }
1324 return dst;
1325 }
1326
1327
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1328 LogicVRegister Simulator::bsl(VectorFormat vform,
1329 LogicVRegister dst,
1330 const LogicVRegister& src1,
1331 const LogicVRegister& src2) {
1332 dst.ClearForWrite(vform);
1333 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1334 uint64_t operand1 = src2.Uint(vform, i);
1335 uint64_t operand2 = dst.Uint(vform, i);
1336 uint64_t operand3 = src1.Uint(vform, i);
1337 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1338 dst.SetUint(vform, i, result);
1339 }
1340 return dst;
1341 }
1342
1343
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1344 LogicVRegister Simulator::sminmax(VectorFormat vform,
1345 LogicVRegister dst,
1346 const LogicVRegister& src1,
1347 const LogicVRegister& src2,
1348 bool max) {
1349 dst.ClearForWrite(vform);
1350 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1351 int64_t src1_val = src1.Int(vform, i);
1352 int64_t src2_val = src2.Int(vform, i);
1353 int64_t dst_val;
1354 if (max) {
1355 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1356 } else {
1357 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1358 }
1359 dst.SetInt(vform, i, dst_val);
1360 }
1361 return dst;
1362 }
1363
1364
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1365 LogicVRegister Simulator::smax(VectorFormat vform,
1366 LogicVRegister dst,
1367 const LogicVRegister& src1,
1368 const LogicVRegister& src2) {
1369 return sminmax(vform, dst, src1, src2, true);
1370 }
1371
1372
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1373 LogicVRegister Simulator::smin(VectorFormat vform,
1374 LogicVRegister dst,
1375 const LogicVRegister& src1,
1376 const LogicVRegister& src2) {
1377 return sminmax(vform, dst, src1, src2, false);
1378 }
1379
1380
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1381 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1382 LogicVRegister dst,
1383 const LogicVRegister& src1,
1384 const LogicVRegister& src2,
1385 bool max) {
1386 int lanes = LaneCountFromFormat(vform);
1387 int64_t result[kMaxLanesPerVector];
1388 const LogicVRegister* src = &src1;
1389 for (int j = 0; j < 2; j++) {
1390 for (int i = 0; i < lanes; i += 2) {
1391 int64_t first_val = src->Int(vform, i);
1392 int64_t second_val = src->Int(vform, i + 1);
1393 int64_t dst_val;
1394 if (max) {
1395 dst_val = (first_val > second_val) ? first_val : second_val;
1396 } else {
1397 dst_val = (first_val < second_val) ? first_val : second_val;
1398 }
1399 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1400 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1401 }
1402 src = &src2;
1403 }
1404 dst.SetIntArray(vform, result);
1405 return dst;
1406 }
1407
1408
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1409 LogicVRegister Simulator::smaxp(VectorFormat vform,
1410 LogicVRegister dst,
1411 const LogicVRegister& src1,
1412 const LogicVRegister& src2) {
1413 return sminmaxp(vform, dst, src1, src2, true);
1414 }
1415
1416
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1417 LogicVRegister Simulator::sminp(VectorFormat vform,
1418 LogicVRegister dst,
1419 const LogicVRegister& src1,
1420 const LogicVRegister& src2) {
1421 return sminmaxp(vform, dst, src1, src2, false);
1422 }
1423
1424
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1425 LogicVRegister Simulator::addp(VectorFormat vform,
1426 LogicVRegister dst,
1427 const LogicVRegister& src) {
1428 VIXL_ASSERT(vform == kFormatD);
1429
1430 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1431 dst.ClearForWrite(vform);
1432 dst.SetUint(vform, 0, dst_val);
1433 return dst;
1434 }
1435
1436
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1437 LogicVRegister Simulator::addv(VectorFormat vform,
1438 LogicVRegister dst,
1439 const LogicVRegister& src) {
1440 VectorFormat vform_dst =
1441 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1442
1443
1444 int64_t dst_val = 0;
1445 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1446 dst_val += src.Int(vform, i);
1447 }
1448
1449 dst.ClearForWrite(vform_dst);
1450 dst.SetInt(vform_dst, 0, dst_val);
1451 return dst;
1452 }
1453
1454
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1455 LogicVRegister Simulator::saddlv(VectorFormat vform,
1456 LogicVRegister dst,
1457 const LogicVRegister& src) {
1458 VectorFormat vform_dst =
1459 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1460
1461 int64_t dst_val = 0;
1462 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1463 dst_val += src.Int(vform, i);
1464 }
1465
1466 dst.ClearForWrite(vform_dst);
1467 dst.SetInt(vform_dst, 0, dst_val);
1468 return dst;
1469 }
1470
1471
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1472 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1473 LogicVRegister dst,
1474 const LogicVRegister& src) {
1475 VectorFormat vform_dst =
1476 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1477
1478 uint64_t dst_val = 0;
1479 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1480 dst_val += src.Uint(vform, i);
1481 }
1482
1483 dst.ClearForWrite(vform_dst);
1484 dst.SetUint(vform_dst, 0, dst_val);
1485 return dst;
1486 }
1487
1488
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1489 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1490 LogicVRegister dst,
1491 const LogicPRegister& pg,
1492 const LogicVRegister& src,
1493 bool max) {
1494 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1495 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1496 if (!pg.IsActive(vform, i)) continue;
1497
1498 int64_t src_val = src.Int(vform, i);
1499 if (max) {
1500 dst_val = (src_val > dst_val) ? src_val : dst_val;
1501 } else {
1502 dst_val = (src_val < dst_val) ? src_val : dst_val;
1503 }
1504 }
1505 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1506 dst.SetInt(vform, 0, dst_val);
1507 return dst;
1508 }
1509
1510
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1511 LogicVRegister Simulator::smaxv(VectorFormat vform,
1512 LogicVRegister dst,
1513 const LogicVRegister& src) {
1514 sminmaxv(vform, dst, GetPTrue(), src, true);
1515 return dst;
1516 }
1517
1518
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1519 LogicVRegister Simulator::sminv(VectorFormat vform,
1520 LogicVRegister dst,
1521 const LogicVRegister& src) {
1522 sminmaxv(vform, dst, GetPTrue(), src, false);
1523 return dst;
1524 }
1525
1526
smaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1527 LogicVRegister Simulator::smaxv(VectorFormat vform,
1528 LogicVRegister dst,
1529 const LogicPRegister& pg,
1530 const LogicVRegister& src) {
1531 VIXL_ASSERT(IsSVEFormat(vform));
1532 sminmaxv(vform, dst, pg, src, true);
1533 return dst;
1534 }
1535
1536
sminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1537 LogicVRegister Simulator::sminv(VectorFormat vform,
1538 LogicVRegister dst,
1539 const LogicPRegister& pg,
1540 const LogicVRegister& src) {
1541 VIXL_ASSERT(IsSVEFormat(vform));
1542 sminmaxv(vform, dst, pg, src, false);
1543 return dst;
1544 }
1545
1546
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1547 LogicVRegister Simulator::uminmax(VectorFormat vform,
1548 LogicVRegister dst,
1549 const LogicVRegister& src1,
1550 const LogicVRegister& src2,
1551 bool max) {
1552 dst.ClearForWrite(vform);
1553 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1554 uint64_t src1_val = src1.Uint(vform, i);
1555 uint64_t src2_val = src2.Uint(vform, i);
1556 uint64_t dst_val;
1557 if (max) {
1558 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1559 } else {
1560 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1561 }
1562 dst.SetUint(vform, i, dst_val);
1563 }
1564 return dst;
1565 }
1566
1567
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1568 LogicVRegister Simulator::umax(VectorFormat vform,
1569 LogicVRegister dst,
1570 const LogicVRegister& src1,
1571 const LogicVRegister& src2) {
1572 return uminmax(vform, dst, src1, src2, true);
1573 }
1574
1575
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1576 LogicVRegister Simulator::umin(VectorFormat vform,
1577 LogicVRegister dst,
1578 const LogicVRegister& src1,
1579 const LogicVRegister& src2) {
1580 return uminmax(vform, dst, src1, src2, false);
1581 }
1582
1583
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1584 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1585 LogicVRegister dst,
1586 const LogicVRegister& src1,
1587 const LogicVRegister& src2,
1588 bool max) {
1589 int lanes = LaneCountFromFormat(vform);
1590 uint64_t result[kMaxLanesPerVector];
1591 const LogicVRegister* src = &src1;
1592 for (int j = 0; j < 2; j++) {
1593 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1594 uint64_t first_val = src->Uint(vform, i);
1595 uint64_t second_val = src->Uint(vform, i + 1);
1596 uint64_t dst_val;
1597 if (max) {
1598 dst_val = (first_val > second_val) ? first_val : second_val;
1599 } else {
1600 dst_val = (first_val < second_val) ? first_val : second_val;
1601 }
1602 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1603 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1604 }
1605 src = &src2;
1606 }
1607 dst.SetUintArray(vform, result);
1608 return dst;
1609 }
1610
1611
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1612 LogicVRegister Simulator::umaxp(VectorFormat vform,
1613 LogicVRegister dst,
1614 const LogicVRegister& src1,
1615 const LogicVRegister& src2) {
1616 return uminmaxp(vform, dst, src1, src2, true);
1617 }
1618
1619
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1620 LogicVRegister Simulator::uminp(VectorFormat vform,
1621 LogicVRegister dst,
1622 const LogicVRegister& src1,
1623 const LogicVRegister& src2) {
1624 return uminmaxp(vform, dst, src1, src2, false);
1625 }
1626
1627
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,bool max)1628 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1629 LogicVRegister dst,
1630 const LogicPRegister& pg,
1631 const LogicVRegister& src,
1632 bool max) {
1633 uint64_t dst_val = max ? 0 : UINT64_MAX;
1634 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1635 if (!pg.IsActive(vform, i)) continue;
1636
1637 uint64_t src_val = src.Uint(vform, i);
1638 if (max) {
1639 dst_val = (src_val > dst_val) ? src_val : dst_val;
1640 } else {
1641 dst_val = (src_val < dst_val) ? src_val : dst_val;
1642 }
1643 }
1644 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1645 dst.SetUint(vform, 0, dst_val);
1646 return dst;
1647 }
1648
1649
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1650 LogicVRegister Simulator::umaxv(VectorFormat vform,
1651 LogicVRegister dst,
1652 const LogicVRegister& src) {
1653 uminmaxv(vform, dst, GetPTrue(), src, true);
1654 return dst;
1655 }
1656
1657
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1658 LogicVRegister Simulator::uminv(VectorFormat vform,
1659 LogicVRegister dst,
1660 const LogicVRegister& src) {
1661 uminmaxv(vform, dst, GetPTrue(), src, false);
1662 return dst;
1663 }
1664
1665
umaxv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1666 LogicVRegister Simulator::umaxv(VectorFormat vform,
1667 LogicVRegister dst,
1668 const LogicPRegister& pg,
1669 const LogicVRegister& src) {
1670 VIXL_ASSERT(IsSVEFormat(vform));
1671 uminmaxv(vform, dst, pg, src, true);
1672 return dst;
1673 }
1674
1675
uminv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1676 LogicVRegister Simulator::uminv(VectorFormat vform,
1677 LogicVRegister dst,
1678 const LogicPRegister& pg,
1679 const LogicVRegister& src) {
1680 VIXL_ASSERT(IsSVEFormat(vform));
1681 uminmaxv(vform, dst, pg, src, false);
1682 return dst;
1683 }
1684
1685
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1686 LogicVRegister Simulator::shl(VectorFormat vform,
1687 LogicVRegister dst,
1688 const LogicVRegister& src,
1689 int shift) {
1690 VIXL_ASSERT(shift >= 0);
1691 SimVRegister temp;
1692 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1693 return ushl(vform, dst, src, shiftreg);
1694 }
1695
1696
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1697 LogicVRegister Simulator::sshll(VectorFormat vform,
1698 LogicVRegister dst,
1699 const LogicVRegister& src,
1700 int shift) {
1701 VIXL_ASSERT(shift >= 0);
1702 SimVRegister temp1, temp2;
1703 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1704 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1705 return sshl(vform, dst, extendedreg, shiftreg);
1706 }
1707
1708
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1709 LogicVRegister Simulator::sshll2(VectorFormat vform,
1710 LogicVRegister dst,
1711 const LogicVRegister& src,
1712 int shift) {
1713 VIXL_ASSERT(shift >= 0);
1714 SimVRegister temp1, temp2;
1715 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1716 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1717 return sshl(vform, dst, extendedreg, shiftreg);
1718 }
1719
1720
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1721 LogicVRegister Simulator::shll(VectorFormat vform,
1722 LogicVRegister dst,
1723 const LogicVRegister& src) {
1724 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1725 return sshll(vform, dst, src, shift);
1726 }
1727
1728
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1729 LogicVRegister Simulator::shll2(VectorFormat vform,
1730 LogicVRegister dst,
1731 const LogicVRegister& src) {
1732 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1733 return sshll2(vform, dst, src, shift);
1734 }
1735
1736
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1737 LogicVRegister Simulator::ushll(VectorFormat vform,
1738 LogicVRegister dst,
1739 const LogicVRegister& src,
1740 int shift) {
1741 VIXL_ASSERT(shift >= 0);
1742 SimVRegister temp1, temp2;
1743 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1744 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1745 return ushl(vform, dst, extendedreg, shiftreg);
1746 }
1747
1748
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1749 LogicVRegister Simulator::ushll2(VectorFormat vform,
1750 LogicVRegister dst,
1751 const LogicVRegister& src,
1752 int shift) {
1753 VIXL_ASSERT(shift >= 0);
1754 SimVRegister temp1, temp2;
1755 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1756 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1757 return ushl(vform, dst, extendedreg, shiftreg);
1758 }
1759
clast(VectorFormat vform,const LogicPRegister & pg,const LogicVRegister & src,int offset_from_last_active)1760 std::pair<bool, uint64_t> Simulator::clast(VectorFormat vform,
1761 const LogicPRegister& pg,
1762 const LogicVRegister& src,
1763 int offset_from_last_active) {
1764 // Untested for any other values.
1765 VIXL_ASSERT((offset_from_last_active == 0) || (offset_from_last_active == 1));
1766
1767 int last_active = GetLastActive(vform, pg);
1768 int lane_count = LaneCountFromFormat(vform);
1769 int index =
1770 ((last_active + offset_from_last_active) + lane_count) % lane_count;
1771 return std::make_pair(last_active >= 0, src.Uint(vform, index));
1772 }
1773
compact(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)1774 LogicVRegister Simulator::compact(VectorFormat vform,
1775 LogicVRegister dst,
1776 const LogicPRegister& pg,
1777 const LogicVRegister& src) {
1778 int j = 0;
1779 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1780 if (pg.IsActive(vform, i)) {
1781 dst.SetUint(vform, j++, src.Uint(vform, i));
1782 }
1783 }
1784 for (; j < LaneCountFromFormat(vform); j++) {
1785 dst.SetUint(vform, j, 0);
1786 }
1787 return dst;
1788 }
1789
splice(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1790 LogicVRegister Simulator::splice(VectorFormat vform,
1791 LogicVRegister dst,
1792 const LogicPRegister& pg,
1793 const LogicVRegister& src1,
1794 const LogicVRegister& src2) {
1795 int lane_count = LaneCountFromFormat(vform);
1796 int first_active = GetFirstActive(vform, pg);
1797 int last_active = GetLastActive(vform, pg);
1798 int dst_idx = 0;
1799 uint64_t result[kZRegMaxSizeInBytes];
1800
1801 if (first_active >= 0) {
1802 VIXL_ASSERT(last_active >= first_active);
1803 VIXL_ASSERT(last_active < lane_count);
1804 for (int i = first_active; i <= last_active; i++) {
1805 result[dst_idx++] = src1.Uint(vform, i);
1806 }
1807 }
1808
1809 VIXL_ASSERT(dst_idx <= lane_count);
1810 for (int i = dst_idx; i < lane_count; i++) {
1811 result[i] = src2.Uint(vform, i - dst_idx);
1812 }
1813
1814 for (int i = 0; i < lane_count; i++) {
1815 dst.SetUint(vform, i, result[i]);
1816 }
1817 return dst;
1818 }
1819
sel(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src1,const LogicVRegister & src2)1820 LogicVRegister Simulator::sel(VectorFormat vform,
1821 LogicVRegister dst,
1822 const SimPRegister& pg,
1823 const LogicVRegister& src1,
1824 const LogicVRegister& src2) {
1825 int p_reg_bits_per_lane =
1826 LaneSizeInBitsFromFormat(vform) / kZRegBitsPerPRegBit;
1827 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
1828 uint64_t lane_value = pg.GetBit(lane * p_reg_bits_per_lane)
1829 ? src1.Uint(vform, lane)
1830 : src2.Uint(vform, lane);
1831 dst.SetUint(vform, lane, lane_value);
1832 }
1833 return dst;
1834 }
1835
1836
sel(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src1,const LogicPRegister & src2)1837 LogicPRegister Simulator::sel(LogicPRegister dst,
1838 const LogicPRegister& pg,
1839 const LogicPRegister& src1,
1840 const LogicPRegister& src2) {
1841 for (int i = 0; i < dst.GetChunkCount(); i++) {
1842 LogicPRegister::ChunkType mask = pg.GetChunk(i);
1843 LogicPRegister::ChunkType result =
1844 (mask & src1.GetChunk(i)) | (~mask & src2.GetChunk(i));
1845 dst.SetChunk(i, result);
1846 }
1847 return dst;
1848 }
1849
1850
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1851 LogicVRegister Simulator::sli(VectorFormat vform,
1852 LogicVRegister dst,
1853 const LogicVRegister& src,
1854 int shift) {
1855 dst.ClearForWrite(vform);
1856 int lane_count = LaneCountFromFormat(vform);
1857 for (int i = 0; i < lane_count; i++) {
1858 uint64_t src_lane = src.Uint(vform, i);
1859 uint64_t dst_lane = dst.Uint(vform, i);
1860 uint64_t shifted = src_lane << shift;
1861 uint64_t mask = MaxUintFromFormat(vform) << shift;
1862 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1863 }
1864 return dst;
1865 }
1866
1867
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1868 LogicVRegister Simulator::sqshl(VectorFormat vform,
1869 LogicVRegister dst,
1870 const LogicVRegister& src,
1871 int shift) {
1872 VIXL_ASSERT(shift >= 0);
1873 SimVRegister temp;
1874 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1875 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1876 }
1877
1878
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1879 LogicVRegister Simulator::uqshl(VectorFormat vform,
1880 LogicVRegister dst,
1881 const LogicVRegister& src,
1882 int shift) {
1883 VIXL_ASSERT(shift >= 0);
1884 SimVRegister temp;
1885 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1886 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1887 }
1888
1889
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1890 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1891 LogicVRegister dst,
1892 const LogicVRegister& src,
1893 int shift) {
1894 VIXL_ASSERT(shift >= 0);
1895 SimVRegister temp;
1896 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1897 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1898 }
1899
1900
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1901 LogicVRegister Simulator::sri(VectorFormat vform,
1902 LogicVRegister dst,
1903 const LogicVRegister& src,
1904 int shift) {
1905 dst.ClearForWrite(vform);
1906 int lane_count = LaneCountFromFormat(vform);
1907 VIXL_ASSERT((shift > 0) &&
1908 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1909 for (int i = 0; i < lane_count; i++) {
1910 uint64_t src_lane = src.Uint(vform, i);
1911 uint64_t dst_lane = dst.Uint(vform, i);
1912 uint64_t shifted;
1913 uint64_t mask;
1914 if (shift == 64) {
1915 shifted = 0;
1916 mask = 0;
1917 } else {
1918 shifted = src_lane >> shift;
1919 mask = MaxUintFromFormat(vform) >> shift;
1920 }
1921 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1922 }
1923 return dst;
1924 }
1925
1926
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1927 LogicVRegister Simulator::ushr(VectorFormat vform,
1928 LogicVRegister dst,
1929 const LogicVRegister& src,
1930 int shift) {
1931 VIXL_ASSERT(shift >= 0);
1932 SimVRegister temp;
1933 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1934 return ushl(vform, dst, src, shiftreg);
1935 }
1936
1937
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1938 LogicVRegister Simulator::sshr(VectorFormat vform,
1939 LogicVRegister dst,
1940 const LogicVRegister& src,
1941 int shift) {
1942 VIXL_ASSERT(shift >= 0);
1943 SimVRegister temp;
1944 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1945 return sshl(vform, dst, src, shiftreg);
1946 }
1947
1948
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1949 LogicVRegister Simulator::ssra(VectorFormat vform,
1950 LogicVRegister dst,
1951 const LogicVRegister& src,
1952 int shift) {
1953 SimVRegister temp;
1954 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1955 return add(vform, dst, dst, shifted_reg);
1956 }
1957
1958
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1959 LogicVRegister Simulator::usra(VectorFormat vform,
1960 LogicVRegister dst,
1961 const LogicVRegister& src,
1962 int shift) {
1963 SimVRegister temp;
1964 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1965 return add(vform, dst, dst, shifted_reg);
1966 }
1967
1968
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1969 LogicVRegister Simulator::srsra(VectorFormat vform,
1970 LogicVRegister dst,
1971 const LogicVRegister& src,
1972 int shift) {
1973 SimVRegister temp;
1974 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1975 return add(vform, dst, dst, shifted_reg);
1976 }
1977
1978
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1979 LogicVRegister Simulator::ursra(VectorFormat vform,
1980 LogicVRegister dst,
1981 const LogicVRegister& src,
1982 int shift) {
1983 SimVRegister temp;
1984 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1985 return add(vform, dst, dst, shifted_reg);
1986 }
1987
1988
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1989 LogicVRegister Simulator::cls(VectorFormat vform,
1990 LogicVRegister dst,
1991 const LogicVRegister& src) {
1992 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
1993 int lane_count = LaneCountFromFormat(vform);
1994
1995 // Ensure that we can store one result per lane.
1996 int result[kZRegMaxSizeInBytes];
1997
1998 for (int i = 0; i < lane_count; i++) {
1999 result[i] = CountLeadingSignBits(src.Int(vform, i), lane_size_in_bits);
2000 }
2001
2002 dst.ClearForWrite(vform);
2003 for (int i = 0; i < lane_count; ++i) {
2004 dst.SetUint(vform, i, result[i]);
2005 }
2006 return dst;
2007 }
2008
2009
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2010 LogicVRegister Simulator::clz(VectorFormat vform,
2011 LogicVRegister dst,
2012 const LogicVRegister& src) {
2013 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2014 int lane_count = LaneCountFromFormat(vform);
2015
2016 // Ensure that we can store one result per lane.
2017 int result[kZRegMaxSizeInBytes];
2018
2019 for (int i = 0; i < lane_count; i++) {
2020 result[i] = CountLeadingZeros(src.Uint(vform, i), lane_size_in_bits);
2021 }
2022
2023 dst.ClearForWrite(vform);
2024 for (int i = 0; i < lane_count; ++i) {
2025 dst.SetUint(vform, i, result[i]);
2026 }
2027 return dst;
2028 }
2029
2030
cnot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2031 LogicVRegister Simulator::cnot(VectorFormat vform,
2032 LogicVRegister dst,
2033 const LogicVRegister& src) {
2034 dst.ClearForWrite(vform);
2035 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2036 uint64_t value = (src.Uint(vform, i) == 0) ? 1 : 0;
2037 dst.SetUint(vform, i, value);
2038 }
2039 return dst;
2040 }
2041
2042
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2043 LogicVRegister Simulator::cnt(VectorFormat vform,
2044 LogicVRegister dst,
2045 const LogicVRegister& src) {
2046 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2047 int lane_count = LaneCountFromFormat(vform);
2048
2049 // Ensure that we can store one result per lane.
2050 int result[kZRegMaxSizeInBytes];
2051
2052 for (int i = 0; i < lane_count; i++) {
2053 result[i] = CountSetBits(src.Uint(vform, i), lane_size_in_bits);
2054 }
2055
2056 dst.ClearForWrite(vform);
2057 for (int i = 0; i < lane_count; ++i) {
2058 dst.SetUint(vform, i, result[i]);
2059 }
2060 return dst;
2061 }
2062
2063
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2064 LogicVRegister Simulator::sshl(VectorFormat vform,
2065 LogicVRegister dst,
2066 const LogicVRegister& src1,
2067 const LogicVRegister& src2) {
2068 dst.ClearForWrite(vform);
2069 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2070 int8_t shift_val = src2.Int(vform, i);
2071 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
2072
2073 // Set signed saturation state.
2074 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
2075 dst.SetSignedSat(i, lj_src_val >= 0);
2076 }
2077
2078 // Set unsigned saturation state.
2079 if (lj_src_val < 0) {
2080 dst.SetUnsignedSat(i, false);
2081 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
2082 (lj_src_val != 0)) {
2083 dst.SetUnsignedSat(i, true);
2084 }
2085
2086 int64_t src_val = src1.Int(vform, i);
2087 bool src_is_negative = src_val < 0;
2088 if (shift_val > 63) {
2089 dst.SetInt(vform, i, 0);
2090 } else if (shift_val < -63) {
2091 dst.SetRounding(i, src_is_negative);
2092 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
2093 } else {
2094 // Use unsigned types for shifts, as behaviour is undefined for signed
2095 // lhs.
2096 uint64_t usrc_val = static_cast<uint64_t>(src_val);
2097
2098 if (shift_val < 0) {
2099 // Convert to right shift.
2100 shift_val = -shift_val;
2101
2102 // Set rounding state by testing most-significant bit shifted out.
2103 // Rounding only needed on right shifts.
2104 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
2105 dst.SetRounding(i, true);
2106 }
2107
2108 usrc_val >>= shift_val;
2109
2110 if (src_is_negative) {
2111 // Simulate sign-extension.
2112 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
2113 }
2114 } else {
2115 usrc_val <<= shift_val;
2116 }
2117 dst.SetUint(vform, i, usrc_val);
2118 }
2119 }
2120 return dst;
2121 }
2122
2123
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2124 LogicVRegister Simulator::ushl(VectorFormat vform,
2125 LogicVRegister dst,
2126 const LogicVRegister& src1,
2127 const LogicVRegister& src2) {
2128 dst.ClearForWrite(vform);
2129 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2130 int8_t shift_val = src2.Int(vform, i);
2131 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
2132
2133 // Set saturation state.
2134 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
2135 dst.SetUnsignedSat(i, true);
2136 }
2137
2138 uint64_t src_val = src1.Uint(vform, i);
2139 if ((shift_val > 63) || (shift_val < -64)) {
2140 dst.SetUint(vform, i, 0);
2141 } else {
2142 if (shift_val < 0) {
2143 // Set rounding state. Rounding only needed on right shifts.
2144 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
2145 dst.SetRounding(i, true);
2146 }
2147
2148 if (shift_val == -64) {
2149 src_val = 0;
2150 } else {
2151 src_val >>= -shift_val;
2152 }
2153 } else {
2154 src_val <<= shift_val;
2155 }
2156 dst.SetUint(vform, i, src_val);
2157 }
2158 }
2159 return dst;
2160 }
2161
2162
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2163 LogicVRegister Simulator::neg(VectorFormat vform,
2164 LogicVRegister dst,
2165 const LogicVRegister& src) {
2166 dst.ClearForWrite(vform);
2167 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2168 // Test for signed saturation.
2169 int64_t sa = src.Int(vform, i);
2170 if (sa == MinIntFromFormat(vform)) {
2171 dst.SetSignedSat(i, true);
2172 }
2173 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2174 }
2175 return dst;
2176 }
2177
2178
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2179 LogicVRegister Simulator::suqadd(VectorFormat vform,
2180 LogicVRegister dst,
2181 const LogicVRegister& src) {
2182 dst.ClearForWrite(vform);
2183 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2184 int64_t sa = dst.IntLeftJustified(vform, i);
2185 uint64_t ub = src.UintLeftJustified(vform, i);
2186 uint64_t ur = sa + ub;
2187
2188 int64_t sr;
2189 memcpy(&sr, &ur, sizeof(sr));
2190 if (sr < sa) { // Test for signed positive saturation.
2191 dst.SetInt(vform, i, MaxIntFromFormat(vform));
2192 } else {
2193 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
2194 }
2195 }
2196 return dst;
2197 }
2198
2199
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2200 LogicVRegister Simulator::usqadd(VectorFormat vform,
2201 LogicVRegister dst,
2202 const LogicVRegister& src) {
2203 dst.ClearForWrite(vform);
2204 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2205 uint64_t ua = dst.UintLeftJustified(vform, i);
2206 int64_t sb = src.IntLeftJustified(vform, i);
2207 uint64_t ur = ua + sb;
2208
2209 if ((sb > 0) && (ur <= ua)) {
2210 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
2211 } else if ((sb < 0) && (ur >= ua)) {
2212 dst.SetUint(vform, i, 0); // Negative saturation.
2213 } else {
2214 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
2215 }
2216 }
2217 return dst;
2218 }
2219
2220
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2221 LogicVRegister Simulator::abs(VectorFormat vform,
2222 LogicVRegister dst,
2223 const LogicVRegister& src) {
2224 dst.ClearForWrite(vform);
2225 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2226 // Test for signed saturation.
2227 int64_t sa = src.Int(vform, i);
2228 if (sa == MinIntFromFormat(vform)) {
2229 dst.SetSignedSat(i, true);
2230 }
2231 if (sa < 0) {
2232 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
2233 } else {
2234 dst.SetInt(vform, i, sa);
2235 }
2236 }
2237 return dst;
2238 }
2239
2240
andv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2241 LogicVRegister Simulator::andv(VectorFormat vform,
2242 LogicVRegister dst,
2243 const LogicPRegister& pg,
2244 const LogicVRegister& src) {
2245 VIXL_ASSERT(IsSVEFormat(vform));
2246 uint64_t result = GetUintMask(LaneSizeInBitsFromFormat(vform));
2247 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2248 if (!pg.IsActive(vform, i)) continue;
2249
2250 result &= src.Uint(vform, i);
2251 }
2252 VectorFormat vform_dst =
2253 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2254 dst.ClearForWrite(vform_dst);
2255 dst.SetUint(vform_dst, 0, result);
2256 return dst;
2257 }
2258
2259
eorv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2260 LogicVRegister Simulator::eorv(VectorFormat vform,
2261 LogicVRegister dst,
2262 const LogicPRegister& pg,
2263 const LogicVRegister& src) {
2264 VIXL_ASSERT(IsSVEFormat(vform));
2265 uint64_t result = 0;
2266 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2267 if (!pg.IsActive(vform, i)) continue;
2268
2269 result ^= src.Uint(vform, i);
2270 }
2271 VectorFormat vform_dst =
2272 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2273 dst.ClearForWrite(vform_dst);
2274 dst.SetUint(vform_dst, 0, result);
2275 return dst;
2276 }
2277
2278
orv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2279 LogicVRegister Simulator::orv(VectorFormat vform,
2280 LogicVRegister dst,
2281 const LogicPRegister& pg,
2282 const LogicVRegister& src) {
2283 VIXL_ASSERT(IsSVEFormat(vform));
2284 uint64_t result = 0;
2285 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2286 if (!pg.IsActive(vform, i)) continue;
2287
2288 result |= src.Uint(vform, i);
2289 }
2290 VectorFormat vform_dst =
2291 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2292 dst.ClearForWrite(vform_dst);
2293 dst.SetUint(vform_dst, 0, result);
2294 return dst;
2295 }
2296
2297
saddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2298 LogicVRegister Simulator::saddv(VectorFormat vform,
2299 LogicVRegister dst,
2300 const LogicPRegister& pg,
2301 const LogicVRegister& src) {
2302 VIXL_ASSERT(IsSVEFormat(vform));
2303 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) <= kSRegSize);
2304 int64_t result = 0;
2305 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2306 if (!pg.IsActive(vform, i)) continue;
2307
2308 // The destination register always has D-lane sizes and the source register
2309 // always has S-lanes or smaller, so signed integer overflow -- undefined
2310 // behaviour -- can't occur.
2311 result += src.Int(vform, i);
2312 }
2313
2314 dst.ClearForWrite(kFormatD);
2315 dst.SetInt(kFormatD, 0, result);
2316 return dst;
2317 }
2318
2319
uaddv(VectorFormat vform,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)2320 LogicVRegister Simulator::uaddv(VectorFormat vform,
2321 LogicVRegister dst,
2322 const LogicPRegister& pg,
2323 const LogicVRegister& src) {
2324 VIXL_ASSERT(IsSVEFormat(vform));
2325 uint64_t result = 0;
2326 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2327 if (!pg.IsActive(vform, i)) continue;
2328
2329 result += src.Uint(vform, i);
2330 }
2331
2332 dst.ClearForWrite(kFormatD);
2333 dst.SetUint(kFormatD, 0, result);
2334 return dst;
2335 }
2336
2337
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dst_is_signed,const LogicVRegister & src,bool src_is_signed)2338 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
2339 LogicVRegister dst,
2340 bool dst_is_signed,
2341 const LogicVRegister& src,
2342 bool src_is_signed) {
2343 bool upperhalf = false;
2344 VectorFormat srcform = kFormatUndefined;
2345 int64_t ssrc[8];
2346 uint64_t usrc[8];
2347
2348 switch (dstform) {
2349 case kFormat8B:
2350 upperhalf = false;
2351 srcform = kFormat8H;
2352 break;
2353 case kFormat16B:
2354 upperhalf = true;
2355 srcform = kFormat8H;
2356 break;
2357 case kFormat4H:
2358 upperhalf = false;
2359 srcform = kFormat4S;
2360 break;
2361 case kFormat8H:
2362 upperhalf = true;
2363 srcform = kFormat4S;
2364 break;
2365 case kFormat2S:
2366 upperhalf = false;
2367 srcform = kFormat2D;
2368 break;
2369 case kFormat4S:
2370 upperhalf = true;
2371 srcform = kFormat2D;
2372 break;
2373 case kFormatB:
2374 upperhalf = false;
2375 srcform = kFormatH;
2376 break;
2377 case kFormatH:
2378 upperhalf = false;
2379 srcform = kFormatS;
2380 break;
2381 case kFormatS:
2382 upperhalf = false;
2383 srcform = kFormatD;
2384 break;
2385 default:
2386 VIXL_UNIMPLEMENTED();
2387 }
2388
2389 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2390 ssrc[i] = src.Int(srcform, i);
2391 usrc[i] = src.Uint(srcform, i);
2392 }
2393
2394 int offset;
2395 if (upperhalf) {
2396 offset = LaneCountFromFormat(dstform) / 2;
2397 } else {
2398 offset = 0;
2399 dst.ClearForWrite(dstform);
2400 }
2401
2402 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
2403 // Test for signed saturation
2404 if (ssrc[i] > MaxIntFromFormat(dstform)) {
2405 dst.SetSignedSat(offset + i, true);
2406 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
2407 dst.SetSignedSat(offset + i, false);
2408 }
2409
2410 // Test for unsigned saturation
2411 if (src_is_signed) {
2412 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
2413 dst.SetUnsignedSat(offset + i, true);
2414 } else if (ssrc[i] < 0) {
2415 dst.SetUnsignedSat(offset + i, false);
2416 }
2417 } else {
2418 if (usrc[i] > MaxUintFromFormat(dstform)) {
2419 dst.SetUnsignedSat(offset + i, true);
2420 }
2421 }
2422
2423 int64_t result;
2424 if (src_is_signed) {
2425 result = ssrc[i] & MaxUintFromFormat(dstform);
2426 } else {
2427 result = usrc[i] & MaxUintFromFormat(dstform);
2428 }
2429
2430 if (dst_is_signed) {
2431 dst.SetInt(dstform, offset + i, result);
2432 } else {
2433 dst.SetUint(dstform, offset + i, result);
2434 }
2435 }
2436 return dst;
2437 }
2438
2439
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2440 LogicVRegister Simulator::xtn(VectorFormat vform,
2441 LogicVRegister dst,
2442 const LogicVRegister& src) {
2443 return extractnarrow(vform, dst, true, src, true);
2444 }
2445
2446
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2447 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2448 LogicVRegister dst,
2449 const LogicVRegister& src) {
2450 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2451 }
2452
2453
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2454 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2455 LogicVRegister dst,
2456 const LogicVRegister& src) {
2457 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2458 }
2459
2460
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2461 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2462 LogicVRegister dst,
2463 const LogicVRegister& src) {
2464 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2465 }
2466
2467
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)2468 LogicVRegister Simulator::absdiff(VectorFormat vform,
2469 LogicVRegister dst,
2470 const LogicVRegister& src1,
2471 const LogicVRegister& src2,
2472 bool is_signed) {
2473 dst.ClearForWrite(vform);
2474 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2475 bool src1_gt_src2 = is_signed ? (src1.Int(vform, i) > src2.Int(vform, i))
2476 : (src1.Uint(vform, i) > src2.Uint(vform, i));
2477 // Always calculate the answer using unsigned arithmetic, to avoid
2478 // implemenation-defined signed overflow.
2479 if (src1_gt_src2) {
2480 dst.SetUint(vform, i, src1.Uint(vform, i) - src2.Uint(vform, i));
2481 } else {
2482 dst.SetUint(vform, i, src2.Uint(vform, i) - src1.Uint(vform, i));
2483 }
2484 }
2485 return dst;
2486 }
2487
2488
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2489 LogicVRegister Simulator::saba(VectorFormat vform,
2490 LogicVRegister dst,
2491 const LogicVRegister& src1,
2492 const LogicVRegister& src2) {
2493 SimVRegister temp;
2494 dst.ClearForWrite(vform);
2495 absdiff(vform, temp, src1, src2, true);
2496 add(vform, dst, dst, temp);
2497 return dst;
2498 }
2499
2500
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2501 LogicVRegister Simulator::uaba(VectorFormat vform,
2502 LogicVRegister dst,
2503 const LogicVRegister& src1,
2504 const LogicVRegister& src2) {
2505 SimVRegister temp;
2506 dst.ClearForWrite(vform);
2507 absdiff(vform, temp, src1, src2, false);
2508 add(vform, dst, dst, temp);
2509 return dst;
2510 }
2511
2512
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2513 LogicVRegister Simulator::not_(VectorFormat vform,
2514 LogicVRegister dst,
2515 const LogicVRegister& src) {
2516 dst.ClearForWrite(vform);
2517 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2518 dst.SetUint(vform, i, ~src.Uint(vform, i));
2519 }
2520 return dst;
2521 }
2522
2523
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2524 LogicVRegister Simulator::rbit(VectorFormat vform,
2525 LogicVRegister dst,
2526 const LogicVRegister& src) {
2527 uint64_t result[kZRegMaxSizeInBytes];
2528 int lane_count = LaneCountFromFormat(vform);
2529 int lane_size_in_bits = LaneSizeInBitsFromFormat(vform);
2530 uint64_t reversed_value;
2531 uint64_t value;
2532 for (int i = 0; i < lane_count; i++) {
2533 value = src.Uint(vform, i);
2534 reversed_value = 0;
2535 for (int j = 0; j < lane_size_in_bits; j++) {
2536 reversed_value = (reversed_value << 1) | (value & 1);
2537 value >>= 1;
2538 }
2539 result[i] = reversed_value;
2540 }
2541
2542 dst.ClearForWrite(vform);
2543 for (int i = 0; i < lane_count; ++i) {
2544 dst.SetUint(vform, i, result[i]);
2545 }
2546 return dst;
2547 }
2548
2549
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2550 LogicVRegister Simulator::rev(VectorFormat vform,
2551 LogicVRegister dst,
2552 const LogicVRegister& src) {
2553 VIXL_ASSERT(IsSVEFormat(vform));
2554 int lane_count = LaneCountFromFormat(vform);
2555 for (int i = 0; i < lane_count / 2; i++) {
2556 uint64_t t = src.Uint(vform, i);
2557 dst.SetUint(vform, i, src.Uint(vform, lane_count - i - 1));
2558 dst.SetUint(vform, lane_count - i - 1, t);
2559 }
2560 return dst;
2561 }
2562
2563
rev_byte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int rev_size)2564 LogicVRegister Simulator::rev_byte(VectorFormat vform,
2565 LogicVRegister dst,
2566 const LogicVRegister& src,
2567 int rev_size) {
2568 uint64_t result[kZRegMaxSizeInBytes];
2569 int lane_count = LaneCountFromFormat(vform);
2570 int lane_size = LaneSizeInBytesFromFormat(vform);
2571 int lanes_per_loop = rev_size / lane_size;
2572 for (int i = 0; i < lane_count; i += lanes_per_loop) {
2573 for (int j = 0; j < lanes_per_loop; j++) {
2574 result[i + lanes_per_loop - 1 - j] = src.Uint(vform, i + j);
2575 }
2576 }
2577 dst.ClearForWrite(vform);
2578 for (int i = 0; i < lane_count; ++i) {
2579 dst.SetUint(vform, i, result[i]);
2580 }
2581 return dst;
2582 }
2583
2584
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2585 LogicVRegister Simulator::rev16(VectorFormat vform,
2586 LogicVRegister dst,
2587 const LogicVRegister& src) {
2588 return rev_byte(vform, dst, src, 2);
2589 }
2590
2591
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2592 LogicVRegister Simulator::rev32(VectorFormat vform,
2593 LogicVRegister dst,
2594 const LogicVRegister& src) {
2595 return rev_byte(vform, dst, src, 4);
2596 }
2597
2598
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2599 LogicVRegister Simulator::rev64(VectorFormat vform,
2600 LogicVRegister dst,
2601 const LogicVRegister& src) {
2602 return rev_byte(vform, dst, src, 8);
2603 }
2604
2605
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2606 LogicVRegister Simulator::addlp(VectorFormat vform,
2607 LogicVRegister dst,
2608 const LogicVRegister& src,
2609 bool is_signed,
2610 bool do_accumulate) {
2611 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2612 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32);
2613 VIXL_ASSERT(LaneCountFromFormat(vform) <= 8);
2614
2615 uint64_t result[8];
2616 int lane_count = LaneCountFromFormat(vform);
2617 for (int i = 0; i < lane_count; i++) {
2618 if (is_signed) {
2619 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2620 src.Int(vformsrc, 2 * i + 1));
2621 } else {
2622 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2623 }
2624 }
2625
2626 dst.ClearForWrite(vform);
2627 for (int i = 0; i < lane_count; ++i) {
2628 if (do_accumulate) {
2629 result[i] += dst.Uint(vform, i);
2630 }
2631 dst.SetUint(vform, i, result[i]);
2632 }
2633
2634 return dst;
2635 }
2636
2637
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2638 LogicVRegister Simulator::saddlp(VectorFormat vform,
2639 LogicVRegister dst,
2640 const LogicVRegister& src) {
2641 return addlp(vform, dst, src, true, false);
2642 }
2643
2644
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2645 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2646 LogicVRegister dst,
2647 const LogicVRegister& src) {
2648 return addlp(vform, dst, src, false, false);
2649 }
2650
2651
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2652 LogicVRegister Simulator::sadalp(VectorFormat vform,
2653 LogicVRegister dst,
2654 const LogicVRegister& src) {
2655 return addlp(vform, dst, src, true, true);
2656 }
2657
2658
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2659 LogicVRegister Simulator::uadalp(VectorFormat vform,
2660 LogicVRegister dst,
2661 const LogicVRegister& src) {
2662 return addlp(vform, dst, src, false, true);
2663 }
2664
2665
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2666 LogicVRegister Simulator::ext(VectorFormat vform,
2667 LogicVRegister dst,
2668 const LogicVRegister& src1,
2669 const LogicVRegister& src2,
2670 int index) {
2671 uint8_t result[kZRegMaxSizeInBytes];
2672 int lane_count = LaneCountFromFormat(vform);
2673 for (int i = 0; i < lane_count - index; ++i) {
2674 result[i] = src1.Uint(vform, i + index);
2675 }
2676 for (int i = 0; i < index; ++i) {
2677 result[lane_count - index + i] = src2.Uint(vform, i);
2678 }
2679 dst.ClearForWrite(vform);
2680 for (int i = 0; i < lane_count; ++i) {
2681 dst.SetUint(vform, i, result[i]);
2682 }
2683 return dst;
2684 }
2685
2686 template <typename T>
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2687 LogicVRegister Simulator::fadda(VectorFormat vform,
2688 LogicVRegister acc,
2689 const LogicPRegister& pg,
2690 const LogicVRegister& src) {
2691 T result = acc.Float<T>(0);
2692 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2693 if (!pg.IsActive(vform, i)) continue;
2694
2695 result = FPAdd(result, src.Float<T>(i));
2696 }
2697 VectorFormat vform_dst =
2698 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
2699 acc.ClearForWrite(vform_dst);
2700 acc.SetFloat(0, result);
2701 return acc;
2702 }
2703
fadda(VectorFormat vform,LogicVRegister acc,const LogicPRegister & pg,const LogicVRegister & src)2704 LogicVRegister Simulator::fadda(VectorFormat vform,
2705 LogicVRegister acc,
2706 const LogicPRegister& pg,
2707 const LogicVRegister& src) {
2708 switch (LaneSizeInBitsFromFormat(vform)) {
2709 case kHRegSize:
2710 fadda<SimFloat16>(vform, acc, pg, src);
2711 break;
2712 case kSRegSize:
2713 fadda<float>(vform, acc, pg, src);
2714 break;
2715 case kDRegSize:
2716 fadda<double>(vform, acc, pg, src);
2717 break;
2718 default:
2719 VIXL_UNREACHABLE();
2720 }
2721 return acc;
2722 }
2723
2724 template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2725 LogicVRegister Simulator::fcadd(VectorFormat vform,
2726 LogicVRegister dst, // d
2727 const LogicVRegister& src1, // n
2728 const LogicVRegister& src2, // m
2729 int rot) {
2730 int elements = LaneCountFromFormat(vform);
2731
2732 T element1, element3;
2733 rot = (rot == 1) ? 270 : 90;
2734
2735 // Loop example:
2736 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2737 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2738
2739 for (int e = 0; e <= (elements / 2) - 1; e++) {
2740 switch (rot) {
2741 case 90:
2742 element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2743 element3 = src2.Float<T>(e * 2);
2744 break;
2745 case 270:
2746 element1 = src2.Float<T>(e * 2 + 1);
2747 element3 = FPNeg(src2.Float<T>(e * 2));
2748 break;
2749 default:
2750 VIXL_UNREACHABLE();
2751 return dst; // prevents "element(n) may be unintialized" errors
2752 }
2753 dst.ClearForWrite(vform);
2754 dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2755 dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2756 }
2757 return dst;
2758 }
2759
2760
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2761 LogicVRegister Simulator::fcadd(VectorFormat vform,
2762 LogicVRegister dst, // d
2763 const LogicVRegister& src1, // n
2764 const LogicVRegister& src2, // m
2765 int rot) {
2766 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2767 fcadd<SimFloat16>(vform, dst, src1, src2, rot);
2768 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2769 fcadd<float>(vform, dst, src1, src2, rot);
2770 } else {
2771 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2772 fcadd<double>(vform, dst, src1, src2, rot);
2773 }
2774 return dst;
2775 }
2776
2777 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int index,int rot)2778 LogicVRegister Simulator::fcmla(VectorFormat vform,
2779 LogicVRegister dst,
2780 const LogicVRegister& src1,
2781 const LogicVRegister& src2,
2782 const LogicVRegister& acc,
2783 int index,
2784 int rot) {
2785 int elements = LaneCountFromFormat(vform);
2786
2787 T element1, element2, element3, element4;
2788 rot *= 90;
2789
2790 // Loop example:
2791 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2792 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2793
2794 for (int e = 0; e <= (elements / 2) - 1; e++) {
2795 // Index == -1 indicates a vector/vector rather than vector/indexed-element
2796 // operation.
2797 int f = (index < 0) ? e : index;
2798
2799 switch (rot) {
2800 case 0:
2801 element1 = src2.Float<T>(f * 2);
2802 element2 = src1.Float<T>(e * 2);
2803 element3 = src2.Float<T>(f * 2 + 1);
2804 element4 = src1.Float<T>(e * 2);
2805 break;
2806 case 90:
2807 element1 = FPNeg(src2.Float<T>(f * 2 + 1));
2808 element2 = src1.Float<T>(e * 2 + 1);
2809 element3 = src2.Float<T>(f * 2);
2810 element4 = src1.Float<T>(e * 2 + 1);
2811 break;
2812 case 180:
2813 element1 = FPNeg(src2.Float<T>(f * 2));
2814 element2 = src1.Float<T>(e * 2);
2815 element3 = FPNeg(src2.Float<T>(f * 2 + 1));
2816 element4 = src1.Float<T>(e * 2);
2817 break;
2818 case 270:
2819 element1 = src2.Float<T>(f * 2 + 1);
2820 element2 = src1.Float<T>(e * 2 + 1);
2821 element3 = FPNeg(src2.Float<T>(f * 2));
2822 element4 = src1.Float<T>(e * 2 + 1);
2823 break;
2824 default:
2825 VIXL_UNREACHABLE();
2826 return dst; // prevents "element(n) may be unintialized" errors
2827 }
2828 dst.ClearForWrite(vform);
2829 dst.SetFloat<T>(vform,
2830 e * 2,
2831 FPMulAdd(acc.Float<T>(e * 2), element2, element1));
2832 dst.SetFloat<T>(vform,
2833 e * 2 + 1,
2834 FPMulAdd(acc.Float<T>(e * 2 + 1), element4, element3));
2835 }
2836 return dst;
2837 }
2838
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,const LogicVRegister & acc,int rot)2839 LogicVRegister Simulator::fcmla(VectorFormat vform,
2840 LogicVRegister dst,
2841 const LogicVRegister& src1,
2842 const LogicVRegister& src2,
2843 const LogicVRegister& acc,
2844 int rot) {
2845 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2846 fcmla<SimFloat16>(vform, dst, src1, src2, acc, -1, rot);
2847 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2848 fcmla<float>(vform, dst, src1, src2, acc, -1, rot);
2849 } else {
2850 fcmla<double>(vform, dst, src1, src2, acc, -1, rot);
2851 }
2852 return dst;
2853 }
2854
2855
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2856 LogicVRegister Simulator::fcmla(VectorFormat vform,
2857 LogicVRegister dst, // d
2858 const LogicVRegister& src1, // n
2859 const LogicVRegister& src2, // m
2860 int index,
2861 int rot) {
2862 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2863 VIXL_UNIMPLEMENTED();
2864 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2865 fcmla<float>(vform, dst, src1, src2, dst, index, rot);
2866 } else {
2867 fcmla<double>(vform, dst, src1, src2, dst, index, rot);
2868 }
2869 return dst;
2870 }
2871
2872
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2873 LogicVRegister Simulator::dup_element(VectorFormat vform,
2874 LogicVRegister dst,
2875 const LogicVRegister& src,
2876 int src_index) {
2877 if (vform == kFormatVnQ) {
2878 // When duplicating a 128-bit value, split it into two 64-bit parts, and
2879 // then copy the two to their slots on destination register.
2880 uint64_t low = src.Uint(kFormatVnD, src_index * 2);
2881 uint64_t high = src.Uint(kFormatVnD, (src_index * 2) + 1);
2882 dst.ClearForWrite(vform);
2883 for (int d_lane = 0; d_lane < LaneCountFromFormat(kFormatVnD);
2884 d_lane += 2) {
2885 dst.SetUint(kFormatVnD, d_lane, low);
2886 dst.SetUint(kFormatVnD, d_lane + 1, high);
2887 }
2888 } else {
2889 int lane_count = LaneCountFromFormat(vform);
2890 uint64_t value = src.Uint(vform, src_index);
2891 dst.ClearForWrite(vform);
2892 for (int i = 0; i < lane_count; ++i) {
2893 dst.SetUint(vform, i, value);
2894 }
2895 }
2896 return dst;
2897 }
2898
dup_elements_to_segments(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2899 LogicVRegister Simulator::dup_elements_to_segments(VectorFormat vform,
2900 LogicVRegister dst,
2901 const LogicVRegister& src,
2902 int src_index) {
2903 // In SVE, a segment is a 128-bit portion of a vector, like a Q register,
2904 // whereas in NEON, the size of segment is equal to the size of register
2905 // itself.
2906 int segment_size = std::min(kQRegSize, RegisterSizeInBitsFromFormat(vform));
2907 VIXL_ASSERT(IsMultiple(segment_size, LaneSizeInBitsFromFormat(vform)));
2908 int lanes_per_segment = segment_size / LaneSizeInBitsFromFormat(vform);
2909
2910 VIXL_ASSERT(src_index >= 0);
2911 VIXL_ASSERT(src_index < lanes_per_segment);
2912
2913 dst.ClearForWrite(vform);
2914 for (int j = 0; j < LaneCountFromFormat(vform); j += lanes_per_segment) {
2915 uint64_t value = src.Uint(vform, j + src_index);
2916 for (int i = 0; i < lanes_per_segment; i++) {
2917 dst.SetUint(vform, j + i, value);
2918 }
2919 }
2920 return dst;
2921 }
2922
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2923 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2924 LogicVRegister dst,
2925 uint64_t imm) {
2926 int lane_count = LaneCountFromFormat(vform);
2927 uint64_t value = imm & MaxUintFromFormat(vform);
2928 dst.ClearForWrite(vform);
2929 for (int i = 0; i < lane_count; ++i) {
2930 dst.SetUint(vform, i, value);
2931 }
2932 return dst;
2933 }
2934
2935
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2936 LogicVRegister Simulator::ins_element(VectorFormat vform,
2937 LogicVRegister dst,
2938 int dst_index,
2939 const LogicVRegister& src,
2940 int src_index) {
2941 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2942 return dst;
2943 }
2944
2945
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2946 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2947 LogicVRegister dst,
2948 int dst_index,
2949 uint64_t imm) {
2950 uint64_t value = imm & MaxUintFromFormat(vform);
2951 dst.SetUint(vform, dst_index, value);
2952 return dst;
2953 }
2954
2955
index(VectorFormat vform,LogicVRegister dst,uint64_t start,uint64_t step)2956 LogicVRegister Simulator::index(VectorFormat vform,
2957 LogicVRegister dst,
2958 uint64_t start,
2959 uint64_t step) {
2960 VIXL_ASSERT(IsSVEFormat(vform));
2961 uint64_t value = start;
2962 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2963 dst.SetUint(vform, i, value);
2964 value += step;
2965 }
2966 return dst;
2967 }
2968
2969
insr(VectorFormat vform,LogicVRegister dst,uint64_t imm)2970 LogicVRegister Simulator::insr(VectorFormat vform,
2971 LogicVRegister dst,
2972 uint64_t imm) {
2973 VIXL_ASSERT(IsSVEFormat(vform));
2974 for (int i = LaneCountFromFormat(vform) - 1; i > 0; i--) {
2975 dst.SetUint(vform, i, dst.Uint(vform, i - 1));
2976 }
2977 dst.SetUint(vform, 0, imm);
2978 return dst;
2979 }
2980
2981
mov(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2982 LogicVRegister Simulator::mov(VectorFormat vform,
2983 LogicVRegister dst,
2984 const LogicVRegister& src) {
2985 dst.ClearForWrite(vform);
2986 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
2987 dst.SetUint(vform, lane, src.Uint(vform, lane));
2988 }
2989 return dst;
2990 }
2991
2992
mov(LogicPRegister dst,const LogicPRegister & src)2993 LogicPRegister Simulator::mov(LogicPRegister dst, const LogicPRegister& src) {
2994 // Avoid a copy if the registers already alias.
2995 if (dst.Aliases(src)) return dst;
2996
2997 for (int i = 0; i < dst.GetChunkCount(); i++) {
2998 dst.SetChunk(i, src.GetChunk(i));
2999 }
3000 return dst;
3001 }
3002
3003
mov_merging(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3004 LogicVRegister Simulator::mov_merging(VectorFormat vform,
3005 LogicVRegister dst,
3006 const SimPRegister& pg,
3007 const LogicVRegister& src) {
3008 return sel(vform, dst, pg, src, dst);
3009 }
3010
3011
mov_zeroing(VectorFormat vform,LogicVRegister dst,const SimPRegister & pg,const LogicVRegister & src)3012 LogicVRegister Simulator::mov_zeroing(VectorFormat vform,
3013 LogicVRegister dst,
3014 const SimPRegister& pg,
3015 const LogicVRegister& src) {
3016 SimVRegister zero;
3017 dup_immediate(vform, zero, 0);
3018 return sel(vform, dst, pg, src, zero);
3019 }
3020
3021
mov_merging(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3022 LogicPRegister Simulator::mov_merging(LogicPRegister dst,
3023 const LogicPRegister& pg,
3024 const LogicPRegister& src) {
3025 return sel(dst, pg, src, dst);
3026 }
3027
3028
mov_zeroing(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)3029 LogicPRegister Simulator::mov_zeroing(LogicPRegister dst,
3030 const LogicPRegister& pg,
3031 const LogicPRegister& src) {
3032 SimPRegister all_false;
3033 return sel(dst, pg, src, pfalse(all_false));
3034 }
3035
3036
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)3037 LogicVRegister Simulator::movi(VectorFormat vform,
3038 LogicVRegister dst,
3039 uint64_t imm) {
3040 int lane_count = LaneCountFromFormat(vform);
3041 dst.ClearForWrite(vform);
3042 for (int i = 0; i < lane_count; ++i) {
3043 dst.SetUint(vform, i, imm);
3044 }
3045 return dst;
3046 }
3047
3048
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)3049 LogicVRegister Simulator::mvni(VectorFormat vform,
3050 LogicVRegister dst,
3051 uint64_t imm) {
3052 int lane_count = LaneCountFromFormat(vform);
3053 dst.ClearForWrite(vform);
3054 for (int i = 0; i < lane_count; ++i) {
3055 dst.SetUint(vform, i, ~imm);
3056 }
3057 return dst;
3058 }
3059
3060
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)3061 LogicVRegister Simulator::orr(VectorFormat vform,
3062 LogicVRegister dst,
3063 const LogicVRegister& src,
3064 uint64_t imm) {
3065 uint64_t result[16];
3066 int lane_count = LaneCountFromFormat(vform);
3067 for (int i = 0; i < lane_count; ++i) {
3068 result[i] = src.Uint(vform, i) | imm;
3069 }
3070 dst.ClearForWrite(vform);
3071 for (int i = 0; i < lane_count; ++i) {
3072 dst.SetUint(vform, i, result[i]);
3073 }
3074 return dst;
3075 }
3076
3077
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3078 LogicVRegister Simulator::uxtl(VectorFormat vform,
3079 LogicVRegister dst,
3080 const LogicVRegister& src) {
3081 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3082
3083 dst.ClearForWrite(vform);
3084 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3085 dst.SetUint(vform, i, src.Uint(vform_half, i));
3086 }
3087 return dst;
3088 }
3089
3090
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3091 LogicVRegister Simulator::sxtl(VectorFormat vform,
3092 LogicVRegister dst,
3093 const LogicVRegister& src) {
3094 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3095
3096 dst.ClearForWrite(vform);
3097 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3098 dst.SetInt(vform, i, src.Int(vform_half, i));
3099 }
3100 return dst;
3101 }
3102
3103
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3104 LogicVRegister Simulator::uxtl2(VectorFormat vform,
3105 LogicVRegister dst,
3106 const LogicVRegister& src) {
3107 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3108 int lane_count = LaneCountFromFormat(vform);
3109
3110 dst.ClearForWrite(vform);
3111 for (int i = 0; i < lane_count; i++) {
3112 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
3113 }
3114 return dst;
3115 }
3116
3117
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)3118 LogicVRegister Simulator::sxtl2(VectorFormat vform,
3119 LogicVRegister dst,
3120 const LogicVRegister& src) {
3121 VectorFormat vform_half = VectorFormatHalfWidth(vform);
3122 int lane_count = LaneCountFromFormat(vform);
3123
3124 dst.ClearForWrite(vform);
3125 for (int i = 0; i < lane_count; i++) {
3126 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
3127 }
3128 return dst;
3129 }
3130
3131
uxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3132 LogicVRegister Simulator::uxt(VectorFormat vform,
3133 LogicVRegister dst,
3134 const LogicVRegister& src,
3135 unsigned from_size_in_bits) {
3136 int lane_count = LaneCountFromFormat(vform);
3137 uint64_t mask = GetUintMask(from_size_in_bits);
3138
3139 dst.ClearForWrite(vform);
3140 for (int i = 0; i < lane_count; i++) {
3141 dst.SetInt(vform, i, src.Uint(vform, i) & mask);
3142 }
3143 return dst;
3144 }
3145
3146
sxt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,unsigned from_size_in_bits)3147 LogicVRegister Simulator::sxt(VectorFormat vform,
3148 LogicVRegister dst,
3149 const LogicVRegister& src,
3150 unsigned from_size_in_bits) {
3151 int lane_count = LaneCountFromFormat(vform);
3152
3153 dst.ClearForWrite(vform);
3154 for (int i = 0; i < lane_count; i++) {
3155 uint64_t value =
3156 ExtractSignedBitfield64(from_size_in_bits - 1, 0, src.Uint(vform, i));
3157 dst.SetInt(vform, i, value);
3158 }
3159 return dst;
3160 }
3161
3162
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3163 LogicVRegister Simulator::shrn(VectorFormat vform,
3164 LogicVRegister dst,
3165 const LogicVRegister& src,
3166 int shift) {
3167 SimVRegister temp;
3168 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
3169 VectorFormat vform_dst = vform;
3170 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
3171 return extractnarrow(vform_dst, dst, false, shifted_src, false);
3172 }
3173
3174
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3175 LogicVRegister Simulator::shrn2(VectorFormat vform,
3176 LogicVRegister dst,
3177 const LogicVRegister& src,
3178 int shift) {
3179 SimVRegister temp;
3180 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3181 VectorFormat vformdst = vform;
3182 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
3183 return extractnarrow(vformdst, dst, false, shifted_src, false);
3184 }
3185
3186
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3187 LogicVRegister Simulator::rshrn(VectorFormat vform,
3188 LogicVRegister dst,
3189 const LogicVRegister& src,
3190 int shift) {
3191 SimVRegister temp;
3192 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3193 VectorFormat vformdst = vform;
3194 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3195 return extractnarrow(vformdst, dst, false, shifted_src, false);
3196 }
3197
3198
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3199 LogicVRegister Simulator::rshrn2(VectorFormat vform,
3200 LogicVRegister dst,
3201 const LogicVRegister& src,
3202 int shift) {
3203 SimVRegister temp;
3204 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3205 VectorFormat vformdst = vform;
3206 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
3207 return extractnarrow(vformdst, dst, false, shifted_src, false);
3208 }
3209
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3210 LogicVRegister Simulator::Table(VectorFormat vform,
3211 LogicVRegister dst,
3212 const LogicVRegister& tab,
3213 const LogicVRegister& ind) {
3214 VIXL_ASSERT(IsSVEFormat(vform));
3215 int lane_count = LaneCountFromFormat(vform);
3216 for (int i = 0; i < lane_count; i++) {
3217 uint64_t index = ind.Uint(vform, i);
3218 uint64_t value = (index >= static_cast<uint64_t>(lane_count))
3219 ? 0
3220 : tab.Uint(vform, static_cast<int>(index));
3221 dst.SetUint(vform, i, value);
3222 }
3223 return dst;
3224 }
3225
3226
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)3227 LogicVRegister Simulator::Table(VectorFormat vform,
3228 LogicVRegister dst,
3229 const LogicVRegister& ind,
3230 bool zero_out_of_bounds,
3231 const LogicVRegister* tab1,
3232 const LogicVRegister* tab2,
3233 const LogicVRegister* tab3,
3234 const LogicVRegister* tab4) {
3235 VIXL_ASSERT(tab1 != NULL);
3236 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
3237 uint64_t result[kMaxLanesPerVector];
3238 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3239 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
3240 }
3241 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3242 uint64_t j = ind.Uint(vform, i);
3243 int tab_idx = static_cast<int>(j >> 4);
3244 int j_idx = static_cast<int>(j & 15);
3245 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
3246 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
3247 }
3248 }
3249 dst.SetUintArray(vform, result);
3250 return dst;
3251 }
3252
3253
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3254 LogicVRegister Simulator::tbl(VectorFormat vform,
3255 LogicVRegister dst,
3256 const LogicVRegister& tab,
3257 const LogicVRegister& ind) {
3258 return Table(vform, dst, ind, true, &tab);
3259 }
3260
3261
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3262 LogicVRegister Simulator::tbl(VectorFormat vform,
3263 LogicVRegister dst,
3264 const LogicVRegister& tab,
3265 const LogicVRegister& tab2,
3266 const LogicVRegister& ind) {
3267 return Table(vform, dst, ind, true, &tab, &tab2);
3268 }
3269
3270
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3271 LogicVRegister Simulator::tbl(VectorFormat vform,
3272 LogicVRegister dst,
3273 const LogicVRegister& tab,
3274 const LogicVRegister& tab2,
3275 const LogicVRegister& tab3,
3276 const LogicVRegister& ind) {
3277 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
3278 }
3279
3280
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3281 LogicVRegister Simulator::tbl(VectorFormat vform,
3282 LogicVRegister dst,
3283 const LogicVRegister& tab,
3284 const LogicVRegister& tab2,
3285 const LogicVRegister& tab3,
3286 const LogicVRegister& tab4,
3287 const LogicVRegister& ind) {
3288 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
3289 }
3290
3291
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)3292 LogicVRegister Simulator::tbx(VectorFormat vform,
3293 LogicVRegister dst,
3294 const LogicVRegister& tab,
3295 const LogicVRegister& ind) {
3296 return Table(vform, dst, ind, false, &tab);
3297 }
3298
3299
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)3300 LogicVRegister Simulator::tbx(VectorFormat vform,
3301 LogicVRegister dst,
3302 const LogicVRegister& tab,
3303 const LogicVRegister& tab2,
3304 const LogicVRegister& ind) {
3305 return Table(vform, dst, ind, false, &tab, &tab2);
3306 }
3307
3308
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)3309 LogicVRegister Simulator::tbx(VectorFormat vform,
3310 LogicVRegister dst,
3311 const LogicVRegister& tab,
3312 const LogicVRegister& tab2,
3313 const LogicVRegister& tab3,
3314 const LogicVRegister& ind) {
3315 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
3316 }
3317
3318
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)3319 LogicVRegister Simulator::tbx(VectorFormat vform,
3320 LogicVRegister dst,
3321 const LogicVRegister& tab,
3322 const LogicVRegister& tab2,
3323 const LogicVRegister& tab3,
3324 const LogicVRegister& tab4,
3325 const LogicVRegister& ind) {
3326 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
3327 }
3328
3329
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3330 LogicVRegister Simulator::uqshrn(VectorFormat vform,
3331 LogicVRegister dst,
3332 const LogicVRegister& src,
3333 int shift) {
3334 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
3335 }
3336
3337
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3338 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
3339 LogicVRegister dst,
3340 const LogicVRegister& src,
3341 int shift) {
3342 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3343 }
3344
3345
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3346 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
3347 LogicVRegister dst,
3348 const LogicVRegister& src,
3349 int shift) {
3350 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
3351 }
3352
3353
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3354 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
3355 LogicVRegister dst,
3356 const LogicVRegister& src,
3357 int shift) {
3358 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
3359 }
3360
3361
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3362 LogicVRegister Simulator::sqshrn(VectorFormat vform,
3363 LogicVRegister dst,
3364 const LogicVRegister& src,
3365 int shift) {
3366 SimVRegister temp;
3367 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3368 VectorFormat vformdst = vform;
3369 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3370 return sqxtn(vformdst, dst, shifted_src);
3371 }
3372
3373
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3374 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
3375 LogicVRegister dst,
3376 const LogicVRegister& src,
3377 int shift) {
3378 SimVRegister temp;
3379 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3380 VectorFormat vformdst = vform;
3381 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3382 return sqxtn(vformdst, dst, shifted_src);
3383 }
3384
3385
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3386 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
3387 LogicVRegister dst,
3388 const LogicVRegister& src,
3389 int shift) {
3390 SimVRegister temp;
3391 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3392 VectorFormat vformdst = vform;
3393 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3394 return sqxtn(vformdst, dst, shifted_src);
3395 }
3396
3397
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3398 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
3399 LogicVRegister dst,
3400 const LogicVRegister& src,
3401 int shift) {
3402 SimVRegister temp;
3403 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3404 VectorFormat vformdst = vform;
3405 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3406 return sqxtn(vformdst, dst, shifted_src);
3407 }
3408
3409
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3410 LogicVRegister Simulator::sqshrun(VectorFormat vform,
3411 LogicVRegister dst,
3412 const LogicVRegister& src,
3413 int shift) {
3414 SimVRegister temp;
3415 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3416 VectorFormat vformdst = vform;
3417 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3418 return sqxtun(vformdst, dst, shifted_src);
3419 }
3420
3421
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3422 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
3423 LogicVRegister dst,
3424 const LogicVRegister& src,
3425 int shift) {
3426 SimVRegister temp;
3427 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3428 VectorFormat vformdst = vform;
3429 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
3430 return sqxtun(vformdst, dst, shifted_src);
3431 }
3432
3433
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3434 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
3435 LogicVRegister dst,
3436 const LogicVRegister& src,
3437 int shift) {
3438 SimVRegister temp;
3439 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
3440 VectorFormat vformdst = vform;
3441 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3442 return sqxtun(vformdst, dst, shifted_src);
3443 }
3444
3445
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)3446 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
3447 LogicVRegister dst,
3448 const LogicVRegister& src,
3449 int shift) {
3450 SimVRegister temp;
3451 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
3452 VectorFormat vformdst = vform;
3453 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
3454 return sqxtun(vformdst, dst, shifted_src);
3455 }
3456
3457
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3458 LogicVRegister Simulator::uaddl(VectorFormat vform,
3459 LogicVRegister dst,
3460 const LogicVRegister& src1,
3461 const LogicVRegister& src2) {
3462 SimVRegister temp1, temp2;
3463 uxtl(vform, temp1, src1);
3464 uxtl(vform, temp2, src2);
3465 add(vform, dst, temp1, temp2);
3466 return dst;
3467 }
3468
3469
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3470 LogicVRegister Simulator::uaddl2(VectorFormat vform,
3471 LogicVRegister dst,
3472 const LogicVRegister& src1,
3473 const LogicVRegister& src2) {
3474 SimVRegister temp1, temp2;
3475 uxtl2(vform, temp1, src1);
3476 uxtl2(vform, temp2, src2);
3477 add(vform, dst, temp1, temp2);
3478 return dst;
3479 }
3480
3481
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3482 LogicVRegister Simulator::uaddw(VectorFormat vform,
3483 LogicVRegister dst,
3484 const LogicVRegister& src1,
3485 const LogicVRegister& src2) {
3486 SimVRegister temp;
3487 uxtl(vform, temp, src2);
3488 add(vform, dst, src1, temp);
3489 return dst;
3490 }
3491
3492
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3493 LogicVRegister Simulator::uaddw2(VectorFormat vform,
3494 LogicVRegister dst,
3495 const LogicVRegister& src1,
3496 const LogicVRegister& src2) {
3497 SimVRegister temp;
3498 uxtl2(vform, temp, src2);
3499 add(vform, dst, src1, temp);
3500 return dst;
3501 }
3502
3503
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3504 LogicVRegister Simulator::saddl(VectorFormat vform,
3505 LogicVRegister dst,
3506 const LogicVRegister& src1,
3507 const LogicVRegister& src2) {
3508 SimVRegister temp1, temp2;
3509 sxtl(vform, temp1, src1);
3510 sxtl(vform, temp2, src2);
3511 add(vform, dst, temp1, temp2);
3512 return dst;
3513 }
3514
3515
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3516 LogicVRegister Simulator::saddl2(VectorFormat vform,
3517 LogicVRegister dst,
3518 const LogicVRegister& src1,
3519 const LogicVRegister& src2) {
3520 SimVRegister temp1, temp2;
3521 sxtl2(vform, temp1, src1);
3522 sxtl2(vform, temp2, src2);
3523 add(vform, dst, temp1, temp2);
3524 return dst;
3525 }
3526
3527
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3528 LogicVRegister Simulator::saddw(VectorFormat vform,
3529 LogicVRegister dst,
3530 const LogicVRegister& src1,
3531 const LogicVRegister& src2) {
3532 SimVRegister temp;
3533 sxtl(vform, temp, src2);
3534 add(vform, dst, src1, temp);
3535 return dst;
3536 }
3537
3538
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3539 LogicVRegister Simulator::saddw2(VectorFormat vform,
3540 LogicVRegister dst,
3541 const LogicVRegister& src1,
3542 const LogicVRegister& src2) {
3543 SimVRegister temp;
3544 sxtl2(vform, temp, src2);
3545 add(vform, dst, src1, temp);
3546 return dst;
3547 }
3548
3549
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3550 LogicVRegister Simulator::usubl(VectorFormat vform,
3551 LogicVRegister dst,
3552 const LogicVRegister& src1,
3553 const LogicVRegister& src2) {
3554 SimVRegister temp1, temp2;
3555 uxtl(vform, temp1, src1);
3556 uxtl(vform, temp2, src2);
3557 sub(vform, dst, temp1, temp2);
3558 return dst;
3559 }
3560
3561
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3562 LogicVRegister Simulator::usubl2(VectorFormat vform,
3563 LogicVRegister dst,
3564 const LogicVRegister& src1,
3565 const LogicVRegister& src2) {
3566 SimVRegister temp1, temp2;
3567 uxtl2(vform, temp1, src1);
3568 uxtl2(vform, temp2, src2);
3569 sub(vform, dst, temp1, temp2);
3570 return dst;
3571 }
3572
3573
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3574 LogicVRegister Simulator::usubw(VectorFormat vform,
3575 LogicVRegister dst,
3576 const LogicVRegister& src1,
3577 const LogicVRegister& src2) {
3578 SimVRegister temp;
3579 uxtl(vform, temp, src2);
3580 sub(vform, dst, src1, temp);
3581 return dst;
3582 }
3583
3584
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3585 LogicVRegister Simulator::usubw2(VectorFormat vform,
3586 LogicVRegister dst,
3587 const LogicVRegister& src1,
3588 const LogicVRegister& src2) {
3589 SimVRegister temp;
3590 uxtl2(vform, temp, src2);
3591 sub(vform, dst, src1, temp);
3592 return dst;
3593 }
3594
3595
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3596 LogicVRegister Simulator::ssubl(VectorFormat vform,
3597 LogicVRegister dst,
3598 const LogicVRegister& src1,
3599 const LogicVRegister& src2) {
3600 SimVRegister temp1, temp2;
3601 sxtl(vform, temp1, src1);
3602 sxtl(vform, temp2, src2);
3603 sub(vform, dst, temp1, temp2);
3604 return dst;
3605 }
3606
3607
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3608 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3609 LogicVRegister dst,
3610 const LogicVRegister& src1,
3611 const LogicVRegister& src2) {
3612 SimVRegister temp1, temp2;
3613 sxtl2(vform, temp1, src1);
3614 sxtl2(vform, temp2, src2);
3615 sub(vform, dst, temp1, temp2);
3616 return dst;
3617 }
3618
3619
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3620 LogicVRegister Simulator::ssubw(VectorFormat vform,
3621 LogicVRegister dst,
3622 const LogicVRegister& src1,
3623 const LogicVRegister& src2) {
3624 SimVRegister temp;
3625 sxtl(vform, temp, src2);
3626 sub(vform, dst, src1, temp);
3627 return dst;
3628 }
3629
3630
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3631 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3632 LogicVRegister dst,
3633 const LogicVRegister& src1,
3634 const LogicVRegister& src2) {
3635 SimVRegister temp;
3636 sxtl2(vform, temp, src2);
3637 sub(vform, dst, src1, temp);
3638 return dst;
3639 }
3640
3641
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3642 LogicVRegister Simulator::uabal(VectorFormat vform,
3643 LogicVRegister dst,
3644 const LogicVRegister& src1,
3645 const LogicVRegister& src2) {
3646 SimVRegister temp1, temp2;
3647 uxtl(vform, temp1, src1);
3648 uxtl(vform, temp2, src2);
3649 uaba(vform, dst, temp1, temp2);
3650 return dst;
3651 }
3652
3653
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3654 LogicVRegister Simulator::uabal2(VectorFormat vform,
3655 LogicVRegister dst,
3656 const LogicVRegister& src1,
3657 const LogicVRegister& src2) {
3658 SimVRegister temp1, temp2;
3659 uxtl2(vform, temp1, src1);
3660 uxtl2(vform, temp2, src2);
3661 uaba(vform, dst, temp1, temp2);
3662 return dst;
3663 }
3664
3665
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3666 LogicVRegister Simulator::sabal(VectorFormat vform,
3667 LogicVRegister dst,
3668 const LogicVRegister& src1,
3669 const LogicVRegister& src2) {
3670 SimVRegister temp1, temp2;
3671 sxtl(vform, temp1, src1);
3672 sxtl(vform, temp2, src2);
3673 saba(vform, dst, temp1, temp2);
3674 return dst;
3675 }
3676
3677
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3678 LogicVRegister Simulator::sabal2(VectorFormat vform,
3679 LogicVRegister dst,
3680 const LogicVRegister& src1,
3681 const LogicVRegister& src2) {
3682 SimVRegister temp1, temp2;
3683 sxtl2(vform, temp1, src1);
3684 sxtl2(vform, temp2, src2);
3685 saba(vform, dst, temp1, temp2);
3686 return dst;
3687 }
3688
3689
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3690 LogicVRegister Simulator::uabdl(VectorFormat vform,
3691 LogicVRegister dst,
3692 const LogicVRegister& src1,
3693 const LogicVRegister& src2) {
3694 SimVRegister temp1, temp2;
3695 uxtl(vform, temp1, src1);
3696 uxtl(vform, temp2, src2);
3697 absdiff(vform, dst, temp1, temp2, false);
3698 return dst;
3699 }
3700
3701
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3702 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3703 LogicVRegister dst,
3704 const LogicVRegister& src1,
3705 const LogicVRegister& src2) {
3706 SimVRegister temp1, temp2;
3707 uxtl2(vform, temp1, src1);
3708 uxtl2(vform, temp2, src2);
3709 absdiff(vform, dst, temp1, temp2, false);
3710 return dst;
3711 }
3712
3713
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3714 LogicVRegister Simulator::sabdl(VectorFormat vform,
3715 LogicVRegister dst,
3716 const LogicVRegister& src1,
3717 const LogicVRegister& src2) {
3718 SimVRegister temp1, temp2;
3719 sxtl(vform, temp1, src1);
3720 sxtl(vform, temp2, src2);
3721 absdiff(vform, dst, temp1, temp2, true);
3722 return dst;
3723 }
3724
3725
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3726 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3727 LogicVRegister dst,
3728 const LogicVRegister& src1,
3729 const LogicVRegister& src2) {
3730 SimVRegister temp1, temp2;
3731 sxtl2(vform, temp1, src1);
3732 sxtl2(vform, temp2, src2);
3733 absdiff(vform, dst, temp1, temp2, true);
3734 return dst;
3735 }
3736
3737
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3738 LogicVRegister Simulator::umull(VectorFormat vform,
3739 LogicVRegister dst,
3740 const LogicVRegister& src1,
3741 const LogicVRegister& src2) {
3742 SimVRegister temp1, temp2;
3743 uxtl(vform, temp1, src1);
3744 uxtl(vform, temp2, src2);
3745 mul(vform, dst, temp1, temp2);
3746 return dst;
3747 }
3748
3749
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3750 LogicVRegister Simulator::umull2(VectorFormat vform,
3751 LogicVRegister dst,
3752 const LogicVRegister& src1,
3753 const LogicVRegister& src2) {
3754 SimVRegister temp1, temp2;
3755 uxtl2(vform, temp1, src1);
3756 uxtl2(vform, temp2, src2);
3757 mul(vform, dst, temp1, temp2);
3758 return dst;
3759 }
3760
3761
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3762 LogicVRegister Simulator::smull(VectorFormat vform,
3763 LogicVRegister dst,
3764 const LogicVRegister& src1,
3765 const LogicVRegister& src2) {
3766 SimVRegister temp1, temp2;
3767 sxtl(vform, temp1, src1);
3768 sxtl(vform, temp2, src2);
3769 mul(vform, dst, temp1, temp2);
3770 return dst;
3771 }
3772
3773
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3774 LogicVRegister Simulator::smull2(VectorFormat vform,
3775 LogicVRegister dst,
3776 const LogicVRegister& src1,
3777 const LogicVRegister& src2) {
3778 SimVRegister temp1, temp2;
3779 sxtl2(vform, temp1, src1);
3780 sxtl2(vform, temp2, src2);
3781 mul(vform, dst, temp1, temp2);
3782 return dst;
3783 }
3784
3785
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3786 LogicVRegister Simulator::umlsl(VectorFormat vform,
3787 LogicVRegister dst,
3788 const LogicVRegister& src1,
3789 const LogicVRegister& src2) {
3790 SimVRegister temp1, temp2;
3791 uxtl(vform, temp1, src1);
3792 uxtl(vform, temp2, src2);
3793 mls(vform, dst, dst, temp1, temp2);
3794 return dst;
3795 }
3796
3797
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3798 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3799 LogicVRegister dst,
3800 const LogicVRegister& src1,
3801 const LogicVRegister& src2) {
3802 SimVRegister temp1, temp2;
3803 uxtl2(vform, temp1, src1);
3804 uxtl2(vform, temp2, src2);
3805 mls(vform, dst, dst, temp1, temp2);
3806 return dst;
3807 }
3808
3809
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3810 LogicVRegister Simulator::smlsl(VectorFormat vform,
3811 LogicVRegister dst,
3812 const LogicVRegister& src1,
3813 const LogicVRegister& src2) {
3814 SimVRegister temp1, temp2;
3815 sxtl(vform, temp1, src1);
3816 sxtl(vform, temp2, src2);
3817 mls(vform, dst, dst, temp1, temp2);
3818 return dst;
3819 }
3820
3821
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3822 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3823 LogicVRegister dst,
3824 const LogicVRegister& src1,
3825 const LogicVRegister& src2) {
3826 SimVRegister temp1, temp2;
3827 sxtl2(vform, temp1, src1);
3828 sxtl2(vform, temp2, src2);
3829 mls(vform, dst, dst, temp1, temp2);
3830 return dst;
3831 }
3832
3833
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3834 LogicVRegister Simulator::umlal(VectorFormat vform,
3835 LogicVRegister dst,
3836 const LogicVRegister& src1,
3837 const LogicVRegister& src2) {
3838 SimVRegister temp1, temp2;
3839 uxtl(vform, temp1, src1);
3840 uxtl(vform, temp2, src2);
3841 mla(vform, dst, dst, temp1, temp2);
3842 return dst;
3843 }
3844
3845
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3846 LogicVRegister Simulator::umlal2(VectorFormat vform,
3847 LogicVRegister dst,
3848 const LogicVRegister& src1,
3849 const LogicVRegister& src2) {
3850 SimVRegister temp1, temp2;
3851 uxtl2(vform, temp1, src1);
3852 uxtl2(vform, temp2, src2);
3853 mla(vform, dst, dst, temp1, temp2);
3854 return dst;
3855 }
3856
3857
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3858 LogicVRegister Simulator::smlal(VectorFormat vform,
3859 LogicVRegister dst,
3860 const LogicVRegister& src1,
3861 const LogicVRegister& src2) {
3862 SimVRegister temp1, temp2;
3863 sxtl(vform, temp1, src1);
3864 sxtl(vform, temp2, src2);
3865 mla(vform, dst, dst, temp1, temp2);
3866 return dst;
3867 }
3868
3869
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3870 LogicVRegister Simulator::smlal2(VectorFormat vform,
3871 LogicVRegister dst,
3872 const LogicVRegister& src1,
3873 const LogicVRegister& src2) {
3874 SimVRegister temp1, temp2;
3875 sxtl2(vform, temp1, src1);
3876 sxtl2(vform, temp2, src2);
3877 mla(vform, dst, dst, temp1, temp2);
3878 return dst;
3879 }
3880
3881
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3882 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3883 LogicVRegister dst,
3884 const LogicVRegister& src1,
3885 const LogicVRegister& src2) {
3886 SimVRegister temp;
3887 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3888 return add(vform, dst, dst, product).SignedSaturate(vform);
3889 }
3890
3891
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3892 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3893 LogicVRegister dst,
3894 const LogicVRegister& src1,
3895 const LogicVRegister& src2) {
3896 SimVRegister temp;
3897 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3898 return add(vform, dst, dst, product).SignedSaturate(vform);
3899 }
3900
3901
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3902 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3903 LogicVRegister dst,
3904 const LogicVRegister& src1,
3905 const LogicVRegister& src2) {
3906 SimVRegister temp;
3907 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3908 return sub(vform, dst, dst, product).SignedSaturate(vform);
3909 }
3910
3911
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3912 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3913 LogicVRegister dst,
3914 const LogicVRegister& src1,
3915 const LogicVRegister& src2) {
3916 SimVRegister temp;
3917 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3918 return sub(vform, dst, dst, product).SignedSaturate(vform);
3919 }
3920
3921
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3922 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3923 LogicVRegister dst,
3924 const LogicVRegister& src1,
3925 const LogicVRegister& src2) {
3926 SimVRegister temp;
3927 LogicVRegister product = smull(vform, temp, src1, src2);
3928 return add(vform, dst, product, product).SignedSaturate(vform);
3929 }
3930
3931
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3932 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3933 LogicVRegister dst,
3934 const LogicVRegister& src1,
3935 const LogicVRegister& src2) {
3936 SimVRegister temp;
3937 LogicVRegister product = smull2(vform, temp, src1, src2);
3938 return add(vform, dst, product, product).SignedSaturate(vform);
3939 }
3940
3941
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3942 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3943 LogicVRegister dst,
3944 const LogicVRegister& src1,
3945 const LogicVRegister& src2,
3946 bool round) {
3947 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3948 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3949 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3950
3951 int esize = LaneSizeInBitsFromFormat(vform);
3952 int round_const = round ? (1 << (esize - 2)) : 0;
3953 int64_t product;
3954
3955 dst.ClearForWrite(vform);
3956 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3957 product = src1.Int(vform, i) * src2.Int(vform, i);
3958 product += round_const;
3959 product = product >> (esize - 1);
3960
3961 if (product > MaxIntFromFormat(vform)) {
3962 product = MaxIntFromFormat(vform);
3963 } else if (product < MinIntFromFormat(vform)) {
3964 product = MinIntFromFormat(vform);
3965 }
3966 dst.SetInt(vform, i, product);
3967 }
3968 return dst;
3969 }
3970
3971
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)3972 LogicVRegister Simulator::dot(VectorFormat vform,
3973 LogicVRegister dst,
3974 const LogicVRegister& src1,
3975 const LogicVRegister& src2,
3976 bool is_signed) {
3977 VectorFormat quarter_vform =
3978 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3979
3980 dst.ClearForWrite(vform);
3981 for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3982 uint64_t result = 0;
3983 int64_t element1, element2;
3984 for (int i = 0; i < 4; i++) {
3985 int index = 4 * e + i;
3986 if (is_signed) {
3987 element1 = src1.Int(quarter_vform, index);
3988 element2 = src2.Int(quarter_vform, index);
3989 } else {
3990 element1 = src1.Uint(quarter_vform, index);
3991 element2 = src2.Uint(quarter_vform, index);
3992 }
3993 result += element1 * element2;
3994 }
3995 dst.SetUint(vform, e, result + dst.Uint(vform, e));
3996 }
3997 return dst;
3998 }
3999
4000
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4001 LogicVRegister Simulator::sdot(VectorFormat vform,
4002 LogicVRegister dst,
4003 const LogicVRegister& src1,
4004 const LogicVRegister& src2) {
4005 return dot(vform, dst, src1, src2, true);
4006 }
4007
4008
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4009 LogicVRegister Simulator::udot(VectorFormat vform,
4010 LogicVRegister dst,
4011 const LogicVRegister& src1,
4012 const LogicVRegister& src2) {
4013 return dot(vform, dst, src1, src2, false);
4014 }
4015
4016
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)4017 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
4018 LogicVRegister dst,
4019 const LogicVRegister& src1,
4020 const LogicVRegister& src2,
4021 bool round,
4022 bool sub_op) {
4023 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
4024 // To avoid this, we use:
4025 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
4026 // which is same as:
4027 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
4028
4029 int esize = LaneSizeInBitsFromFormat(vform);
4030 int round_const = round ? (1 << (esize - 2)) : 0;
4031 int64_t accum;
4032
4033 dst.ClearForWrite(vform);
4034 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4035 accum = dst.Int(vform, i) << (esize - 1);
4036 if (sub_op) {
4037 accum -= src1.Int(vform, i) * src2.Int(vform, i);
4038 } else {
4039 accum += src1.Int(vform, i) * src2.Int(vform, i);
4040 }
4041 accum += round_const;
4042 accum = accum >> (esize - 1);
4043
4044 if (accum > MaxIntFromFormat(vform)) {
4045 accum = MaxIntFromFormat(vform);
4046 } else if (accum < MinIntFromFormat(vform)) {
4047 accum = MinIntFromFormat(vform);
4048 }
4049 dst.SetInt(vform, i, accum);
4050 }
4051 return dst;
4052 }
4053
4054
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4055 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
4056 LogicVRegister dst,
4057 const LogicVRegister& src1,
4058 const LogicVRegister& src2,
4059 bool round) {
4060 return sqrdmlash(vform, dst, src1, src2, round, false);
4061 }
4062
4063
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)4064 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
4065 LogicVRegister dst,
4066 const LogicVRegister& src1,
4067 const LogicVRegister& src2,
4068 bool round) {
4069 return sqrdmlash(vform, dst, src1, src2, round, true);
4070 }
4071
4072
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4073 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
4074 LogicVRegister dst,
4075 const LogicVRegister& src1,
4076 const LogicVRegister& src2) {
4077 return sqrdmulh(vform, dst, src1, src2, false);
4078 }
4079
4080
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4081 LogicVRegister Simulator::addhn(VectorFormat vform,
4082 LogicVRegister dst,
4083 const LogicVRegister& src1,
4084 const LogicVRegister& src2) {
4085 SimVRegister temp;
4086 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4087 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4088 return dst;
4089 }
4090
4091
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4092 LogicVRegister Simulator::addhn2(VectorFormat vform,
4093 LogicVRegister dst,
4094 const LogicVRegister& src1,
4095 const LogicVRegister& src2) {
4096 SimVRegister temp;
4097 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4098 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4099 return dst;
4100 }
4101
4102
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4103 LogicVRegister Simulator::raddhn(VectorFormat vform,
4104 LogicVRegister dst,
4105 const LogicVRegister& src1,
4106 const LogicVRegister& src2) {
4107 SimVRegister temp;
4108 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
4109 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4110 return dst;
4111 }
4112
4113
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4114 LogicVRegister Simulator::raddhn2(VectorFormat vform,
4115 LogicVRegister dst,
4116 const LogicVRegister& src1,
4117 const LogicVRegister& src2) {
4118 SimVRegister temp;
4119 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4120 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4121 return dst;
4122 }
4123
4124
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4125 LogicVRegister Simulator::subhn(VectorFormat vform,
4126 LogicVRegister dst,
4127 const LogicVRegister& src1,
4128 const LogicVRegister& src2) {
4129 SimVRegister temp;
4130 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4131 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4132 return dst;
4133 }
4134
4135
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4136 LogicVRegister Simulator::subhn2(VectorFormat vform,
4137 LogicVRegister dst,
4138 const LogicVRegister& src1,
4139 const LogicVRegister& src2) {
4140 SimVRegister temp;
4141 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4142 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4143 return dst;
4144 }
4145
4146
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4147 LogicVRegister Simulator::rsubhn(VectorFormat vform,
4148 LogicVRegister dst,
4149 const LogicVRegister& src1,
4150 const LogicVRegister& src2) {
4151 SimVRegister temp;
4152 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
4153 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4154 return dst;
4155 }
4156
4157
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4158 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
4159 LogicVRegister dst,
4160 const LogicVRegister& src1,
4161 const LogicVRegister& src2) {
4162 SimVRegister temp;
4163 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
4164 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
4165 return dst;
4166 }
4167
4168
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4169 LogicVRegister Simulator::trn1(VectorFormat vform,
4170 LogicVRegister dst,
4171 const LogicVRegister& src1,
4172 const LogicVRegister& src2) {
4173 uint64_t result[kZRegMaxSizeInBytes];
4174 int lane_count = LaneCountFromFormat(vform);
4175 int pairs = lane_count / 2;
4176 for (int i = 0; i < pairs; ++i) {
4177 result[2 * i] = src1.Uint(vform, 2 * i);
4178 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
4179 }
4180
4181 dst.ClearForWrite(vform);
4182 for (int i = 0; i < lane_count; ++i) {
4183 dst.SetUint(vform, i, result[i]);
4184 }
4185 return dst;
4186 }
4187
4188
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4189 LogicVRegister Simulator::trn2(VectorFormat vform,
4190 LogicVRegister dst,
4191 const LogicVRegister& src1,
4192 const LogicVRegister& src2) {
4193 uint64_t result[kZRegMaxSizeInBytes];
4194 int lane_count = LaneCountFromFormat(vform);
4195 int pairs = lane_count / 2;
4196 for (int i = 0; i < pairs; ++i) {
4197 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
4198 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
4199 }
4200
4201 dst.ClearForWrite(vform);
4202 for (int i = 0; i < lane_count; ++i) {
4203 dst.SetUint(vform, i, result[i]);
4204 }
4205 return dst;
4206 }
4207
4208
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4209 LogicVRegister Simulator::zip1(VectorFormat vform,
4210 LogicVRegister dst,
4211 const LogicVRegister& src1,
4212 const LogicVRegister& src2) {
4213 uint64_t result[kZRegMaxSizeInBytes];
4214 int lane_count = LaneCountFromFormat(vform);
4215 int pairs = lane_count / 2;
4216 for (int i = 0; i < pairs; ++i) {
4217 result[2 * i] = src1.Uint(vform, i);
4218 result[(2 * i) + 1] = src2.Uint(vform, i);
4219 }
4220
4221 dst.ClearForWrite(vform);
4222 for (int i = 0; i < lane_count; ++i) {
4223 dst.SetUint(vform, i, result[i]);
4224 }
4225 return dst;
4226 }
4227
4228
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4229 LogicVRegister Simulator::zip2(VectorFormat vform,
4230 LogicVRegister dst,
4231 const LogicVRegister& src1,
4232 const LogicVRegister& src2) {
4233 uint64_t result[kZRegMaxSizeInBytes];
4234 int lane_count = LaneCountFromFormat(vform);
4235 int pairs = lane_count / 2;
4236 for (int i = 0; i < pairs; ++i) {
4237 result[2 * i] = src1.Uint(vform, pairs + i);
4238 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
4239 }
4240
4241 dst.ClearForWrite(vform);
4242 for (int i = 0; i < lane_count; ++i) {
4243 dst.SetUint(vform, i, result[i]);
4244 }
4245 return dst;
4246 }
4247
4248
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4249 LogicVRegister Simulator::uzp1(VectorFormat vform,
4250 LogicVRegister dst,
4251 const LogicVRegister& src1,
4252 const LogicVRegister& src2) {
4253 uint64_t result[kZRegMaxSizeInBytes * 2];
4254 int lane_count = LaneCountFromFormat(vform);
4255 for (int i = 0; i < lane_count; ++i) {
4256 result[i] = src1.Uint(vform, i);
4257 result[lane_count + i] = src2.Uint(vform, i);
4258 }
4259
4260 dst.ClearForWrite(vform);
4261 for (int i = 0; i < lane_count; ++i) {
4262 dst.SetUint(vform, i, result[2 * i]);
4263 }
4264 return dst;
4265 }
4266
4267
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4268 LogicVRegister Simulator::uzp2(VectorFormat vform,
4269 LogicVRegister dst,
4270 const LogicVRegister& src1,
4271 const LogicVRegister& src2) {
4272 uint64_t result[kZRegMaxSizeInBytes * 2];
4273 int lane_count = LaneCountFromFormat(vform);
4274 for (int i = 0; i < lane_count; ++i) {
4275 result[i] = src1.Uint(vform, i);
4276 result[lane_count + i] = src2.Uint(vform, i);
4277 }
4278
4279 dst.ClearForWrite(vform);
4280 for (int i = 0; i < lane_count; ++i) {
4281 dst.SetUint(vform, i, result[(2 * i) + 1]);
4282 }
4283 return dst;
4284 }
4285
4286
4287 template <typename T>
FPNeg(T op)4288 T Simulator::FPNeg(T op) {
4289 return -op;
4290 }
4291
4292 template <typename T>
FPAdd(T op1,T op2)4293 T Simulator::FPAdd(T op1, T op2) {
4294 T result = FPProcessNaNs(op1, op2);
4295 if (IsNaN(result)) {
4296 return result;
4297 }
4298
4299 if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
4300 // inf + -inf returns the default NaN.
4301 FPProcessException();
4302 return FPDefaultNaN<T>();
4303 } else {
4304 // Other cases should be handled by standard arithmetic.
4305 return op1 + op2;
4306 }
4307 }
4308
4309
4310 template <typename T>
FPSub(T op1,T op2)4311 T Simulator::FPSub(T op1, T op2) {
4312 // NaNs should be handled elsewhere.
4313 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4314
4315 if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
4316 // inf - inf returns the default NaN.
4317 FPProcessException();
4318 return FPDefaultNaN<T>();
4319 } else {
4320 // Other cases should be handled by standard arithmetic.
4321 return op1 - op2;
4322 }
4323 }
4324
4325
4326 template <typename T>
FPMul(T op1,T op2)4327 T Simulator::FPMul(T op1, T op2) {
4328 // NaNs should be handled elsewhere.
4329 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4330
4331 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4332 // inf * 0.0 returns the default NaN.
4333 FPProcessException();
4334 return FPDefaultNaN<T>();
4335 } else {
4336 // Other cases should be handled by standard arithmetic.
4337 return op1 * op2;
4338 }
4339 }
4340
4341
4342 template <typename T>
FPMulx(T op1,T op2)4343 T Simulator::FPMulx(T op1, T op2) {
4344 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
4345 // inf * 0.0 returns +/-2.0.
4346 T two = 2.0;
4347 return copysign(1.0, op1) * copysign(1.0, op2) * two;
4348 }
4349 return FPMul(op1, op2);
4350 }
4351
4352
4353 template <typename T>
FPMulAdd(T a,T op1,T op2)4354 T Simulator::FPMulAdd(T a, T op1, T op2) {
4355 T result = FPProcessNaNs3(a, op1, op2);
4356
4357 T sign_a = copysign(1.0, a);
4358 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
4359 bool isinf_prod = IsInf(op1) || IsInf(op2);
4360 bool operation_generates_nan =
4361 (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0
4362 (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf
4363 (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
4364
4365 if (IsNaN(result)) {
4366 // Generated NaNs override quiet NaNs propagated from a.
4367 if (operation_generates_nan && IsQuietNaN(a)) {
4368 FPProcessException();
4369 return FPDefaultNaN<T>();
4370 } else {
4371 return result;
4372 }
4373 }
4374
4375 // If the operation would produce a NaN, return the default NaN.
4376 if (operation_generates_nan) {
4377 FPProcessException();
4378 return FPDefaultNaN<T>();
4379 }
4380
4381 // Work around broken fma implementations for exact zero results: The sign of
4382 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
4383 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
4384 return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
4385 }
4386
4387 result = FusedMultiplyAdd(op1, op2, a);
4388 VIXL_ASSERT(!IsNaN(result));
4389
4390 // Work around broken fma implementations for rounded zero results: If a is
4391 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
4392 if ((a == 0.0) && (result == 0.0)) {
4393 return copysign(0.0, sign_prod);
4394 }
4395
4396 return result;
4397 }
4398
4399
4400 template <typename T>
FPDiv(T op1,T op2)4401 T Simulator::FPDiv(T op1, T op2) {
4402 // NaNs should be handled elsewhere.
4403 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
4404
4405 if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
4406 // inf / inf and 0.0 / 0.0 return the default NaN.
4407 FPProcessException();
4408 return FPDefaultNaN<T>();
4409 } else {
4410 if (op2 == 0.0) {
4411 FPProcessException();
4412 if (!IsNaN(op1)) {
4413 double op1_sign = copysign(1.0, op1);
4414 double op2_sign = copysign(1.0, op2);
4415 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
4416 }
4417 }
4418
4419 // Other cases should be handled by standard arithmetic.
4420 return op1 / op2;
4421 }
4422 }
4423
4424
4425 template <typename T>
FPSqrt(T op)4426 T Simulator::FPSqrt(T op) {
4427 if (IsNaN(op)) {
4428 return FPProcessNaN(op);
4429 } else if (op < T(0.0)) {
4430 FPProcessException();
4431 return FPDefaultNaN<T>();
4432 } else {
4433 return sqrt(op);
4434 }
4435 }
4436
4437
4438 template <typename T>
FPMax(T a,T b)4439 T Simulator::FPMax(T a, T b) {
4440 T result = FPProcessNaNs(a, b);
4441 if (IsNaN(result)) return result;
4442
4443 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4444 // a and b are zero, and the sign differs: return +0.0.
4445 return 0.0;
4446 } else {
4447 return (a > b) ? a : b;
4448 }
4449 }
4450
4451
4452 template <typename T>
FPMaxNM(T a,T b)4453 T Simulator::FPMaxNM(T a, T b) {
4454 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4455 a = kFP64NegativeInfinity;
4456 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4457 b = kFP64NegativeInfinity;
4458 }
4459
4460 T result = FPProcessNaNs(a, b);
4461 return IsNaN(result) ? result : FPMax(a, b);
4462 }
4463
4464
4465 template <typename T>
FPMin(T a,T b)4466 T Simulator::FPMin(T a, T b) {
4467 T result = FPProcessNaNs(a, b);
4468 if (IsNaN(result)) return result;
4469
4470 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
4471 // a and b are zero, and the sign differs: return -0.0.
4472 return -0.0;
4473 } else {
4474 return (a < b) ? a : b;
4475 }
4476 }
4477
4478
4479 template <typename T>
FPMinNM(T a,T b)4480 T Simulator::FPMinNM(T a, T b) {
4481 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
4482 a = kFP64PositiveInfinity;
4483 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
4484 b = kFP64PositiveInfinity;
4485 }
4486
4487 T result = FPProcessNaNs(a, b);
4488 return IsNaN(result) ? result : FPMin(a, b);
4489 }
4490
4491
4492 template <typename T>
FPRecipStepFused(T op1,T op2)4493 T Simulator::FPRecipStepFused(T op1, T op2) {
4494 const T two = 2.0;
4495 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4496 return two;
4497 } else if (IsInf(op1) || IsInf(op2)) {
4498 // Return +inf if signs match, otherwise -inf.
4499 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4500 : kFP64NegativeInfinity;
4501 } else {
4502 return FusedMultiplyAdd(op1, op2, two);
4503 }
4504 }
4505
// Generic normality check: defers to std::isnormal for the given type.
template <typename T>
bool IsNormal(T value) {
  const bool is_normal = std::isnormal(value);
  return is_normal;
}
4510
4511 template <>
IsNormal(SimFloat16 value)4512 bool IsNormal(SimFloat16 value) {
4513 uint16_t rawbits = Float16ToRawbits(value);
4514 uint16_t exp_mask = 0x7c00;
4515 // Check that the exponent is neither all zeroes or all ones.
4516 return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
4517 }
4518
4519
4520 template <typename T>
FPRSqrtStepFused(T op1,T op2)4521 T Simulator::FPRSqrtStepFused(T op1, T op2) {
4522 const T one_point_five = 1.5;
4523 const T two = 2.0;
4524
4525 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
4526 return one_point_five;
4527 } else if (IsInf(op1) || IsInf(op2)) {
4528 // Return +inf if signs match, otherwise -inf.
4529 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
4530 : kFP64NegativeInfinity;
4531 } else {
4532 // The multiply-add-halve operation must be fully fused, so avoid interim
4533 // rounding by checking which operand can be losslessly divided by two
4534 // before doing the multiply-add.
4535 if (IsNormal(op1 / two)) {
4536 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
4537 } else if (IsNormal(op2 / two)) {
4538 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
4539 } else {
4540 // Neither operand is normal after halving: the result is dominated by
4541 // the addition term, so just return that.
4542 return one_point_five;
4543 }
4544 }
4545 }
4546
FPToFixedJS(double value)4547 int32_t Simulator::FPToFixedJS(double value) {
4548 // The Z-flag is set when the conversion from double precision floating-point
4549 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
4550 // outside the bounds of a 32-bit integer, or isn't an exact integer then the
4551 // Z-flag is unset.
4552 int Z = 1;
4553 int32_t result;
4554
4555 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4556 (value == kFP64NegativeInfinity)) {
4557 // +/- zero and infinity all return zero, however -0 and +/- Infinity also
4558 // unset the Z-flag.
4559 result = 0.0;
4560 if ((value != 0.0) || std::signbit(value)) {
4561 Z = 0;
4562 }
4563 } else if (std::isnan(value)) {
4564 // NaN values unset the Z-flag and set the result to 0.
4565 FPProcessNaN(value);
4566 result = 0;
4567 Z = 0;
4568 } else {
4569 // All other values are converted to an integer representation, rounded
4570 // toward zero.
4571 double int_result = std::floor(value);
4572 double error = value - int_result;
4573
4574 if ((error != 0.0) && (int_result < 0.0)) {
4575 int_result++;
4576 }
4577
4578 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
4579 // write a one-liner with std::round, but the behaviour on ties is incorrect
4580 // for our purposes.
4581 double mod_const = static_cast<double>(UINT64_C(1) << 32);
4582 double mod_error =
4583 (int_result / mod_const) - std::floor(int_result / mod_const);
4584 double constrained;
4585 if (mod_error == 0.5) {
4586 constrained = INT32_MIN;
4587 } else {
4588 constrained = int_result - mod_const * round(int_result / mod_const);
4589 }
4590
4591 VIXL_ASSERT(std::floor(constrained) == constrained);
4592 VIXL_ASSERT(constrained >= INT32_MIN);
4593 VIXL_ASSERT(constrained <= INT32_MAX);
4594
4595 // Take the bottom 32 bits of the result as a 32-bit integer.
4596 result = static_cast<int32_t>(constrained);
4597
4598 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
4599 (error != 0.0)) {
4600 // If the integer result is out of range or the conversion isn't exact,
4601 // take exception and unset the Z-flag.
4602 FPProcessException();
4603 Z = 0;
4604 }
4605 }
4606
4607 ReadNzcv().SetN(0);
4608 ReadNzcv().SetZ(Z);
4609 ReadNzcv().SetC(0);
4610 ReadNzcv().SetV(0);
4611
4612 return result;
4613 }
4614
FPRoundIntCommon(double value,FPRounding round_mode)4615 double Simulator::FPRoundIntCommon(double value, FPRounding round_mode) {
4616 VIXL_ASSERT((value != kFP64PositiveInfinity) &&
4617 (value != kFP64NegativeInfinity));
4618 VIXL_ASSERT(!IsNaN(value));
4619
4620 double int_result = std::floor(value);
4621 double error = value - int_result;
4622 switch (round_mode) {
4623 case FPTieAway: {
4624 // Take care of correctly handling the range ]-0.5, -0.0], which must
4625 // yield -0.0.
4626 if ((-0.5 < value) && (value < 0.0)) {
4627 int_result = -0.0;
4628
4629 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4630 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4631 // result is positive, round up.
4632 int_result++;
4633 }
4634 break;
4635 }
4636 case FPTieEven: {
4637 // Take care of correctly handling the range [-0.5, -0.0], which must
4638 // yield -0.0.
4639 if ((-0.5 <= value) && (value < 0.0)) {
4640 int_result = -0.0;
4641
4642 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4643 // result is odd, round up.
4644 } else if ((error > 0.5) ||
4645 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4646 int_result++;
4647 }
4648 break;
4649 }
4650 case FPZero: {
4651 // If value>0 then we take floor(value)
4652 // otherwise, ceil(value).
4653 if (value < 0) {
4654 int_result = ceil(value);
4655 }
4656 break;
4657 }
4658 case FPNegativeInfinity: {
4659 // We always use floor(value).
4660 break;
4661 }
4662 case FPPositiveInfinity: {
4663 // Take care of correctly handling the range ]-1.0, -0.0], which must
4664 // yield -0.0.
4665 if ((-1.0 < value) && (value < 0.0)) {
4666 int_result = -0.0;
4667
4668 // If the error is non-zero, round up.
4669 } else if (error > 0.0) {
4670 int_result++;
4671 }
4672 break;
4673 }
4674 default:
4675 VIXL_UNIMPLEMENTED();
4676 }
4677 return int_result;
4678 }
4679
FPRoundInt(double value,FPRounding round_mode)4680 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4681 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4682 (value == kFP64NegativeInfinity)) {
4683 return value;
4684 } else if (IsNaN(value)) {
4685 return FPProcessNaN(value);
4686 }
4687 return FPRoundIntCommon(value, round_mode);
4688 }
4689
FPRoundInt(double value,FPRounding round_mode,FrintMode frint_mode)4690 double Simulator::FPRoundInt(double value,
4691 FPRounding round_mode,
4692 FrintMode frint_mode) {
4693 if (frint_mode == kFrintToInteger) {
4694 return FPRoundInt(value, round_mode);
4695 }
4696
4697 VIXL_ASSERT((frint_mode == kFrintToInt32) || (frint_mode == kFrintToInt64));
4698
4699 if (value == 0.0) {
4700 return value;
4701 }
4702
4703 if ((value == kFP64PositiveInfinity) || (value == kFP64NegativeInfinity) ||
4704 IsNaN(value)) {
4705 if (frint_mode == kFrintToInt32) {
4706 return INT32_MIN;
4707 } else {
4708 return INT64_MIN;
4709 }
4710 }
4711
4712 double result = FPRoundIntCommon(value, round_mode);
4713
4714 // We want to compare `result > INT64_MAX` below, but INT64_MAX isn't exactly
4715 // representable as a double, and is rounded to (INT64_MAX + 1) when
4716 // converted. To avoid this, we compare `result >= int64_max_plus_one`
4717 // instead; this is safe because `result` is known to be integral, and
4718 // `int64_max_plus_one` is exactly representable as a double.
4719 constexpr uint64_t int64_max_plus_one = static_cast<uint64_t>(INT64_MAX) + 1;
4720 VIXL_STATIC_ASSERT(static_cast<uint64_t>(static_cast<double>(
4721 int64_max_plus_one)) == int64_max_plus_one);
4722
4723 if (frint_mode == kFrintToInt32) {
4724 if ((result > INT32_MAX) || (result < INT32_MIN)) {
4725 return INT32_MIN;
4726 }
4727 } else if ((result >= int64_max_plus_one) || (result < INT64_MIN)) {
4728 return INT64_MIN;
4729 }
4730
4731 return result;
4732 }
4733
FPToInt16(double value,FPRounding rmode)4734 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4735 value = FPRoundInt(value, rmode);
4736 if (value >= kHMaxInt) {
4737 return kHMaxInt;
4738 } else if (value < kHMinInt) {
4739 return kHMinInt;
4740 }
4741 return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4742 }
4743
4744
FPToInt32(double value,FPRounding rmode)4745 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4746 value = FPRoundInt(value, rmode);
4747 if (value >= kWMaxInt) {
4748 return kWMaxInt;
4749 } else if (value < kWMinInt) {
4750 return kWMinInt;
4751 }
4752 return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4753 }
4754
4755
FPToInt64(double value,FPRounding rmode)4756 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4757 value = FPRoundInt(value, rmode);
4758 if (value >= kXMaxInt) {
4759 return kXMaxInt;
4760 } else if (value < kXMinInt) {
4761 return kXMinInt;
4762 }
4763 return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4764 }
4765
4766
FPToUInt16(double value,FPRounding rmode)4767 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4768 value = FPRoundInt(value, rmode);
4769 if (value >= kHMaxUInt) {
4770 return kHMaxUInt;
4771 } else if (value < 0.0) {
4772 return 0;
4773 }
4774 return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4775 }
4776
4777
FPToUInt32(double value,FPRounding rmode)4778 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4779 value = FPRoundInt(value, rmode);
4780 if (value >= kWMaxUInt) {
4781 return kWMaxUInt;
4782 } else if (value < 0.0) {
4783 return 0;
4784 }
4785 return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4786 }
4787
4788
FPToUInt64(double value,FPRounding rmode)4789 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4790 value = FPRoundInt(value, rmode);
4791 if (value >= kXMaxUInt) {
4792 return kXMaxUInt;
4793 } else if (value < 0.0) {
4794 return 0;
4795 }
4796 return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4797 }
4798
4799
// For every (FN, OP, PROCNAN) entry of NEON_FP3SAME_LIST this defines two
// overloads of Simulator::FN:
//  - a templated form that writes OP(src1[i], src2[i]) to each lane of dst,
//    reading the lanes as type T;
//  - a dispatching form that picks T (SimFloat16, float or double) from the
//    lane size of `vform`.
// When PROCNAN is true the operands first go through FPProcessNaNs, and a NaN
// coming out of that step is written to the lane instead of calling OP.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                  \
  template <typename T>                                            \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    dst.ClearForWrite(vform);                                      \
    const int lane_count = LaneCountFromFormat(vform);             \
    for (int lane = 0; lane < lane_count; ++lane) {                \
      T lhs = src1.Float<T>(lane);                                 \
      T rhs = src2.Float<T>(lane);                                 \
      T value;                                                     \
      bool have_nan_result = false;                                \
      if (PROCNAN) {                                               \
        value = FPProcessNaNs(lhs, rhs);                           \
        have_nan_result = IsNaN(value);                            \
      }                                                            \
      if (!have_nan_result) {                                      \
        value = OP(lhs, rhs);                                      \
      }                                                            \
      dst.SetFloat(vform, lane, value);                            \
    }                                                              \
    return dst;                                                    \
  }                                                                \
                                                                   \
  LogicVRegister Simulator::FN(VectorFormat vform,                 \
                               LogicVRegister dst,                 \
                               const LogicVRegister& src1,         \
                               const LogicVRegister& src2) {       \
    switch (LaneSizeInBitsFromFormat(vform)) {                     \
      case kHRegSize:                                              \
        FN<SimFloat16>(vform, dst, src1, src2);                    \
        break;                                                     \
      case kSRegSize:                                              \
        FN<float>(vform, dst, src1, src2);                         \
        break;                                                     \
      default:                                                     \
        VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize); \
        FN<double>(vform, dst, src1, src2);                        \
        break;                                                     \
    }                                                              \
    return dst;                                                    \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
4840
4841
4842 LogicVRegister Simulator::fnmul(VectorFormat vform,
4843 LogicVRegister dst,
4844 const LogicVRegister& src1,
4845 const LogicVRegister& src2) {
4846 SimVRegister temp;
4847 LogicVRegister product = fmul(vform, temp, src1, src2);
4848 return fneg(vform, dst, product);
4849 }
4850
4851
4852 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4853 LogicVRegister Simulator::frecps(VectorFormat vform,
4854 LogicVRegister dst,
4855 const LogicVRegister& src1,
4856 const LogicVRegister& src2) {
4857 dst.ClearForWrite(vform);
4858 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4859 T op1 = -src1.Float<T>(i);
4860 T op2 = src2.Float<T>(i);
4861 T result = FPProcessNaNs(op1, op2);
4862 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
4863 }
4864 return dst;
4865 }
4866
4867
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4868 LogicVRegister Simulator::frecps(VectorFormat vform,
4869 LogicVRegister dst,
4870 const LogicVRegister& src1,
4871 const LogicVRegister& src2) {
4872 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4873 frecps<SimFloat16>(vform, dst, src1, src2);
4874 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4875 frecps<float>(vform, dst, src1, src2);
4876 } else {
4877 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4878 frecps<double>(vform, dst, src1, src2);
4879 }
4880 return dst;
4881 }
4882
4883
4884 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4885 LogicVRegister Simulator::frsqrts(VectorFormat vform,
4886 LogicVRegister dst,
4887 const LogicVRegister& src1,
4888 const LogicVRegister& src2) {
4889 dst.ClearForWrite(vform);
4890 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4891 T op1 = -src1.Float<T>(i);
4892 T op2 = src2.Float<T>(i);
4893 T result = FPProcessNaNs(op1, op2);
4894 dst.SetFloat(vform, i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
4895 }
4896 return dst;
4897 }
4898
4899
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4900 LogicVRegister Simulator::frsqrts(VectorFormat vform,
4901 LogicVRegister dst,
4902 const LogicVRegister& src1,
4903 const LogicVRegister& src2) {
4904 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4905 frsqrts<SimFloat16>(vform, dst, src1, src2);
4906 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4907 frsqrts<float>(vform, dst, src1, src2);
4908 } else {
4909 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4910 frsqrts<double>(vform, dst, src1, src2);
4911 }
4912 return dst;
4913 }
4914
4915
4916 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4917 LogicVRegister Simulator::fcmp(VectorFormat vform,
4918 LogicVRegister dst,
4919 const LogicVRegister& src1,
4920 const LogicVRegister& src2,
4921 Condition cond) {
4922 dst.ClearForWrite(vform);
4923 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4924 bool result = false;
4925 T op1 = src1.Float<T>(i);
4926 T op2 = src2.Float<T>(i);
4927 bool unordered = IsNaN(FPProcessNaNs(op1, op2));
4928
4929 switch (cond) {
4930 case eq:
4931 result = (op1 == op2);
4932 break;
4933 case ge:
4934 result = (op1 >= op2);
4935 break;
4936 case gt:
4937 result = (op1 > op2);
4938 break;
4939 case le:
4940 result = (op1 <= op2);
4941 break;
4942 case lt:
4943 result = (op1 < op2);
4944 break;
4945 case ne:
4946 result = (op1 != op2);
4947 break;
4948 case uo:
4949 result = unordered;
4950 break;
4951 default:
4952 // Other conditions are defined in terms of those above.
4953 VIXL_UNREACHABLE();
4954 break;
4955 }
4956
4957 if (result && unordered) {
4958 // Only `uo` and `ne` can be true for unordered comparisons.
4959 VIXL_ASSERT((cond == uo) || (cond == ne));
4960 }
4961
4962 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
4963 }
4964 return dst;
4965 }
4966
4967
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4968 LogicVRegister Simulator::fcmp(VectorFormat vform,
4969 LogicVRegister dst,
4970 const LogicVRegister& src1,
4971 const LogicVRegister& src2,
4972 Condition cond) {
4973 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4974 fcmp<SimFloat16>(vform, dst, src1, src2, cond);
4975 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4976 fcmp<float>(vform, dst, src1, src2, cond);
4977 } else {
4978 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4979 fcmp<double>(vform, dst, src1, src2, cond);
4980 }
4981 return dst;
4982 }
4983
4984
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)4985 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4986 LogicVRegister dst,
4987 const LogicVRegister& src,
4988 Condition cond) {
4989 SimVRegister temp;
4990 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4991 LogicVRegister zero_reg =
4992 dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
4993 fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
4994 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4995 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
4996 fcmp<float>(vform, dst, src, zero_reg, cond);
4997 } else {
4998 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4999 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
5000 fcmp<double>(vform, dst, src, zero_reg, cond);
5001 }
5002 return dst;
5003 }
5004
5005
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)5006 LogicVRegister Simulator::fabscmp(VectorFormat vform,
5007 LogicVRegister dst,
5008 const LogicVRegister& src1,
5009 const LogicVRegister& src2,
5010 Condition cond) {
5011 SimVRegister temp1, temp2;
5012 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5013 LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
5014 LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
5015 fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
5016 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5017 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
5018 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
5019 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
5020 } else {
5021 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5022 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
5023 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
5024 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
5025 }
5026 return dst;
5027 }
5028
5029
5030 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5031 LogicVRegister Simulator::fmla(VectorFormat vform,
5032 LogicVRegister dst,
5033 const LogicVRegister& srca,
5034 const LogicVRegister& src1,
5035 const LogicVRegister& src2) {
5036 dst.ClearForWrite(vform);
5037 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5038 T op1 = src1.Float<T>(i);
5039 T op2 = src2.Float<T>(i);
5040 T acc = srca.Float<T>(i);
5041 T result = FPMulAdd(acc, op1, op2);
5042 dst.SetFloat(vform, i, result);
5043 }
5044 return dst;
5045 }
5046
5047
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5048 LogicVRegister Simulator::fmla(VectorFormat vform,
5049 LogicVRegister dst,
5050 const LogicVRegister& srca,
5051 const LogicVRegister& src1,
5052 const LogicVRegister& src2) {
5053 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5054 fmla<SimFloat16>(vform, dst, srca, src1, src2);
5055 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5056 fmla<float>(vform, dst, srca, src1, src2);
5057 } else {
5058 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5059 fmla<double>(vform, dst, srca, src1, src2);
5060 }
5061 return dst;
5062 }
5063
5064
5065 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5066 LogicVRegister Simulator::fmls(VectorFormat vform,
5067 LogicVRegister dst,
5068 const LogicVRegister& srca,
5069 const LogicVRegister& src1,
5070 const LogicVRegister& src2) {
5071 dst.ClearForWrite(vform);
5072 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5073 T op1 = -src1.Float<T>(i);
5074 T op2 = src2.Float<T>(i);
5075 T acc = srca.Float<T>(i);
5076 T result = FPMulAdd(acc, op1, op2);
5077 dst.SetFloat(i, result);
5078 }
5079 return dst;
5080 }
5081
5082
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & srca,const LogicVRegister & src1,const LogicVRegister & src2)5083 LogicVRegister Simulator::fmls(VectorFormat vform,
5084 LogicVRegister dst,
5085 const LogicVRegister& srca,
5086 const LogicVRegister& src1,
5087 const LogicVRegister& src2) {
5088 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5089 fmls<SimFloat16>(vform, dst, srca, src1, src2);
5090 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5091 fmls<float>(vform, dst, srca, src1, src2);
5092 } else {
5093 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5094 fmls<double>(vform, dst, srca, src1, src2);
5095 }
5096 return dst;
5097 }
5098
5099
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5100 LogicVRegister Simulator::fmlal(VectorFormat vform,
5101 LogicVRegister dst,
5102 const LogicVRegister& src1,
5103 const LogicVRegister& src2) {
5104 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5105 dst.ClearForWrite(vform);
5106 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5107 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5108 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5109 float acc = dst.Float<float>(i);
5110 float result = FPMulAdd(acc, op1, op2);
5111 dst.SetFloat(i, result);
5112 }
5113 return dst;
5114 }
5115
5116
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5117 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5118 LogicVRegister dst,
5119 const LogicVRegister& src1,
5120 const LogicVRegister& src2) {
5121 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5122 dst.ClearForWrite(vform);
5123 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5124 int src = i + LaneCountFromFormat(vform);
5125 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5126 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5127 float acc = dst.Float<float>(i);
5128 float result = FPMulAdd(acc, op1, op2);
5129 dst.SetFloat(i, result);
5130 }
5131 return dst;
5132 }
5133
5134
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5135 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5136 LogicVRegister dst,
5137 const LogicVRegister& src1,
5138 const LogicVRegister& src2) {
5139 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5140 dst.ClearForWrite(vform);
5141 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5142 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5143 float op2 = FPToFloat(src2.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5144 float acc = dst.Float<float>(i);
5145 float result = FPMulAdd(acc, op1, op2);
5146 dst.SetFloat(i, result);
5147 }
5148 return dst;
5149 }
5150
5151
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5152 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5153 LogicVRegister dst,
5154 const LogicVRegister& src1,
5155 const LogicVRegister& src2) {
5156 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5157 dst.ClearForWrite(vform);
5158 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5159 int src = i + LaneCountFromFormat(vform);
5160 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5161 float op2 = FPToFloat(src2.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5162 float acc = dst.Float<float>(i);
5163 float result = FPMulAdd(acc, op1, op2);
5164 dst.SetFloat(i, result);
5165 }
5166 return dst;
5167 }
5168
5169
fmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5170 LogicVRegister Simulator::fmlal(VectorFormat vform,
5171 LogicVRegister dst,
5172 const LogicVRegister& src1,
5173 const LogicVRegister& src2,
5174 int index) {
5175 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5176 dst.ClearForWrite(vform);
5177 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5178 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5179 float op1 = FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5180 float acc = dst.Float<float>(i);
5181 float result = FPMulAdd(acc, op1, op2);
5182 dst.SetFloat(i, result);
5183 }
5184 return dst;
5185 }
5186
5187
fmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5188 LogicVRegister Simulator::fmlal2(VectorFormat vform,
5189 LogicVRegister dst,
5190 const LogicVRegister& src1,
5191 const LogicVRegister& src2,
5192 int index) {
5193 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5194 dst.ClearForWrite(vform);
5195 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5196 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5197 int src = i + LaneCountFromFormat(vform);
5198 float op1 = FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5199 float acc = dst.Float<float>(i);
5200 float result = FPMulAdd(acc, op1, op2);
5201 dst.SetFloat(i, result);
5202 }
5203 return dst;
5204 }
5205
5206
fmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5207 LogicVRegister Simulator::fmlsl(VectorFormat vform,
5208 LogicVRegister dst,
5209 const LogicVRegister& src1,
5210 const LogicVRegister& src2,
5211 int index) {
5212 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5213 dst.ClearForWrite(vform);
5214 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5215 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5216 float op1 = -FPToFloat(src1.Float<SimFloat16>(i), kIgnoreDefaultNaN);
5217 float acc = dst.Float<float>(i);
5218 float result = FPMulAdd(acc, op1, op2);
5219 dst.SetFloat(i, result);
5220 }
5221 return dst;
5222 }
5223
5224
fmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5225 LogicVRegister Simulator::fmlsl2(VectorFormat vform,
5226 LogicVRegister dst,
5227 const LogicVRegister& src1,
5228 const LogicVRegister& src2,
5229 int index) {
5230 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5231 dst.ClearForWrite(vform);
5232 float op2 = FPToFloat(src2.Float<SimFloat16>(index), kIgnoreDefaultNaN);
5233 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5234 int src = i + LaneCountFromFormat(vform);
5235 float op1 = -FPToFloat(src1.Float<SimFloat16>(src), kIgnoreDefaultNaN);
5236 float acc = dst.Float<float>(i);
5237 float result = FPMulAdd(acc, op1, op2);
5238 dst.SetFloat(i, result);
5239 }
5240 return dst;
5241 }
5242
5243
5244 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5245 LogicVRegister Simulator::fneg(VectorFormat vform,
5246 LogicVRegister dst,
5247 const LogicVRegister& src) {
5248 dst.ClearForWrite(vform);
5249 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5250 T op = src.Float<T>(i);
5251 op = -op;
5252 dst.SetFloat(i, op);
5253 }
5254 return dst;
5255 }
5256
5257
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5258 LogicVRegister Simulator::fneg(VectorFormat vform,
5259 LogicVRegister dst,
5260 const LogicVRegister& src) {
5261 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5262 fneg<SimFloat16>(vform, dst, src);
5263 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5264 fneg<float>(vform, dst, src);
5265 } else {
5266 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5267 fneg<double>(vform, dst, src);
5268 }
5269 return dst;
5270 }
5271
5272
5273 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5274 LogicVRegister Simulator::fabs_(VectorFormat vform,
5275 LogicVRegister dst,
5276 const LogicVRegister& src) {
5277 dst.ClearForWrite(vform);
5278 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5279 T op = src.Float<T>(i);
5280 if (copysign(1.0, op) < 0.0) {
5281 op = -op;
5282 }
5283 dst.SetFloat(i, op);
5284 }
5285 return dst;
5286 }
5287
5288
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5289 LogicVRegister Simulator::fabs_(VectorFormat vform,
5290 LogicVRegister dst,
5291 const LogicVRegister& src) {
5292 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5293 fabs_<SimFloat16>(vform, dst, src);
5294 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5295 fabs_<float>(vform, dst, src);
5296 } else {
5297 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5298 fabs_<double>(vform, dst, src);
5299 }
5300 return dst;
5301 }
5302
5303
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)5304 LogicVRegister Simulator::fabd(VectorFormat vform,
5305 LogicVRegister dst,
5306 const LogicVRegister& src1,
5307 const LogicVRegister& src2) {
5308 SimVRegister temp;
5309 fsub(vform, temp, src1, src2);
5310 fabs_(vform, dst, temp);
5311 return dst;
5312 }
5313
5314
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5315 LogicVRegister Simulator::fsqrt(VectorFormat vform,
5316 LogicVRegister dst,
5317 const LogicVRegister& src) {
5318 dst.ClearForWrite(vform);
5319 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5320 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5321 SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
5322 dst.SetFloat(i, result);
5323 }
5324 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5325 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5326 float result = FPSqrt(src.Float<float>(i));
5327 dst.SetFloat(i, result);
5328 }
5329 } else {
5330 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5331 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5332 double result = FPSqrt(src.Float<double>(i));
5333 dst.SetFloat(i, result);
5334 }
5335 }
5336 return dst;
5337 }
5338
5339
// For every (FNP, FN, OP) entry of NEON_FPPAIRWISE_LIST this defines two
// overloads of Simulator::FNP:
//  - a vector form that de-interleaves src1/src2 with uzp1 and uzp2 and
//    applies the lane-wise operation FN to the two results;
//  - a scalar form (vform kFormatH, kFormatS or kFormatD) that applies OP to
//    lanes 0 and 1 of src and writes the result to lane 0 of dst.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                  \
  LogicVRegister Simulator::FNP(VectorFormat vform,                          \
                                LogicVRegister dst,                          \
                                const LogicVRegister& src1,                  \
                                const LogicVRegister& src2) {                \
    SimVRegister first_of_pairs, second_of_pairs;                            \
    uzp1(vform, first_of_pairs, src1, src2);                                 \
    uzp2(vform, second_of_pairs, src1, src2);                                \
    FN(vform, dst, first_of_pairs, second_of_pairs);                         \
    return dst;                                                              \
  }                                                                          \
                                                                             \
  LogicVRegister Simulator::FNP(VectorFormat vform,                          \
                                LogicVRegister dst,                          \
                                const LogicVRegister& src) {                 \
    switch (vform) {                                                         \
      case kFormatH: {                                                       \
        SimFloat16 first(RawbitsToFloat16(src.Uint(vform, 0)));              \
        SimFloat16 second(RawbitsToFloat16(src.Uint(vform, 1)));             \
        SimFloat16 combined(OP(first, second));                              \
        dst.SetUint(vform, 0, Float16ToRawbits(combined));                   \
        break;                                                               \
      }                                                                      \
      case kFormatS: {                                                       \
        float combined = OP(src.Float<float>(0), src.Float<float>(1));       \
        dst.SetFloat(0, combined);                                           \
        break;                                                               \
      }                                                                      \
      default: {                                                             \
        VIXL_ASSERT(vform == kFormatD);                                      \
        double combined = OP(src.Float<double>(0), src.Float<double>(1));    \
        dst.SetFloat(0, combined);                                           \
        break;                                                               \
      }                                                                      \
    }                                                                        \
    dst.ClearForWrite(vform);                                                \
    return dst;                                                              \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
5372
5373 template <typename T>
5374 LogicVRegister Simulator::FPPairedAcrossHelper(VectorFormat vform,
5375 LogicVRegister dst,
5376 const LogicVRegister& src,
5377 typename TFPPairOp<T>::type fn,
5378 uint64_t inactive_value) {
5379 int lane_count = LaneCountFromFormat(vform);
5380 T result[kZRegMaxSizeInBytes / sizeof(T)];
5381 // Copy the source vector into a working array. Initialise the unused elements
5382 // at the end of the array to the same value that a false predicate would set.
5383 for (int i = 0; i < static_cast<int>(ArrayLength(result)); i++) {
5384 result[i] = (i < lane_count)
5385 ? src.Float<T>(i)
5386 : RawbitsWithSizeToFP<T>(sizeof(T) * 8, inactive_value);
5387 }
5388
5389 // Pairwise reduce the elements to a single value, using the pair op function
5390 // argument.
5391 for (int step = 1; step < lane_count; step *= 2) {
5392 for (int i = 0; i < lane_count; i += step * 2) {
5393 result[i] = (this->*fn)(result[i], result[i + step]);
5394 }
5395 }
5396 dst.ClearForWrite(ScalarFormatFromFormat(vform));
5397 dst.SetFloat<T>(0, result[0]);
5398 return dst;
5399 }
5400
FPPairedAcrossHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,typename TFPPairOp<SimFloat16>::type fn16,typename TFPPairOp<float>::type fn32,typename TFPPairOp<double>::type fn64,uint64_t inactive_value)5401 LogicVRegister Simulator::FPPairedAcrossHelper(
5402 VectorFormat vform,
5403 LogicVRegister dst,
5404 const LogicVRegister& src,
5405 typename TFPPairOp<SimFloat16>::type fn16,
5406 typename TFPPairOp<float>::type fn32,
5407 typename TFPPairOp<double>::type fn64,
5408 uint64_t inactive_value) {
5409 switch (LaneSizeInBitsFromFormat(vform)) {
5410 case kHRegSize:
5411 return FPPairedAcrossHelper<SimFloat16>(vform,
5412 dst,
5413 src,
5414 fn16,
5415 inactive_value);
5416 case kSRegSize:
5417 return FPPairedAcrossHelper<float>(vform, dst, src, fn32, inactive_value);
5418 default:
5419 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5420 return FPPairedAcrossHelper<double>(vform,
5421 dst,
5422 src,
5423 fn64,
5424 inactive_value);
5425 }
5426 }
5427
faddv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5428 LogicVRegister Simulator::faddv(VectorFormat vform,
5429 LogicVRegister dst,
5430 const LogicVRegister& src) {
5431 return FPPairedAcrossHelper(vform,
5432 dst,
5433 src,
5434 &Simulator::FPAdd<SimFloat16>,
5435 &Simulator::FPAdd<float>,
5436 &Simulator::FPAdd<double>,
5437 0);
5438 }
5439
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5440 LogicVRegister Simulator::fmaxv(VectorFormat vform,
5441 LogicVRegister dst,
5442 const LogicVRegister& src) {
5443 int lane_size = LaneSizeInBitsFromFormat(vform);
5444 uint64_t inactive_value =
5445 FPToRawbitsWithSize(lane_size, kFP64NegativeInfinity);
5446 return FPPairedAcrossHelper(vform,
5447 dst,
5448 src,
5449 &Simulator::FPMax<SimFloat16>,
5450 &Simulator::FPMax<float>,
5451 &Simulator::FPMax<double>,
5452 inactive_value);
5453 }
5454
5455
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5456 LogicVRegister Simulator::fminv(VectorFormat vform,
5457 LogicVRegister dst,
5458 const LogicVRegister& src) {
5459 int lane_size = LaneSizeInBitsFromFormat(vform);
5460 uint64_t inactive_value =
5461 FPToRawbitsWithSize(lane_size, kFP64PositiveInfinity);
5462 return FPPairedAcrossHelper(vform,
5463 dst,
5464 src,
5465 &Simulator::FPMin<SimFloat16>,
5466 &Simulator::FPMin<float>,
5467 &Simulator::FPMin<double>,
5468 inactive_value);
5469 }
5470
5471
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5472 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
5473 LogicVRegister dst,
5474 const LogicVRegister& src) {
5475 int lane_size = LaneSizeInBitsFromFormat(vform);
5476 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5477 return FPPairedAcrossHelper(vform,
5478 dst,
5479 src,
5480 &Simulator::FPMaxNM<SimFloat16>,
5481 &Simulator::FPMaxNM<float>,
5482 &Simulator::FPMaxNM<double>,
5483 inactive_value);
5484 }
5485
5486
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5487 LogicVRegister Simulator::fminnmv(VectorFormat vform,
5488 LogicVRegister dst,
5489 const LogicVRegister& src) {
5490 int lane_size = LaneSizeInBitsFromFormat(vform);
5491 uint64_t inactive_value = FPToRawbitsWithSize(lane_size, kFP64DefaultNaN);
5492 return FPPairedAcrossHelper(vform,
5493 dst,
5494 src,
5495 &Simulator::FPMinNM<SimFloat16>,
5496 &Simulator::FPMinNM<float>,
5497 &Simulator::FPMinNM<double>,
5498 inactive_value);
5499 }
5500
5501
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5502 LogicVRegister Simulator::fmul(VectorFormat vform,
5503 LogicVRegister dst,
5504 const LogicVRegister& src1,
5505 const LogicVRegister& src2,
5506 int index) {
5507 dst.ClearForWrite(vform);
5508 SimVRegister temp;
5509 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5510 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5511 fmul<SimFloat16>(vform, dst, src1, index_reg);
5512 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5513 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5514 fmul<float>(vform, dst, src1, index_reg);
5515 } else {
5516 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5517 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5518 fmul<double>(vform, dst, src1, index_reg);
5519 }
5520 return dst;
5521 }
5522
5523
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5524 LogicVRegister Simulator::fmla(VectorFormat vform,
5525 LogicVRegister dst,
5526 const LogicVRegister& src1,
5527 const LogicVRegister& src2,
5528 int index) {
5529 dst.ClearForWrite(vform);
5530 SimVRegister temp;
5531 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5532 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5533 fmla<SimFloat16>(vform, dst, dst, src1, index_reg);
5534 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5535 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5536 fmla<float>(vform, dst, dst, src1, index_reg);
5537 } else {
5538 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5539 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5540 fmla<double>(vform, dst, dst, src1, index_reg);
5541 }
5542 return dst;
5543 }
5544
5545
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5546 LogicVRegister Simulator::fmls(VectorFormat vform,
5547 LogicVRegister dst,
5548 const LogicVRegister& src1,
5549 const LogicVRegister& src2,
5550 int index) {
5551 dst.ClearForWrite(vform);
5552 SimVRegister temp;
5553 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5554 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5555 fmls<SimFloat16>(vform, dst, dst, src1, index_reg);
5556 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5557 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5558 fmls<float>(vform, dst, dst, src1, index_reg);
5559 } else {
5560 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5561 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5562 fmls<double>(vform, dst, dst, src1, index_reg);
5563 }
5564 return dst;
5565 }
5566
5567
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)5568 LogicVRegister Simulator::fmulx(VectorFormat vform,
5569 LogicVRegister dst,
5570 const LogicVRegister& src1,
5571 const LogicVRegister& src2,
5572 int index) {
5573 dst.ClearForWrite(vform);
5574 SimVRegister temp;
5575 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5576 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
5577 fmulx<SimFloat16>(vform, dst, src1, index_reg);
5578 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5579 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
5580 fmulx<float>(vform, dst, src1, index_reg);
5581 } else {
5582 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5583 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
5584 fmulx<double>(vform, dst, src1, index_reg);
5585 }
5586 return dst;
5587 }
5588
5589
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception,FrintMode frint_mode)5590 LogicVRegister Simulator::frint(VectorFormat vform,
5591 LogicVRegister dst,
5592 const LogicVRegister& src,
5593 FPRounding rounding_mode,
5594 bool inexact_exception,
5595 FrintMode frint_mode) {
5596 dst.ClearForWrite(vform);
5597 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5598 VIXL_ASSERT(frint_mode == kFrintToInteger);
5599 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5600 SimFloat16 input = src.Float<SimFloat16>(i);
5601 SimFloat16 rounded = FPRoundInt(input, rounding_mode);
5602 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5603 FPProcessException();
5604 }
5605 dst.SetFloat<SimFloat16>(i, rounded);
5606 }
5607 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5608 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5609 float input = src.Float<float>(i);
5610 float rounded = FPRoundInt(input, rounding_mode, frint_mode);
5611
5612 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5613 FPProcessException();
5614 }
5615 dst.SetFloat<float>(i, rounded);
5616 }
5617 } else {
5618 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5619 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5620 double input = src.Float<double>(i);
5621 double rounded = FPRoundInt(input, rounding_mode, frint_mode);
5622 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
5623 FPProcessException();
5624 }
5625 dst.SetFloat<double>(i, rounded);
5626 }
5627 }
5628 return dst;
5629 }
5630
fcvt(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src)5631 LogicVRegister Simulator::fcvt(VectorFormat vform,
5632 unsigned dst_data_size_in_bits,
5633 unsigned src_data_size_in_bits,
5634 LogicVRegister dst,
5635 const LogicPRegister& pg,
5636 const LogicVRegister& src) {
5637 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5638 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5639
5640 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5641 if (!pg.IsActive(vform, i)) continue;
5642
5643 uint64_t src_raw_bits = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5644 0,
5645 src.Uint(vform, i));
5646 double dst_value =
5647 RawbitsWithSizeToFP<double>(src_data_size_in_bits, src_raw_bits);
5648
5649 uint64_t dst_raw_bits =
5650 FPToRawbitsWithSize(dst_data_size_in_bits, dst_value);
5651
5652 dst.SetUint(vform, i, dst_raw_bits);
5653 }
5654
5655 return dst;
5656 }
5657
fcvts(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5658 LogicVRegister Simulator::fcvts(VectorFormat vform,
5659 unsigned dst_data_size_in_bits,
5660 unsigned src_data_size_in_bits,
5661 LogicVRegister dst,
5662 const LogicPRegister& pg,
5663 const LogicVRegister& src,
5664 FPRounding round,
5665 int fbits) {
5666 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5667 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5668
5669 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5670 if (!pg.IsActive(vform, i)) continue;
5671
5672 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5673 0,
5674 src.Uint(vform, i));
5675 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5676 std::pow(2.0, fbits);
5677
5678 switch (dst_data_size_in_bits) {
5679 case kHRegSize:
5680 dst.SetInt(vform, i, FPToInt16(result, round));
5681 break;
5682 case kSRegSize:
5683 dst.SetInt(vform, i, FPToInt32(result, round));
5684 break;
5685 case kDRegSize:
5686 dst.SetInt(vform, i, FPToInt64(result, round));
5687 break;
5688 default:
5689 VIXL_UNIMPLEMENTED();
5690 break;
5691 }
5692 }
5693
5694 return dst;
5695 }
5696
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5697 LogicVRegister Simulator::fcvts(VectorFormat vform,
5698 LogicVRegister dst,
5699 const LogicVRegister& src,
5700 FPRounding round,
5701 int fbits) {
5702 dst.ClearForWrite(vform);
5703 return fcvts(vform,
5704 LaneSizeInBitsFromFormat(vform),
5705 LaneSizeInBitsFromFormat(vform),
5706 dst,
5707 GetPTrue(),
5708 src,
5709 round,
5710 fbits);
5711 }
5712
fcvtu(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)5713 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5714 unsigned dst_data_size_in_bits,
5715 unsigned src_data_size_in_bits,
5716 LogicVRegister dst,
5717 const LogicPRegister& pg,
5718 const LogicVRegister& src,
5719 FPRounding round,
5720 int fbits) {
5721 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
5722 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
5723
5724 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5725 if (!pg.IsActive(vform, i)) continue;
5726
5727 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
5728 0,
5729 src.Uint(vform, i));
5730 double result = RawbitsWithSizeToFP<double>(src_data_size_in_bits, value) *
5731 std::pow(2.0, fbits);
5732
5733 switch (dst_data_size_in_bits) {
5734 case kHRegSize:
5735 dst.SetUint(vform, i, FPToUInt16(result, round));
5736 break;
5737 case kSRegSize:
5738 dst.SetUint(vform, i, FPToUInt32(result, round));
5739 break;
5740 case kDRegSize:
5741 dst.SetUint(vform, i, FPToUInt64(result, round));
5742 break;
5743 default:
5744 VIXL_UNIMPLEMENTED();
5745 break;
5746 }
5747 }
5748
5749 return dst;
5750 }
5751
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round,int fbits)5752 LogicVRegister Simulator::fcvtu(VectorFormat vform,
5753 LogicVRegister dst,
5754 const LogicVRegister& src,
5755 FPRounding round,
5756 int fbits) {
5757 dst.ClearForWrite(vform);
5758 return fcvtu(vform,
5759 LaneSizeInBitsFromFormat(vform),
5760 LaneSizeInBitsFromFormat(vform),
5761 dst,
5762 GetPTrue(),
5763 src,
5764 round,
5765 fbits);
5766 }
5767
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5768 LogicVRegister Simulator::fcvtl(VectorFormat vform,
5769 LogicVRegister dst,
5770 const LogicVRegister& src) {
5771 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5772 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5773 // TODO: Full support for SimFloat16 in SimRegister(s).
5774 dst.SetFloat(i,
5775 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
5776 ReadDN()));
5777 }
5778 } else {
5779 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5780 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
5781 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
5782 }
5783 }
5784 return dst;
5785 }
5786
5787
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5788 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
5789 LogicVRegister dst,
5790 const LogicVRegister& src) {
5791 int lane_count = LaneCountFromFormat(vform);
5792 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5793 for (int i = 0; i < lane_count; i++) {
5794 // TODO: Full support for SimFloat16 in SimRegister(s).
5795 dst.SetFloat(i,
5796 FPToFloat(RawbitsToFloat16(
5797 src.Float<uint16_t>(i + lane_count)),
5798 ReadDN()));
5799 }
5800 } else {
5801 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5802 for (int i = 0; i < lane_count; i++) {
5803 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
5804 }
5805 }
5806 return dst;
5807 }
5808
5809
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5810 LogicVRegister Simulator::fcvtn(VectorFormat vform,
5811 LogicVRegister dst,
5812 const LogicVRegister& src) {
5813 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5814 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5815 dst.SetFloat(i,
5816 Float16ToRawbits(
5817 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
5818 }
5819 } else {
5820 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5821 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5822 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
5823 }
5824 }
5825 return dst;
5826 }
5827
5828
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5829 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
5830 LogicVRegister dst,
5831 const LogicVRegister& src) {
5832 int lane_count = LaneCountFromFormat(vform) / 2;
5833 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5834 for (int i = lane_count - 1; i >= 0; i--) {
5835 dst.SetFloat(i + lane_count,
5836 Float16ToRawbits(
5837 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
5838 }
5839 } else {
5840 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5841 for (int i = lane_count - 1; i >= 0; i--) {
5842 dst.SetFloat(i + lane_count,
5843 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
5844 }
5845 }
5846 return dst;
5847 }
5848
5849
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5850 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
5851 LogicVRegister dst,
5852 const LogicVRegister& src) {
5853 dst.ClearForWrite(vform);
5854 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5855 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5856 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
5857 }
5858 return dst;
5859 }
5860
5861
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5862 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
5863 LogicVRegister dst,
5864 const LogicVRegister& src) {
5865 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
5866 int lane_count = LaneCountFromFormat(vform) / 2;
5867 for (int i = lane_count - 1; i >= 0; i--) {
5868 dst.SetFloat(i + lane_count,
5869 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
5870 }
5871 return dst;
5872 }
5873
5874
5875 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)5876 double Simulator::recip_sqrt_estimate(double a) {
5877 int q0, q1, s;
5878 double r;
5879 if (a < 0.5) {
5880 q0 = static_cast<int>(a * 512.0);
5881 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
5882 } else {
5883 q1 = static_cast<int>(a * 256.0);
5884 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
5885 }
5886 s = static_cast<int>(256.0 * r + 0.5);
5887 return static_cast<double>(s) / 256.0;
5888 }
5889
5890
Bits(uint64_t val,int start_bit,int end_bit)5891 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
5892 return ExtractUnsignedBitfield64(start_bit, end_bit, val);
5893 }
5894
5895
5896 template <typename T>
FPRecipSqrtEstimate(T op)5897 T Simulator::FPRecipSqrtEstimate(T op) {
5898 if (IsNaN(op)) {
5899 return FPProcessNaN(op);
5900 } else if (op == 0.0) {
5901 if (copysign(1.0, op) < 0.0) {
5902 return kFP64NegativeInfinity;
5903 } else {
5904 return kFP64PositiveInfinity;
5905 }
5906 } else if (copysign(1.0, op) < 0.0) {
5907 FPProcessException();
5908 return FPDefaultNaN<T>();
5909 } else if (IsInf(op)) {
5910 return 0.0;
5911 } else {
5912 uint64_t fraction;
5913 int exp, result_exp;
5914
5915 if (IsFloat16<T>()) {
5916 exp = Float16Exp(op);
5917 fraction = Float16Mantissa(op);
5918 fraction <<= 42;
5919 } else if (IsFloat32<T>()) {
5920 exp = FloatExp(op);
5921 fraction = FloatMantissa(op);
5922 fraction <<= 29;
5923 } else {
5924 VIXL_ASSERT(IsFloat64<T>());
5925 exp = DoubleExp(op);
5926 fraction = DoubleMantissa(op);
5927 }
5928
5929 if (exp == 0) {
5930 while (Bits(fraction, 51, 51) == 0) {
5931 fraction = Bits(fraction, 50, 0) << 1;
5932 exp -= 1;
5933 }
5934 fraction = Bits(fraction, 50, 0) << 1;
5935 }
5936
5937 double scaled;
5938 if (Bits(exp, 0, 0) == 0) {
5939 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
5940 } else {
5941 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
5942 }
5943
5944 if (IsFloat16<T>()) {
5945 result_exp = (44 - exp) / 2;
5946 } else if (IsFloat32<T>()) {
5947 result_exp = (380 - exp) / 2;
5948 } else {
5949 VIXL_ASSERT(IsFloat64<T>());
5950 result_exp = (3068 - exp) / 2;
5951 }
5952
5953 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
5954
5955 if (IsFloat16<T>()) {
5956 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
5957 uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
5958 return Float16Pack(0, exp_bits, est_bits);
5959 } else if (IsFloat32<T>()) {
5960 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
5961 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
5962 return FloatPack(0, exp_bits, est_bits);
5963 } else {
5964 VIXL_ASSERT(IsFloat64<T>());
5965 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
5966 }
5967 }
5968 }
5969
5970
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5971 LogicVRegister Simulator::frsqrte(VectorFormat vform,
5972 LogicVRegister dst,
5973 const LogicVRegister& src) {
5974 dst.ClearForWrite(vform);
5975 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5976 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5977 SimFloat16 input = src.Float<SimFloat16>(i);
5978 dst.SetFloat(vform, i, FPRecipSqrtEstimate<SimFloat16>(input));
5979 }
5980 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5981 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5982 float input = src.Float<float>(i);
5983 dst.SetFloat(vform, i, FPRecipSqrtEstimate<float>(input));
5984 }
5985 } else {
5986 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5987 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5988 double input = src.Float<double>(i);
5989 dst.SetFloat(vform, i, FPRecipSqrtEstimate<double>(input));
5990 }
5991 }
5992 return dst;
5993 }
5994
5995 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)5996 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
5997 uint32_t sign;
5998
5999 if (IsFloat16<T>()) {
6000 sign = Float16Sign(op);
6001 } else if (IsFloat32<T>()) {
6002 sign = FloatSign(op);
6003 } else {
6004 VIXL_ASSERT(IsFloat64<T>());
6005 sign = DoubleSign(op);
6006 }
6007
6008 if (IsNaN(op)) {
6009 return FPProcessNaN(op);
6010 } else if (IsInf(op)) {
6011 return (sign == 1) ? -0.0 : 0.0;
6012 } else if (op == 0.0) {
6013 FPProcessException(); // FPExc_DivideByZero exception.
6014 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6015 } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
6016 (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
6017 (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
6018 bool overflow_to_inf = false;
6019 switch (rounding) {
6020 case FPTieEven:
6021 overflow_to_inf = true;
6022 break;
6023 case FPPositiveInfinity:
6024 overflow_to_inf = (sign == 0);
6025 break;
6026 case FPNegativeInfinity:
6027 overflow_to_inf = (sign == 1);
6028 break;
6029 case FPZero:
6030 overflow_to_inf = false;
6031 break;
6032 default:
6033 break;
6034 }
6035 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
6036 if (overflow_to_inf) {
6037 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
6038 } else {
6039 // Return FPMaxNormal(sign).
6040 if (IsFloat16<T>()) {
6041 return Float16Pack(sign, 0x1f, 0x3ff);
6042 } else if (IsFloat32<T>()) {
6043 return FloatPack(sign, 0xfe, 0x07fffff);
6044 } else {
6045 VIXL_ASSERT(IsFloat64<T>());
6046 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
6047 }
6048 }
6049 } else {
6050 uint64_t fraction;
6051 int exp, result_exp;
6052 uint32_t sign;
6053
6054 if (IsFloat16<T>()) {
6055 sign = Float16Sign(op);
6056 exp = Float16Exp(op);
6057 fraction = Float16Mantissa(op);
6058 fraction <<= 42;
6059 } else if (IsFloat32<T>()) {
6060 sign = FloatSign(op);
6061 exp = FloatExp(op);
6062 fraction = FloatMantissa(op);
6063 fraction <<= 29;
6064 } else {
6065 VIXL_ASSERT(IsFloat64<T>());
6066 sign = DoubleSign(op);
6067 exp = DoubleExp(op);
6068 fraction = DoubleMantissa(op);
6069 }
6070
6071 if (exp == 0) {
6072 if (Bits(fraction, 51, 51) == 0) {
6073 exp -= 1;
6074 fraction = Bits(fraction, 49, 0) << 2;
6075 } else {
6076 fraction = Bits(fraction, 50, 0) << 1;
6077 }
6078 }
6079
6080 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
6081
6082 if (IsFloat16<T>()) {
6083 result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
6084 } else if (IsFloat32<T>()) {
6085 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
6086 } else {
6087 VIXL_ASSERT(IsFloat64<T>());
6088 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
6089 }
6090
6091 double estimate = recip_estimate(scaled);
6092
6093 fraction = DoubleMantissa(estimate);
6094 if (result_exp == 0) {
6095 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
6096 } else if (result_exp == -1) {
6097 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
6098 result_exp = 0;
6099 }
6100 if (IsFloat16<T>()) {
6101 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
6102 uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
6103 return Float16Pack(sign, exp_bits, frac_bits);
6104 } else if (IsFloat32<T>()) {
6105 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
6106 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
6107 return FloatPack(sign, exp_bits, frac_bits);
6108 } else {
6109 VIXL_ASSERT(IsFloat64<T>());
6110 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
6111 }
6112 }
6113 }
6114
6115
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)6116 LogicVRegister Simulator::frecpe(VectorFormat vform,
6117 LogicVRegister dst,
6118 const LogicVRegister& src,
6119 FPRounding round) {
6120 dst.ClearForWrite(vform);
6121 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6122 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6123 SimFloat16 input = src.Float<SimFloat16>(i);
6124 dst.SetFloat(vform, i, FPRecipEstimate<SimFloat16>(input, round));
6125 }
6126 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6127 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6128 float input = src.Float<float>(i);
6129 dst.SetFloat(vform, i, FPRecipEstimate<float>(input, round));
6130 }
6131 } else {
6132 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6133 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6134 double input = src.Float<double>(i);
6135 dst.SetFloat(vform, i, FPRecipEstimate<double>(input, round));
6136 }
6137 }
6138 return dst;
6139 }
6140
6141
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6142 LogicVRegister Simulator::ursqrte(VectorFormat vform,
6143 LogicVRegister dst,
6144 const LogicVRegister& src) {
6145 dst.ClearForWrite(vform);
6146 uint64_t operand;
6147 uint32_t result;
6148 double dp_operand, dp_result;
6149 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6150 operand = src.Uint(vform, i);
6151 if (operand <= 0x3FFFFFFF) {
6152 result = 0xFFFFFFFF;
6153 } else {
6154 dp_operand = operand * std::pow(2.0, -32);
6155 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
6156 result = static_cast<uint32_t>(dp_result);
6157 }
6158 dst.SetUint(vform, i, result);
6159 }
6160 return dst;
6161 }
6162
6163
6164 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)6165 double Simulator::recip_estimate(double a) {
6166 int q, s;
6167 double r;
6168 q = static_cast<int>(a * 512.0);
6169 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
6170 s = static_cast<int>(256.0 * r + 0.5);
6171 return static_cast<double>(s) / 256.0;
6172 }
6173
6174
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6175 LogicVRegister Simulator::urecpe(VectorFormat vform,
6176 LogicVRegister dst,
6177 const LogicVRegister& src) {
6178 dst.ClearForWrite(vform);
6179 uint64_t operand;
6180 uint32_t result;
6181 double dp_operand, dp_result;
6182 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6183 operand = src.Uint(vform, i);
6184 if (operand <= 0x7FFFFFFF) {
6185 result = 0xFFFFFFFF;
6186 } else {
6187 dp_operand = operand * std::pow(2.0, -32);
6188 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
6189 result = static_cast<uint32_t>(dp_result);
6190 }
6191 dst.SetUint(vform, i, result);
6192 }
6193 return dst;
6194 }
6195
pfalse(LogicPRegister dst)6196 LogicPRegister Simulator::pfalse(LogicPRegister dst) {
6197 dst.Clear();
6198 return dst;
6199 }
6200
pfirst(LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6201 LogicPRegister Simulator::pfirst(LogicPRegister dst,
6202 const LogicPRegister& pg,
6203 const LogicPRegister& src) {
6204 int first_pg = GetFirstActive(kFormatVnB, pg);
6205 VIXL_ASSERT(first_pg < LaneCountFromFormat(kFormatVnB));
6206 mov(dst, src);
6207 if (first_pg >= 0) dst.SetActive(kFormatVnB, first_pg, true);
6208 return dst;
6209 }
6210
ptrue(VectorFormat vform,LogicPRegister dst,int pattern)6211 LogicPRegister Simulator::ptrue(VectorFormat vform,
6212 LogicPRegister dst,
6213 int pattern) {
6214 int count = GetPredicateConstraintLaneCount(vform, pattern);
6215 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6216 dst.SetActive(vform, i, i < count);
6217 }
6218 return dst;
6219 }
6220
pnext(VectorFormat vform,LogicPRegister dst,const LogicPRegister & pg,const LogicPRegister & src)6221 LogicPRegister Simulator::pnext(VectorFormat vform,
6222 LogicPRegister dst,
6223 const LogicPRegister& pg,
6224 const LogicPRegister& src) {
6225 int next = GetLastActive(vform, src) + 1;
6226 while (next < LaneCountFromFormat(vform)) {
6227 if (pg.IsActive(vform, next)) break;
6228 next++;
6229 }
6230
6231 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6232 dst.SetActive(vform, i, (i == next));
6233 }
6234 return dst;
6235 }
6236
6237 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6238 LogicVRegister Simulator::frecpx(VectorFormat vform,
6239 LogicVRegister dst,
6240 const LogicVRegister& src) {
6241 dst.ClearForWrite(vform);
6242 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6243 T op = src.Float<T>(i);
6244 T result;
6245 if (IsNaN(op)) {
6246 result = FPProcessNaN(op);
6247 } else {
6248 int exp;
6249 uint32_t sign;
6250 if (IsFloat16<T>()) {
6251 sign = Float16Sign(op);
6252 exp = Float16Exp(op);
6253 exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
6254 result = Float16Pack(sign, exp, 0);
6255 } else if (IsFloat32<T>()) {
6256 sign = FloatSign(op);
6257 exp = FloatExp(op);
6258 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
6259 result = FloatPack(sign, exp, 0);
6260 } else {
6261 VIXL_ASSERT(IsFloat64<T>());
6262 sign = DoubleSign(op);
6263 exp = DoubleExp(op);
6264 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
6265 result = DoublePack(sign, exp, 0);
6266 }
6267 }
6268 dst.SetFloat(i, result);
6269 }
6270 return dst;
6271 }
6272
6273
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6274 LogicVRegister Simulator::frecpx(VectorFormat vform,
6275 LogicVRegister dst,
6276 const LogicVRegister& src) {
6277 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6278 frecpx<SimFloat16>(vform, dst, src);
6279 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6280 frecpx<float>(vform, dst, src);
6281 } else {
6282 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6283 frecpx<double>(vform, dst, src);
6284 }
6285 return dst;
6286 }
6287
ftsmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6288 LogicVRegister Simulator::ftsmul(VectorFormat vform,
6289 LogicVRegister dst,
6290 const LogicVRegister& src1,
6291 const LogicVRegister& src2) {
6292 SimVRegister maybe_neg_src1;
6293
6294 // The bottom bit of src2 controls the sign of the result. Use it to
6295 // conditionally invert the sign of one `fmul` operand.
6296 shl(vform, maybe_neg_src1, src2, LaneSizeInBitsFromFormat(vform) - 1);
6297 eor(vform, maybe_neg_src1, maybe_neg_src1, src1);
6298
6299 // Multiply src1 by the modified neg_src1, which is potentially its negation.
6300 // In the case of NaNs, NaN * -NaN will return the first NaN intact, so src1,
6301 // rather than neg_src1, must be the first source argument.
6302 fmul(vform, dst, src1, maybe_neg_src1);
6303
6304 return dst;
6305 }
6306
ftssel(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6307 LogicVRegister Simulator::ftssel(VectorFormat vform,
6308 LogicVRegister dst,
6309 const LogicVRegister& src1,
6310 const LogicVRegister& src2) {
6311 unsigned lane_bits = LaneSizeInBitsFromFormat(vform);
6312 uint64_t sign_bit = UINT64_C(1) << (lane_bits - 1);
6313 uint64_t one;
6314
6315 if (lane_bits == kHRegSize) {
6316 one = Float16ToRawbits(Float16(1.0));
6317 } else if (lane_bits == kSRegSize) {
6318 one = FloatToRawbits(1.0);
6319 } else {
6320 VIXL_ASSERT(lane_bits == kDRegSize);
6321 one = DoubleToRawbits(1.0);
6322 }
6323
6324 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6325 // Use integer accessors for this operation, as this is a data manipulation
6326 // task requiring no calculation.
6327 uint64_t op = src1.Uint(vform, i);
6328
6329 // Only the bottom two bits of the src2 register are significant, indicating
6330 // the quadrant. Bit 0 controls whether src1 or 1.0 is written to dst. Bit 1
6331 // determines the sign of the value written to dst.
6332 uint64_t q = src2.Uint(vform, i);
6333 if ((q & 1) == 1) op = one;
6334 if ((q & 2) == 2) op ^= sign_bit;
6335
6336 dst.SetUint(vform, i, op);
6337 }
6338
6339 return dst;
6340 }
6341
6342 template <typename T>
FTMaddHelper(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,uint64_t coeff_pos,uint64_t coeff_neg)6343 LogicVRegister Simulator::FTMaddHelper(VectorFormat vform,
6344 LogicVRegister dst,
6345 const LogicVRegister& src1,
6346 const LogicVRegister& src2,
6347 uint64_t coeff_pos,
6348 uint64_t coeff_neg) {
6349 SimVRegister zero;
6350 dup_immediate(kFormatVnB, zero, 0);
6351
6352 SimVRegister cf;
6353 SimVRegister cfn;
6354 dup_immediate(vform, cf, coeff_pos);
6355 dup_immediate(vform, cfn, coeff_neg);
6356
6357 // The specification requires testing the top bit of the raw value, rather
6358 // than the sign of the floating point number, so use an integer comparison
6359 // here.
6360 SimPRegister is_neg;
6361 SVEIntCompareVectorsHelper(lt,
6362 vform,
6363 is_neg,
6364 GetPTrue(),
6365 src2,
6366 zero,
6367 false,
6368 LeaveFlags);
6369 mov_merging(vform, cf, is_neg, cfn);
6370
6371 SimVRegister temp;
6372 fabs_<T>(vform, temp, src2);
6373 fmla<T>(vform, cf, cf, src1, temp);
6374 mov(vform, dst, cf);
6375 return dst;
6376 }
6377
6378
ftmad(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,unsigned index)6379 LogicVRegister Simulator::ftmad(VectorFormat vform,
6380 LogicVRegister dst,
6381 const LogicVRegister& src1,
6382 const LogicVRegister& src2,
6383 unsigned index) {
6384 static const uint64_t ftmad_coeff16[] = {0x3c00,
6385 0xb155,
6386 0x2030,
6387 0x0000,
6388 0x0000,
6389 0x0000,
6390 0x0000,
6391 0x0000,
6392 0x3c00,
6393 0xb800,
6394 0x293a,
6395 0x0000,
6396 0x0000,
6397 0x0000,
6398 0x0000,
6399 0x0000};
6400
6401 static const uint64_t ftmad_coeff32[] = {0x3f800000,
6402 0xbe2aaaab,
6403 0x3c088886,
6404 0xb95008b9,
6405 0x36369d6d,
6406 0x00000000,
6407 0x00000000,
6408 0x00000000,
6409 0x3f800000,
6410 0xbf000000,
6411 0x3d2aaaa6,
6412 0xbab60705,
6413 0x37cd37cc,
6414 0x00000000,
6415 0x00000000,
6416 0x00000000};
6417
6418 static const uint64_t ftmad_coeff64[] = {0x3ff0000000000000,
6419 0xbfc5555555555543,
6420 0x3f8111111110f30c,
6421 0xbf2a01a019b92fc6,
6422 0x3ec71de351f3d22b,
6423 0xbe5ae5e2b60f7b91,
6424 0x3de5d8408868552f,
6425 0x0000000000000000,
6426 0x3ff0000000000000,
6427 0xbfe0000000000000,
6428 0x3fa5555555555536,
6429 0xbf56c16c16c13a0b,
6430 0x3efa01a019b1e8d8,
6431 0xbe927e4f7282f468,
6432 0x3e21ee96d2641b13,
6433 0xbda8f76380fbb401};
6434 VIXL_ASSERT((index + 8) < ArrayLength(ftmad_coeff64));
6435 VIXL_ASSERT(ArrayLength(ftmad_coeff16) == ArrayLength(ftmad_coeff64));
6436 VIXL_ASSERT(ArrayLength(ftmad_coeff32) == ArrayLength(ftmad_coeff64));
6437
6438 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6439 FTMaddHelper<SimFloat16>(vform,
6440 dst,
6441 src1,
6442 src2,
6443 ftmad_coeff16[index],
6444 ftmad_coeff16[index + 8]);
6445 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6446 FTMaddHelper<float>(vform,
6447 dst,
6448 src1,
6449 src2,
6450 ftmad_coeff32[index],
6451 ftmad_coeff32[index + 8]);
6452 } else {
6453 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6454 FTMaddHelper<double>(vform,
6455 dst,
6456 src1,
6457 src2,
6458 ftmad_coeff64[index],
6459 ftmad_coeff64[index + 8]);
6460 }
6461 return dst;
6462 }
6463
fexpa(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)6464 LogicVRegister Simulator::fexpa(VectorFormat vform,
6465 LogicVRegister dst,
6466 const LogicVRegister& src) {
6467 static const uint64_t fexpa_coeff16[] = {0x0000, 0x0016, 0x002d, 0x0045,
6468 0x005d, 0x0075, 0x008e, 0x00a8,
6469 0x00c2, 0x00dc, 0x00f8, 0x0114,
6470 0x0130, 0x014d, 0x016b, 0x0189,
6471 0x01a8, 0x01c8, 0x01e8, 0x0209,
6472 0x022b, 0x024e, 0x0271, 0x0295,
6473 0x02ba, 0x02e0, 0x0306, 0x032e,
6474 0x0356, 0x037f, 0x03a9, 0x03d4};
6475
6476 static const uint64_t fexpa_coeff32[] =
6477 {0x000000, 0x0164d2, 0x02cd87, 0x043a29, 0x05aac3, 0x071f62, 0x08980f,
6478 0x0a14d5, 0x0b95c2, 0x0d1adf, 0x0ea43a, 0x1031dc, 0x11c3d3, 0x135a2b,
6479 0x14f4f0, 0x16942d, 0x1837f0, 0x19e046, 0x1b8d3a, 0x1d3eda, 0x1ef532,
6480 0x20b051, 0x227043, 0x243516, 0x25fed7, 0x27cd94, 0x29a15b, 0x2b7a3a,
6481 0x2d583f, 0x2f3b79, 0x3123f6, 0x3311c4, 0x3504f3, 0x36fd92, 0x38fbaf,
6482 0x3aff5b, 0x3d08a4, 0x3f179a, 0x412c4d, 0x4346cd, 0x45672a, 0x478d75,
6483 0x49b9be, 0x4bec15, 0x4e248c, 0x506334, 0x52a81e, 0x54f35b, 0x5744fd,
6484 0x599d16, 0x5bfbb8, 0x5e60f5, 0x60ccdf, 0x633f89, 0x65b907, 0x68396a,
6485 0x6ac0c7, 0x6d4f30, 0x6fe4ba, 0x728177, 0x75257d, 0x77d0df, 0x7a83b3,
6486 0x7d3e0c};
6487
6488 static const uint64_t fexpa_coeff64[] =
6489 {0X0000000000000, 0X02c9a3e778061, 0X059b0d3158574, 0X0874518759bc8,
6490 0X0b5586cf9890f, 0X0e3ec32d3d1a2, 0X11301d0125b51, 0X1429aaea92de0,
6491 0X172b83c7d517b, 0X1a35beb6fcb75, 0X1d4873168b9aa, 0X2063b88628cd6,
6492 0X2387a6e756238, 0X26b4565e27cdd, 0X29e9df51fdee1, 0X2d285a6e4030b,
6493 0X306fe0a31b715, 0X33c08b26416ff, 0X371a7373aa9cb, 0X3a7db34e59ff7,
6494 0X3dea64c123422, 0X4160a21f72e2a, 0X44e086061892d, 0X486a2b5c13cd0,
6495 0X4bfdad5362a27, 0X4f9b2769d2ca7, 0X5342b569d4f82, 0X56f4736b527da,
6496 0X5ab07dd485429, 0X5e76f15ad2148, 0X6247eb03a5585, 0X6623882552225,
6497 0X6a09e667f3bcd, 0X6dfb23c651a2f, 0X71f75e8ec5f74, 0X75feb564267c9,
6498 0X7a11473eb0187, 0X7e2f336cf4e62, 0X82589994cce13, 0X868d99b4492ed,
6499 0X8ace5422aa0db, 0X8f1ae99157736, 0X93737b0cdc5e5, 0X97d829fde4e50,
6500 0X9c49182a3f090, 0Xa0c667b5de565, 0Xa5503b23e255d, 0Xa9e6b5579fdbf,
6501 0Xae89f995ad3ad, 0Xb33a2b84f15fb, 0Xb7f76f2fb5e47, 0Xbcc1e904bc1d2,
6502 0Xc199bdd85529c, 0Xc67f12e57d14b, 0Xcb720dcef9069, 0Xd072d4a07897c,
6503 0Xd5818dcfba487, 0Xda9e603db3285, 0Xdfc97337b9b5f, 0Xe502ee78b3ff6,
6504 0Xea4afa2a490da, 0Xefa1bee615a27, 0Xf50765b6e4540, 0Xfa7c1819e90d8};
6505
6506 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6507 int index_highbit = 5;
6508 int op_highbit, op_shift;
6509 const uint64_t* fexpa_coeff;
6510
6511 if (lane_size == kHRegSize) {
6512 index_highbit = 4;
6513 VIXL_ASSERT(ArrayLength(fexpa_coeff16) == (1U << (index_highbit + 1)));
6514 fexpa_coeff = fexpa_coeff16;
6515 op_highbit = 9;
6516 op_shift = 10;
6517 } else if (lane_size == kSRegSize) {
6518 VIXL_ASSERT(ArrayLength(fexpa_coeff32) == (1U << (index_highbit + 1)));
6519 fexpa_coeff = fexpa_coeff32;
6520 op_highbit = 13;
6521 op_shift = 23;
6522 } else {
6523 VIXL_ASSERT(lane_size == kDRegSize);
6524 VIXL_ASSERT(ArrayLength(fexpa_coeff64) == (1U << (index_highbit + 1)));
6525 fexpa_coeff = fexpa_coeff64;
6526 op_highbit = 16;
6527 op_shift = 52;
6528 }
6529
6530 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6531 uint64_t op = src.Uint(vform, i);
6532 uint64_t result = fexpa_coeff[Bits(op, index_highbit, 0)];
6533 result |= (Bits(op, op_highbit, index_highbit + 1) << op_shift);
6534 dst.SetUint(vform, i, result);
6535 }
6536 return dst;
6537 }
6538
6539 template <typename T>
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6540 LogicVRegister Simulator::fscale(VectorFormat vform,
6541 LogicVRegister dst,
6542 const LogicVRegister& src1,
6543 const LogicVRegister& src2) {
6544 T two = T(2.0);
6545 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6546 T s1 = src1.Float<T>(i);
6547 if (!IsNaN(s1)) {
6548 int64_t scale = src2.Int(vform, i);
6549 // TODO: this is a low-performance implementation, but it's simple and
6550 // less likely to be buggy. Consider replacing it with something faster.
6551
6552 // Scales outside of these bounds become infinity or zero, so there's no
6553 // point iterating further.
6554 scale = std::min<int64_t>(std::max<int64_t>(scale, -2048), 2048);
6555
6556 // Compute s1 * 2 ^ scale. If scale is positive, multiply by two and
6557 // decrement scale until it's zero.
6558 while (scale-- > 0) {
6559 s1 = FPMul(s1, two);
6560 }
6561
6562 // If scale is negative, divide by two and increment scale until it's
6563 // zero. Initially, scale is (src2 - 1), so we pre-increment.
6564 while (++scale < 0) {
6565 s1 = FPDiv(s1, two);
6566 }
6567 }
6568 dst.SetFloat<T>(i, s1);
6569 }
6570 return dst;
6571 }
6572
fscale(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)6573 LogicVRegister Simulator::fscale(VectorFormat vform,
6574 LogicVRegister dst,
6575 const LogicVRegister& src1,
6576 const LogicVRegister& src2) {
6577 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
6578 fscale<SimFloat16>(vform, dst, src1, src2);
6579 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
6580 fscale<float>(vform, dst, src1, src2);
6581 } else {
6582 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
6583 fscale<double>(vform, dst, src1, src2);
6584 }
6585 return dst;
6586 }
6587
scvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6588 LogicVRegister Simulator::scvtf(VectorFormat vform,
6589 unsigned dst_data_size_in_bits,
6590 unsigned src_data_size_in_bits,
6591 LogicVRegister dst,
6592 const LogicPRegister& pg,
6593 const LogicVRegister& src,
6594 FPRounding round,
6595 int fbits) {
6596 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6597 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6598
6599 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6600 if (!pg.IsActive(vform, i)) continue;
6601
6602 int64_t value = ExtractSignedBitfield64(src_data_size_in_bits - 1,
6603 0,
6604 src.Uint(vform, i));
6605
6606 switch (dst_data_size_in_bits) {
6607 case kHRegSize: {
6608 SimFloat16 result = FixedToFloat16(value, fbits, round);
6609 dst.SetUint(vform, i, Float16ToRawbits(result));
6610 break;
6611 }
6612 case kSRegSize: {
6613 float result = FixedToFloat(value, fbits, round);
6614 dst.SetUint(vform, i, FloatToRawbits(result));
6615 break;
6616 }
6617 case kDRegSize: {
6618 double result = FixedToDouble(value, fbits, round);
6619 dst.SetUint(vform, i, DoubleToRawbits(result));
6620 break;
6621 }
6622 default:
6623 VIXL_UNIMPLEMENTED();
6624 break;
6625 }
6626 }
6627
6628 return dst;
6629 }
6630
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6631 LogicVRegister Simulator::scvtf(VectorFormat vform,
6632 LogicVRegister dst,
6633 const LogicVRegister& src,
6634 int fbits,
6635 FPRounding round) {
6636 return scvtf(vform,
6637 LaneSizeInBitsFromFormat(vform),
6638 LaneSizeInBitsFromFormat(vform),
6639 dst,
6640 GetPTrue(),
6641 src,
6642 round,
6643 fbits);
6644 }
6645
ucvtf(VectorFormat vform,unsigned dst_data_size_in_bits,unsigned src_data_size_in_bits,LogicVRegister dst,const LogicPRegister & pg,const LogicVRegister & src,FPRounding round,int fbits)6646 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6647 unsigned dst_data_size_in_bits,
6648 unsigned src_data_size_in_bits,
6649 LogicVRegister dst,
6650 const LogicPRegister& pg,
6651 const LogicVRegister& src,
6652 FPRounding round,
6653 int fbits) {
6654 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= dst_data_size_in_bits);
6655 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) >= src_data_size_in_bits);
6656
6657 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6658 if (!pg.IsActive(vform, i)) continue;
6659
6660 uint64_t value = ExtractUnsignedBitfield64(src_data_size_in_bits - 1,
6661 0,
6662 src.Uint(vform, i));
6663
6664 switch (dst_data_size_in_bits) {
6665 case kHRegSize: {
6666 SimFloat16 result = UFixedToFloat16(value, fbits, round);
6667 dst.SetUint(vform, i, Float16ToRawbits(result));
6668 break;
6669 }
6670 case kSRegSize: {
6671 float result = UFixedToFloat(value, fbits, round);
6672 dst.SetUint(vform, i, FloatToRawbits(result));
6673 break;
6674 }
6675 case kDRegSize: {
6676 double result = UFixedToDouble(value, fbits, round);
6677 dst.SetUint(vform, i, DoubleToRawbits(result));
6678 break;
6679 }
6680 default:
6681 VIXL_UNIMPLEMENTED();
6682 break;
6683 }
6684 }
6685
6686 return dst;
6687 }
6688
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)6689 LogicVRegister Simulator::ucvtf(VectorFormat vform,
6690 LogicVRegister dst,
6691 const LogicVRegister& src,
6692 int fbits,
6693 FPRounding round) {
6694 return ucvtf(vform,
6695 LaneSizeInBitsFromFormat(vform),
6696 LaneSizeInBitsFromFormat(vform),
6697 dst,
6698 GetPTrue(),
6699 src,
6700 round,
6701 fbits);
6702 }
6703
unpk(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,UnpackType unpack_type,ExtendType extend_type)6704 LogicVRegister Simulator::unpk(VectorFormat vform,
6705 LogicVRegister dst,
6706 const LogicVRegister& src,
6707 UnpackType unpack_type,
6708 ExtendType extend_type) {
6709 VectorFormat vform_half = VectorFormatHalfWidth(vform);
6710 const int lane_count = LaneCountFromFormat(vform);
6711 const int src_start_lane = (unpack_type == kLoHalf) ? 0 : lane_count;
6712
6713 switch (extend_type) {
6714 case kSignedExtend: {
6715 int64_t result[kZRegMaxSizeInBytes];
6716 for (int i = 0; i < lane_count; ++i) {
6717 result[i] = src.Int(vform_half, i + src_start_lane);
6718 }
6719 for (int i = 0; i < lane_count; ++i) {
6720 dst.SetInt(vform, i, result[i]);
6721 }
6722 break;
6723 }
6724 case kUnsignedExtend: {
6725 uint64_t result[kZRegMaxSizeInBytes];
6726 for (int i = 0; i < lane_count; ++i) {
6727 result[i] = src.Uint(vform_half, i + src_start_lane);
6728 }
6729 for (int i = 0; i < lane_count; ++i) {
6730 dst.SetUint(vform, i, result[i]);
6731 }
6732 break;
6733 }
6734 default:
6735 VIXL_UNREACHABLE();
6736 }
6737 return dst;
6738 }
6739
SVEIntCompareVectorsHelper(Condition cond,VectorFormat vform,LogicPRegister dst,const LogicPRegister & mask,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements,FlagsUpdate flags)6740 LogicPRegister Simulator::SVEIntCompareVectorsHelper(Condition cond,
6741 VectorFormat vform,
6742 LogicPRegister dst,
6743 const LogicPRegister& mask,
6744 const LogicVRegister& src1,
6745 const LogicVRegister& src2,
6746 bool is_wide_elements,
6747 FlagsUpdate flags) {
6748 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6749 bool result = false;
6750 if (mask.IsActive(vform, lane)) {
6751 int64_t op1 = 0xbadbeef;
6752 int64_t op2 = 0xbadbeef;
6753 int d_lane = (lane * LaneSizeInBitsFromFormat(vform)) / kDRegSize;
6754 switch (cond) {
6755 case eq:
6756 case ge:
6757 case gt:
6758 case lt:
6759 case le:
6760 case ne:
6761 op1 = src1.Int(vform, lane);
6762 op2 = is_wide_elements ? src2.Int(kFormatVnD, d_lane)
6763 : src2.Int(vform, lane);
6764 break;
6765 case hi:
6766 case hs:
6767 case ls:
6768 case lo:
6769 op1 = src1.Uint(vform, lane);
6770 op2 = is_wide_elements ? src2.Uint(kFormatVnD, d_lane)
6771 : src2.Uint(vform, lane);
6772 break;
6773 default:
6774 VIXL_UNREACHABLE();
6775 }
6776
6777 switch (cond) {
6778 case eq:
6779 result = (op1 == op2);
6780 break;
6781 case ne:
6782 result = (op1 != op2);
6783 break;
6784 case ge:
6785 result = (op1 >= op2);
6786 break;
6787 case gt:
6788 result = (op1 > op2);
6789 break;
6790 case le:
6791 result = (op1 <= op2);
6792 break;
6793 case lt:
6794 result = (op1 < op2);
6795 break;
6796 case hs:
6797 result = (static_cast<uint64_t>(op1) >= static_cast<uint64_t>(op2));
6798 break;
6799 case hi:
6800 result = (static_cast<uint64_t>(op1) > static_cast<uint64_t>(op2));
6801 break;
6802 case ls:
6803 result = (static_cast<uint64_t>(op1) <= static_cast<uint64_t>(op2));
6804 break;
6805 case lo:
6806 result = (static_cast<uint64_t>(op1) < static_cast<uint64_t>(op2));
6807 break;
6808 default:
6809 VIXL_UNREACHABLE();
6810 }
6811 }
6812 dst.SetActive(vform, lane, result);
6813 }
6814
6815 if (flags == SetFlags) PredTest(vform, mask, dst);
6816
6817 return dst;
6818 }
6819
SVEBitwiseShiftHelper(Shift shift_op,VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_wide_elements)6820 LogicVRegister Simulator::SVEBitwiseShiftHelper(Shift shift_op,
6821 VectorFormat vform,
6822 LogicVRegister dst,
6823 const LogicVRegister& src1,
6824 const LogicVRegister& src2,
6825 bool is_wide_elements) {
6826 unsigned lane_size = LaneSizeInBitsFromFormat(vform);
6827 VectorFormat shift_vform = is_wide_elements ? kFormatVnD : vform;
6828
6829 for (int lane = 0; lane < LaneCountFromFormat(vform); lane++) {
6830 int shift_src_lane = lane;
6831 if (is_wide_elements) {
6832 // If the shift amount comes from wide elements, select the D-sized lane
6833 // which occupies the corresponding lanes of the value to be shifted.
6834 shift_src_lane = (lane * lane_size) / kDRegSize;
6835 }
6836 uint64_t shift_amount = src2.Uint(shift_vform, shift_src_lane);
6837
6838 // Saturate shift_amount to the size of the lane that will be shifted.
6839 if (shift_amount > lane_size) shift_amount = lane_size;
6840
6841 uint64_t value = src1.Uint(vform, lane);
6842 int64_t result = ShiftOperand(lane_size,
6843 value,
6844 shift_op,
6845 static_cast<unsigned>(shift_amount));
6846 dst.SetUint(vform, lane, result);
6847 }
6848
6849 return dst;
6850 }
6851
asrd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int shift)6852 LogicVRegister Simulator::asrd(VectorFormat vform,
6853 LogicVRegister dst,
6854 const LogicVRegister& src1,
6855 int shift) {
6856 VIXL_ASSERT((shift > 0) && (static_cast<unsigned>(shift) <=
6857 LaneSizeInBitsFromFormat(vform)));
6858
6859 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6860 int64_t value = src1.Int(vform, i);
6861 if (shift <= 63) {
6862 if (value < 0) {
6863 // The max possible mask is 0x7fff'ffff'ffff'ffff, which can be safely
6864 // cast to int64_t, and cannot cause signed overflow in the result.
6865 value = value + GetUintMask(shift);
6866 }
6867 value = ShiftOperand(kDRegSize, value, ASR, shift);
6868 } else {
6869 value = 0;
6870 }
6871 dst.SetInt(vform, i, value);
6872 }
6873 return dst;
6874 }
6875
SVEBitwiseLogicalUnpredicatedHelper(LogicalOp logical_op,VectorFormat vform,LogicVRegister zd,const LogicVRegister & zn,const LogicVRegister & zm)6876 LogicVRegister Simulator::SVEBitwiseLogicalUnpredicatedHelper(
6877 LogicalOp logical_op,
6878 VectorFormat vform,
6879 LogicVRegister zd,
6880 const LogicVRegister& zn,
6881 const LogicVRegister& zm) {
6882 VIXL_ASSERT(IsSVEFormat(vform));
6883 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6884 uint64_t op1 = zn.Uint(vform, i);
6885 uint64_t op2 = zm.Uint(vform, i);
6886 uint64_t result;
6887 switch (logical_op) {
6888 case AND:
6889 result = op1 & op2;
6890 break;
6891 case BIC:
6892 result = op1 & ~op2;
6893 break;
6894 case EOR:
6895 result = op1 ^ op2;
6896 break;
6897 case ORR:
6898 result = op1 | op2;
6899 break;
6900 default:
6901 result = 0;
6902 VIXL_UNIMPLEMENTED();
6903 }
6904 zd.SetUint(vform, i, result);
6905 }
6906
6907 return zd;
6908 }
6909
SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,LogicPRegister pd,const LogicPRegister & pn,const LogicPRegister & pm)6910 LogicPRegister Simulator::SVEPredicateLogicalHelper(SVEPredicateLogicalOp op,
6911 LogicPRegister pd,
6912 const LogicPRegister& pn,
6913 const LogicPRegister& pm) {
6914 for (int i = 0; i < pn.GetChunkCount(); i++) {
6915 LogicPRegister::ChunkType op1 = pn.GetChunk(i);
6916 LogicPRegister::ChunkType op2 = pm.GetChunk(i);
6917 LogicPRegister::ChunkType result;
6918 switch (op) {
6919 case ANDS_p_p_pp_z:
6920 case AND_p_p_pp_z:
6921 result = op1 & op2;
6922 break;
6923 case BICS_p_p_pp_z:
6924 case BIC_p_p_pp_z:
6925 result = op1 & ~op2;
6926 break;
6927 case EORS_p_p_pp_z:
6928 case EOR_p_p_pp_z:
6929 result = op1 ^ op2;
6930 break;
6931 case NANDS_p_p_pp_z:
6932 case NAND_p_p_pp_z:
6933 result = ~(op1 & op2);
6934 break;
6935 case NORS_p_p_pp_z:
6936 case NOR_p_p_pp_z:
6937 result = ~(op1 | op2);
6938 break;
6939 case ORNS_p_p_pp_z:
6940 case ORN_p_p_pp_z:
6941 result = op1 | ~op2;
6942 break;
6943 case ORRS_p_p_pp_z:
6944 case ORR_p_p_pp_z:
6945 result = op1 | op2;
6946 break;
6947 default:
6948 result = 0;
6949 VIXL_UNIMPLEMENTED();
6950 }
6951 pd.SetChunk(i, result);
6952 }
6953 return pd;
6954 }
6955
SVEBitwiseImmHelper(SVEBitwiseLogicalWithImm_UnpredicatedOp op,VectorFormat vform,LogicVRegister zd,uint64_t imm)6956 LogicVRegister Simulator::SVEBitwiseImmHelper(
6957 SVEBitwiseLogicalWithImm_UnpredicatedOp op,
6958 VectorFormat vform,
6959 LogicVRegister zd,
6960 uint64_t imm) {
6961 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
6962 uint64_t op1 = zd.Uint(vform, i);
6963 uint64_t result;
6964 switch (op) {
6965 case AND_z_zi:
6966 result = op1 & imm;
6967 break;
6968 case EOR_z_zi:
6969 result = op1 ^ imm;
6970 break;
6971 case ORR_z_zi:
6972 result = op1 | imm;
6973 break;
6974 default:
6975 result = 0;
6976 VIXL_UNIMPLEMENTED();
6977 }
6978 zd.SetUint(vform, i, result);
6979 }
6980
6981 return zd;
6982 }
6983
SVEStructuredStoreHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr)6984 void Simulator::SVEStructuredStoreHelper(VectorFormat vform,
6985 const LogicPRegister& pg,
6986 unsigned zt_code,
6987 const LogicSVEAddressVector& addr) {
6988 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
6989
6990 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
6991 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
6992 int msize_in_bytes = addr.GetMsizeInBytes();
6993 int reg_count = addr.GetRegCount();
6994
6995 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
6996 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
6997
6998 unsigned zt_codes[4] = {zt_code,
6999 (zt_code + 1) % kNumberOfZRegisters,
7000 (zt_code + 2) % kNumberOfZRegisters,
7001 (zt_code + 3) % kNumberOfZRegisters};
7002
7003 LogicVRegister zt[4] = {
7004 ReadVRegister(zt_codes[0]),
7005 ReadVRegister(zt_codes[1]),
7006 ReadVRegister(zt_codes[2]),
7007 ReadVRegister(zt_codes[3]),
7008 };
7009
7010 // For unpacked forms (e.g. `st1b { z0.h }, ...`, the upper parts of the lanes
7011 // are ignored, so read the source register using the VectorFormat that
7012 // corresponds with the storage format, and multiply the index accordingly.
7013 VectorFormat unpack_vform =
7014 SVEFormatFromLaneSizeInBytesLog2(msize_in_bytes_log2);
7015 int unpack_shift = esize_in_bytes_log2 - msize_in_bytes_log2;
7016
7017 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7018 if (!pg.IsActive(vform, i)) continue;
7019
7020 for (int r = 0; r < reg_count; r++) {
7021 uint64_t element_address = addr.GetElementAddress(i, r);
7022 StoreLane(zt[r], unpack_vform, i << unpack_shift, element_address);
7023 }
7024 }
7025
7026 if (ShouldTraceWrites()) {
7027 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7028 if (esize_in_bytes_log2 == msize_in_bytes_log2) {
7029 // Use an FP format where it's likely that we're accessing FP data.
7030 format = GetPrintRegisterFormatTryFP(format);
7031 }
7032 // Stores don't represent a change to the source register's value, so only
7033 // print the relevant part of the value.
7034 format = GetPrintRegPartial(format);
7035
7036 PrintZStructAccess(zt_code,
7037 reg_count,
7038 pg,
7039 format,
7040 msize_in_bytes,
7041 "->",
7042 addr);
7043 }
7044 }
7045
SVEStructuredLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,bool is_signed)7046 void Simulator::SVEStructuredLoadHelper(VectorFormat vform,
7047 const LogicPRegister& pg,
7048 unsigned zt_code,
7049 const LogicSVEAddressVector& addr,
7050 bool is_signed) {
7051 int esize_in_bytes_log2 = LaneSizeInBytesLog2FromFormat(vform);
7052 int msize_in_bytes_log2 = addr.GetMsizeInBytesLog2();
7053 int msize_in_bytes = addr.GetMsizeInBytes();
7054 int reg_count = addr.GetRegCount();
7055
7056 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7057 VIXL_ASSERT(esize_in_bytes_log2 >= msize_in_bytes_log2);
7058 VIXL_ASSERT((reg_count >= 1) && (reg_count <= 4));
7059
7060 unsigned zt_codes[4] = {zt_code,
7061 (zt_code + 1) % kNumberOfZRegisters,
7062 (zt_code + 2) % kNumberOfZRegisters,
7063 (zt_code + 3) % kNumberOfZRegisters};
7064 LogicVRegister zt[4] = {
7065 ReadVRegister(zt_codes[0]),
7066 ReadVRegister(zt_codes[1]),
7067 ReadVRegister(zt_codes[2]),
7068 ReadVRegister(zt_codes[3]),
7069 };
7070
7071 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7072 for (int r = 0; r < reg_count; r++) {
7073 uint64_t element_address = addr.GetElementAddress(i, r);
7074
7075 if (!pg.IsActive(vform, i)) {
7076 zt[r].SetUint(vform, i, 0);
7077 continue;
7078 }
7079
7080 if (is_signed) {
7081 LoadIntToLane(zt[r], vform, msize_in_bytes, i, element_address);
7082 } else {
7083 LoadUintToLane(zt[r], vform, msize_in_bytes, i, element_address);
7084 }
7085 }
7086 }
7087
7088 if (ShouldTraceVRegs()) {
7089 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7090 if ((esize_in_bytes_log2 == msize_in_bytes_log2) && !is_signed) {
7091 // Use an FP format where it's likely that we're accessing FP data.
7092 format = GetPrintRegisterFormatTryFP(format);
7093 }
7094 PrintZStructAccess(zt_code,
7095 reg_count,
7096 pg,
7097 format,
7098 msize_in_bytes,
7099 "<-",
7100 addr);
7101 }
7102 }
7103
brka(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7104 LogicPRegister Simulator::brka(LogicPRegister pd,
7105 const LogicPRegister& pg,
7106 const LogicPRegister& pn) {
7107 bool break_ = false;
7108 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7109 if (pg.IsActive(kFormatVnB, i)) {
7110 pd.SetActive(kFormatVnB, i, !break_);
7111 break_ |= pn.IsActive(kFormatVnB, i);
7112 }
7113 }
7114
7115 return pd;
7116 }
7117
brkb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn)7118 LogicPRegister Simulator::brkb(LogicPRegister pd,
7119 const LogicPRegister& pg,
7120 const LogicPRegister& pn) {
7121 bool break_ = false;
7122 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7123 if (pg.IsActive(kFormatVnB, i)) {
7124 break_ |= pn.IsActive(kFormatVnB, i);
7125 pd.SetActive(kFormatVnB, i, !break_);
7126 }
7127 }
7128
7129 return pd;
7130 }
7131
brkn(LogicPRegister pdm,const LogicPRegister & pg,const LogicPRegister & pn)7132 LogicPRegister Simulator::brkn(LogicPRegister pdm,
7133 const LogicPRegister& pg,
7134 const LogicPRegister& pn) {
7135 if (!IsLastActive(kFormatVnB, pg, pn)) {
7136 pfalse(pdm);
7137 }
7138 return pdm;
7139 }
7140
brkpa(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7141 LogicPRegister Simulator::brkpa(LogicPRegister pd,
7142 const LogicPRegister& pg,
7143 const LogicPRegister& pn,
7144 const LogicPRegister& pm) {
7145 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7146
7147 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7148 bool active = false;
7149 if (pg.IsActive(kFormatVnB, i)) {
7150 active = last_active;
7151 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7152 }
7153 pd.SetActive(kFormatVnB, i, active);
7154 }
7155
7156 return pd;
7157 }
7158
brkpb(LogicPRegister pd,const LogicPRegister & pg,const LogicPRegister & pn,const LogicPRegister & pm)7159 LogicPRegister Simulator::brkpb(LogicPRegister pd,
7160 const LogicPRegister& pg,
7161 const LogicPRegister& pn,
7162 const LogicPRegister& pm) {
7163 bool last_active = IsLastActive(kFormatVnB, pg, pn);
7164
7165 for (int i = 0; i < LaneCountFromFormat(kFormatVnB); i++) {
7166 bool active = false;
7167 if (pg.IsActive(kFormatVnB, i)) {
7168 last_active = last_active && !pm.IsActive(kFormatVnB, i);
7169 active = last_active;
7170 }
7171 pd.SetActive(kFormatVnB, i, active);
7172 }
7173
7174 return pd;
7175 }
7176
SVEFaultTolerantLoadHelper(VectorFormat vform,const LogicPRegister & pg,unsigned zt_code,const LogicSVEAddressVector & addr,SVEFaultTolerantLoadType type,bool is_signed)7177 void Simulator::SVEFaultTolerantLoadHelper(VectorFormat vform,
7178 const LogicPRegister& pg,
7179 unsigned zt_code,
7180 const LogicSVEAddressVector& addr,
7181 SVEFaultTolerantLoadType type,
7182 bool is_signed) {
7183 int esize_in_bytes = LaneSizeInBytesFromFormat(vform);
7184 int msize_in_bits = addr.GetMsizeInBits();
7185 int msize_in_bytes = addr.GetMsizeInBytes();
7186
7187 VIXL_ASSERT(zt_code < kNumberOfZRegisters);
7188 VIXL_ASSERT(esize_in_bytes >= msize_in_bytes);
7189 VIXL_ASSERT(addr.GetRegCount() == 1);
7190
7191 LogicVRegister zt = ReadVRegister(zt_code);
7192 LogicPRegister ffr = ReadFFR();
7193
7194 // Non-faulting loads are allowed to fail arbitrarily. To stress user
7195 // code, fail a random element in roughly one in eight full-vector loads.
7196 uint32_t rnd = static_cast<uint32_t>(jrand48(rand_state_));
7197 int fake_fault_at_lane = rnd % (LaneCountFromFormat(vform) * 8);
7198
7199 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7200 uint64_t value = 0;
7201
7202 if (pg.IsActive(vform, i)) {
7203 uint64_t element_address = addr.GetElementAddress(i, 0);
7204
7205 if (type == kSVEFirstFaultLoad) {
7206 // First-faulting loads always load the first active element, regardless
7207 // of FFR. The result will be discarded if its FFR lane is inactive, but
7208 // it could still generate a fault.
7209 value = MemReadUint(msize_in_bytes, element_address);
7210 // All subsequent elements have non-fault semantics.
7211 type = kSVENonFaultLoad;
7212
7213 } else if (ffr.IsActive(vform, i)) {
7214 // Simulation of fault-tolerant loads relies on system calls, and is
7215 // likely to be relatively slow, so we only actually perform the load if
7216 // its FFR lane is active.
7217
7218 bool can_read = (i < fake_fault_at_lane) &&
7219 CanReadMemory(element_address, msize_in_bytes);
7220 if (can_read) {
7221 value = MemReadUint(msize_in_bytes, element_address);
7222 } else {
7223 // Propagate the fault to the end of FFR.
7224 for (int j = i; j < LaneCountFromFormat(vform); j++) {
7225 ffr.SetActive(vform, j, false);
7226 }
7227 }
7228 }
7229 }
7230
7231 // The architecture permits a few possible results for inactive FFR lanes
7232 // (including those caused by a fault in this instruction). We choose to
7233 // leave the register value unchanged (like merging predication) because
7234 // no other input to this instruction can have the same behaviour.
7235 //
7236 // Note that this behaviour takes precedence over pg's zeroing predication.
7237
7238 if (ffr.IsActive(vform, i)) {
7239 int msb = msize_in_bits - 1;
7240 if (is_signed) {
7241 zt.SetInt(vform, i, ExtractSignedBitfield64(msb, 0, value));
7242 } else {
7243 zt.SetUint(vform, i, ExtractUnsignedBitfield64(msb, 0, value));
7244 }
7245 }
7246 }
7247
7248 if (ShouldTraceVRegs()) {
7249 PrintRegisterFormat format = GetPrintRegisterFormat(vform);
7250 if ((esize_in_bytes == msize_in_bytes) && !is_signed) {
7251 // Use an FP format where it's likely that we're accessing FP data.
7252 format = GetPrintRegisterFormatTryFP(format);
7253 }
7254 // Log accessed lanes that are active in both pg and ffr. PrintZStructAccess
7255 // expects a single mask, so combine the two predicates.
7256 SimPRegister mask;
7257 SVEPredicateLogicalHelper(AND_p_p_pp_z, mask, pg, ffr);
7258 PrintZStructAccess(zt_code, 1, mask, format, msize_in_bytes, "<-", addr);
7259 }
7260 }
7261
SVEGatherLoadScalarPlusVectorHelper(const Instruction * instr,VectorFormat vform,SVEOffsetModifier mod)7262 void Simulator::SVEGatherLoadScalarPlusVectorHelper(const Instruction* instr,
7263 VectorFormat vform,
7264 SVEOffsetModifier mod) {
7265 bool is_signed = instr->ExtractBit(14) == 0;
7266 bool is_ff = instr->ExtractBit(13) == 1;
7267 // Note that these instructions don't use the Dtype encoding.
7268 int msize_in_bytes_log2 = instr->ExtractBits(24, 23);
7269 int scale = instr->ExtractBit(21) * msize_in_bytes_log2;
7270 uint64_t base = ReadXRegister(instr->GetRn());
7271 LogicSVEAddressVector addr(base,
7272 &ReadVRegister(instr->GetRm()),
7273 vform,
7274 mod,
7275 scale);
7276 addr.SetMsizeInBytesLog2(msize_in_bytes_log2);
7277 if (is_ff) {
7278 SVEFaultTolerantLoadHelper(vform,
7279 ReadPRegister(instr->GetPgLow8()),
7280 instr->GetRt(),
7281 addr,
7282 kSVEFirstFaultLoad,
7283 is_signed);
7284 } else {
7285 SVEStructuredLoadHelper(vform,
7286 ReadPRegister(instr->GetPgLow8()),
7287 instr->GetRt(),
7288 addr,
7289 is_signed);
7290 }
7291 }
7292
GetFirstActive(VectorFormat vform,const LogicPRegister & pg) const7293 int Simulator::GetFirstActive(VectorFormat vform,
7294 const LogicPRegister& pg) const {
7295 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7296 if (pg.IsActive(vform, i)) return i;
7297 }
7298 return -1;
7299 }
7300
GetLastActive(VectorFormat vform,const LogicPRegister & pg) const7301 int Simulator::GetLastActive(VectorFormat vform,
7302 const LogicPRegister& pg) const {
7303 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
7304 if (pg.IsActive(vform, i)) return i;
7305 }
7306 return -1;
7307 }
7308
CountActiveLanes(VectorFormat vform,const LogicPRegister & pg) const7309 int Simulator::CountActiveLanes(VectorFormat vform,
7310 const LogicPRegister& pg) const {
7311 int count = 0;
7312 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7313 count += pg.IsActive(vform, i) ? 1 : 0;
7314 }
7315 return count;
7316 }
7317
CountActiveAndTrueLanes(VectorFormat vform,const LogicPRegister & pg,const LogicPRegister & pn) const7318 int Simulator::CountActiveAndTrueLanes(VectorFormat vform,
7319 const LogicPRegister& pg,
7320 const LogicPRegister& pn) const {
7321 int count = 0;
7322 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
7323 count += (pg.IsActive(vform, i) && pn.IsActive(vform, i)) ? 1 : 0;
7324 }
7325 return count;
7326 }
7327
GetPredicateConstraintLaneCount(VectorFormat vform,int pattern) const7328 int Simulator::GetPredicateConstraintLaneCount(VectorFormat vform,
7329 int pattern) const {
7330 VIXL_ASSERT(IsSVEFormat(vform));
7331 int all = LaneCountFromFormat(vform);
7332 VIXL_ASSERT(all > 0);
7333
7334 switch (pattern) {
7335 case SVE_VL1:
7336 case SVE_VL2:
7337 case SVE_VL3:
7338 case SVE_VL4:
7339 case SVE_VL5:
7340 case SVE_VL6:
7341 case SVE_VL7:
7342 case SVE_VL8:
7343 // VL1-VL8 are encoded directly.
7344 VIXL_STATIC_ASSERT(SVE_VL1 == 1);
7345 VIXL_STATIC_ASSERT(SVE_VL8 == 8);
7346 return (pattern <= all) ? pattern : 0;
7347 case SVE_VL16:
7348 case SVE_VL32:
7349 case SVE_VL64:
7350 case SVE_VL128:
7351 case SVE_VL256: {
7352 // VL16-VL256 are encoded as log2(N) + c.
7353 int min = 16 << (pattern - SVE_VL16);
7354 return (min <= all) ? min : 0;
7355 }
7356 // Special cases.
7357 case SVE_POW2:
7358 return 1 << HighestSetBitPosition(all);
7359 case SVE_MUL4:
7360 return all - (all % 4);
7361 case SVE_MUL3:
7362 return all - (all % 3);
7363 case SVE_ALL:
7364 return all;
7365 }
7366 // Unnamed cases archicturally return 0.
7367 return 0;
7368 }
7369
GetStructAddress(int lane) const7370 uint64_t LogicSVEAddressVector::GetStructAddress(int lane) const {
7371 if (IsContiguous()) {
7372 return base_ + (lane * GetRegCount()) * GetMsizeInBytes();
7373 }
7374
7375 VIXL_ASSERT(IsScatterGather());
7376 VIXL_ASSERT(vector_ != NULL);
7377
7378 // For scatter-gather accesses, we need to extract the offset from vector_,
7379 // and apply modifiers.
7380
7381 uint64_t offset = 0;
7382 switch (vector_form_) {
7383 case kFormatVnS:
7384 offset = vector_->GetLane<uint32_t>(lane);
7385 break;
7386 case kFormatVnD:
7387 offset = vector_->GetLane<uint64_t>(lane);
7388 break;
7389 default:
7390 VIXL_UNIMPLEMENTED();
7391 break;
7392 }
7393
7394 switch (vector_mod_) {
7395 case SVE_MUL_VL:
7396 VIXL_UNIMPLEMENTED();
7397 break;
7398 case SVE_LSL:
7399 // We apply the shift below. There's nothing to do here.
7400 break;
7401 case NO_SVE_OFFSET_MODIFIER:
7402 VIXL_ASSERT(vector_shift_ == 0);
7403 break;
7404 case SVE_UXTW:
7405 offset = ExtractUnsignedBitfield64(kWRegSize - 1, 0, offset);
7406 break;
7407 case SVE_SXTW:
7408 offset = ExtractSignedBitfield64(kWRegSize - 1, 0, offset);
7409 break;
7410 }
7411
7412 return base_ + (offset << vector_shift_);
7413 }
7414
7415
7416 } // namespace aarch64
7417 } // namespace vixl
7418
7419 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
7420