1 // Copyright 2015, VIXL authors
2 // All rights reserved.
3 //
4 // Redistribution and use in source and binary forms, with or without
5 // modification, are permitted provided that the following conditions are met:
6 //
7 // * Redistributions of source code must retain the above copyright notice,
8 // this list of conditions and the following disclaimer.
9 // * Redistributions in binary form must reproduce the above copyright notice,
10 // this list of conditions and the following disclaimer in the documentation
11 // and/or other materials provided with the distribution.
12 // * Neither the name of ARM Limited nor the names of its contributors may be
13 // used to endorse or promote products derived from this software without
14 // specific prior written permission.
15 //
16 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS CONTRIBUTORS "AS IS" AND
17 // ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
18 // WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
19 // DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE
20 // FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 // DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
22 // SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
23 // CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
24 // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26
27 #ifdef VIXL_INCLUDE_SIMULATOR_AARCH64
28
29 #include <cmath>
30
31 #include "simulator-aarch64.h"
32
33 namespace vixl {
34 namespace aarch64 {
35
36 using vixl::internal::SimFloat16;
37
// Type query: IsFloat64<T>() answers whether T is exactly `double`.
// The generic version below is the fallback used for every other type.
template <typename T>
bool IsFloat64() {
  return false;
}

// The single specialisation that answers "yes".
template <>
bool IsFloat64<double>() {
  return true;
}
46
// Type query: IsFloat32<T>() answers whether T is exactly `float`.
// The generic version below is the fallback used for every other type.
template <typename T>
bool IsFloat32() {
  return false;
}

// The single specialisation that answers "yes".
template <>
bool IsFloat32<float>() {
  return true;
}
55
56 template <typename T>
IsFloat16()57 bool IsFloat16() {
58 return false;
59 }
60 template <>
IsFloat16()61 bool IsFloat16<Float16>() {
62 return true;
63 }
64 template <>
IsFloat16()65 bool IsFloat16<SimFloat16>() {
66 return true;
67 }
68
69 template <>
FPDefaultNaN()70 double Simulator::FPDefaultNaN<double>() {
71 return kFP64DefaultNaN;
72 }
73
74
75 template <>
FPDefaultNaN()76 float Simulator::FPDefaultNaN<float>() {
77 return kFP32DefaultNaN;
78 }
79
80
81 template <>
FPDefaultNaN()82 SimFloat16 Simulator::FPDefaultNaN<SimFloat16>() {
83 return SimFloat16(kFP16DefaultNaN);
84 }
85
86
FixedToDouble(int64_t src,int fbits,FPRounding round)87 double Simulator::FixedToDouble(int64_t src, int fbits, FPRounding round) {
88 if (src >= 0) {
89 return UFixedToDouble(src, fbits, round);
90 } else if (src == INT64_MIN) {
91 return -UFixedToDouble(src, fbits, round);
92 } else {
93 return -UFixedToDouble(-src, fbits, round);
94 }
95 }
96
97
UFixedToDouble(uint64_t src,int fbits,FPRounding round)98 double Simulator::UFixedToDouble(uint64_t src, int fbits, FPRounding round) {
99 // An input of 0 is a special case because the result is effectively
100 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
101 if (src == 0) {
102 return 0.0;
103 }
104
105 // Calculate the exponent. The highest significant bit will have the value
106 // 2^exponent.
107 const int highest_significant_bit = 63 - CountLeadingZeros(src);
108 const int64_t exponent = highest_significant_bit - fbits;
109
110 return FPRoundToDouble(0, exponent, src, round);
111 }
112
113
FixedToFloat(int64_t src,int fbits,FPRounding round)114 float Simulator::FixedToFloat(int64_t src, int fbits, FPRounding round) {
115 if (src >= 0) {
116 return UFixedToFloat(src, fbits, round);
117 } else if (src == INT64_MIN) {
118 return -UFixedToFloat(src, fbits, round);
119 } else {
120 return -UFixedToFloat(-src, fbits, round);
121 }
122 }
123
124
UFixedToFloat(uint64_t src,int fbits,FPRounding round)125 float Simulator::UFixedToFloat(uint64_t src, int fbits, FPRounding round) {
126 // An input of 0 is a special case because the result is effectively
127 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
128 if (src == 0) {
129 return 0.0f;
130 }
131
132 // Calculate the exponent. The highest significant bit will have the value
133 // 2^exponent.
134 const int highest_significant_bit = 63 - CountLeadingZeros(src);
135 const int32_t exponent = highest_significant_bit - fbits;
136
137 return FPRoundToFloat(0, exponent, src, round);
138 }
139
140
FixedToFloat16(int64_t src,int fbits,FPRounding round)141 SimFloat16 Simulator::FixedToFloat16(int64_t src, int fbits, FPRounding round) {
142 if (src >= 0) {
143 return UFixedToFloat16(src, fbits, round);
144 } else if (src == INT64_MIN) {
145 return -UFixedToFloat16(src, fbits, round);
146 } else {
147 return -UFixedToFloat16(-src, fbits, round);
148 }
149 }
150
151
UFixedToFloat16(uint64_t src,int fbits,FPRounding round)152 SimFloat16 Simulator::UFixedToFloat16(uint64_t src,
153 int fbits,
154 FPRounding round) {
155 // An input of 0 is a special case because the result is effectively
156 // subnormal: The exponent is encoded as 0 and there is no implicit 1 bit.
157 if (src == 0) {
158 return 0.0f;
159 }
160
161 // Calculate the exponent. The highest significant bit will have the value
162 // 2^exponent.
163 const int highest_significant_bit = 63 - CountLeadingZeros(src);
164 const int16_t exponent = highest_significant_bit - fbits;
165
166 return FPRoundToFloat16(0, exponent, src, round);
167 }
168
169
ld1(VectorFormat vform,LogicVRegister dst,uint64_t addr)170 void Simulator::ld1(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
171 dst.ClearForWrite(vform);
172 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
173 dst.ReadUintFromMem(vform, i, addr);
174 addr += LaneSizeInBytesFromFormat(vform);
175 }
176 }
177
178
ld1(VectorFormat vform,LogicVRegister dst,int index,uint64_t addr)179 void Simulator::ld1(VectorFormat vform,
180 LogicVRegister dst,
181 int index,
182 uint64_t addr) {
183 dst.ReadUintFromMem(vform, index, addr);
184 }
185
186
ld1r(VectorFormat vform,LogicVRegister dst,uint64_t addr)187 void Simulator::ld1r(VectorFormat vform, LogicVRegister dst, uint64_t addr) {
188 dst.ClearForWrite(vform);
189 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
190 dst.ReadUintFromMem(vform, i, addr);
191 }
192 }
193
194
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr1)195 void Simulator::ld2(VectorFormat vform,
196 LogicVRegister dst1,
197 LogicVRegister dst2,
198 uint64_t addr1) {
199 dst1.ClearForWrite(vform);
200 dst2.ClearForWrite(vform);
201 int esize = LaneSizeInBytesFromFormat(vform);
202 uint64_t addr2 = addr1 + esize;
203 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
204 dst1.ReadUintFromMem(vform, i, addr1);
205 dst2.ReadUintFromMem(vform, i, addr2);
206 addr1 += 2 * esize;
207 addr2 += 2 * esize;
208 }
209 }
210
211
ld2(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,int index,uint64_t addr1)212 void Simulator::ld2(VectorFormat vform,
213 LogicVRegister dst1,
214 LogicVRegister dst2,
215 int index,
216 uint64_t addr1) {
217 dst1.ClearForWrite(vform);
218 dst2.ClearForWrite(vform);
219 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
220 dst1.ReadUintFromMem(vform, index, addr1);
221 dst2.ReadUintFromMem(vform, index, addr2);
222 }
223
224
ld2r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,uint64_t addr)225 void Simulator::ld2r(VectorFormat vform,
226 LogicVRegister dst1,
227 LogicVRegister dst2,
228 uint64_t addr) {
229 dst1.ClearForWrite(vform);
230 dst2.ClearForWrite(vform);
231 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
232 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
233 dst1.ReadUintFromMem(vform, i, addr);
234 dst2.ReadUintFromMem(vform, i, addr2);
235 }
236 }
237
238
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr1)239 void Simulator::ld3(VectorFormat vform,
240 LogicVRegister dst1,
241 LogicVRegister dst2,
242 LogicVRegister dst3,
243 uint64_t addr1) {
244 dst1.ClearForWrite(vform);
245 dst2.ClearForWrite(vform);
246 dst3.ClearForWrite(vform);
247 int esize = LaneSizeInBytesFromFormat(vform);
248 uint64_t addr2 = addr1 + esize;
249 uint64_t addr3 = addr2 + esize;
250 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
251 dst1.ReadUintFromMem(vform, i, addr1);
252 dst2.ReadUintFromMem(vform, i, addr2);
253 dst3.ReadUintFromMem(vform, i, addr3);
254 addr1 += 3 * esize;
255 addr2 += 3 * esize;
256 addr3 += 3 * esize;
257 }
258 }
259
260
ld3(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr1)261 void Simulator::ld3(VectorFormat vform,
262 LogicVRegister dst1,
263 LogicVRegister dst2,
264 LogicVRegister dst3,
265 int index,
266 uint64_t addr1) {
267 dst1.ClearForWrite(vform);
268 dst2.ClearForWrite(vform);
269 dst3.ClearForWrite(vform);
270 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
271 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
272 dst1.ReadUintFromMem(vform, index, addr1);
273 dst2.ReadUintFromMem(vform, index, addr2);
274 dst3.ReadUintFromMem(vform, index, addr3);
275 }
276
277
ld3r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)278 void Simulator::ld3r(VectorFormat vform,
279 LogicVRegister dst1,
280 LogicVRegister dst2,
281 LogicVRegister dst3,
282 uint64_t addr) {
283 dst1.ClearForWrite(vform);
284 dst2.ClearForWrite(vform);
285 dst3.ClearForWrite(vform);
286 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
287 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
288 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
289 dst1.ReadUintFromMem(vform, i, addr);
290 dst2.ReadUintFromMem(vform, i, addr2);
291 dst3.ReadUintFromMem(vform, i, addr3);
292 }
293 }
294
295
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr1)296 void Simulator::ld4(VectorFormat vform,
297 LogicVRegister dst1,
298 LogicVRegister dst2,
299 LogicVRegister dst3,
300 LogicVRegister dst4,
301 uint64_t addr1) {
302 dst1.ClearForWrite(vform);
303 dst2.ClearForWrite(vform);
304 dst3.ClearForWrite(vform);
305 dst4.ClearForWrite(vform);
306 int esize = LaneSizeInBytesFromFormat(vform);
307 uint64_t addr2 = addr1 + esize;
308 uint64_t addr3 = addr2 + esize;
309 uint64_t addr4 = addr3 + esize;
310 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
311 dst1.ReadUintFromMem(vform, i, addr1);
312 dst2.ReadUintFromMem(vform, i, addr2);
313 dst3.ReadUintFromMem(vform, i, addr3);
314 dst4.ReadUintFromMem(vform, i, addr4);
315 addr1 += 4 * esize;
316 addr2 += 4 * esize;
317 addr3 += 4 * esize;
318 addr4 += 4 * esize;
319 }
320 }
321
322
ld4(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr1)323 void Simulator::ld4(VectorFormat vform,
324 LogicVRegister dst1,
325 LogicVRegister dst2,
326 LogicVRegister dst3,
327 LogicVRegister dst4,
328 int index,
329 uint64_t addr1) {
330 dst1.ClearForWrite(vform);
331 dst2.ClearForWrite(vform);
332 dst3.ClearForWrite(vform);
333 dst4.ClearForWrite(vform);
334 uint64_t addr2 = addr1 + LaneSizeInBytesFromFormat(vform);
335 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
336 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
337 dst1.ReadUintFromMem(vform, index, addr1);
338 dst2.ReadUintFromMem(vform, index, addr2);
339 dst3.ReadUintFromMem(vform, index, addr3);
340 dst4.ReadUintFromMem(vform, index, addr4);
341 }
342
343
ld4r(VectorFormat vform,LogicVRegister dst1,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)344 void Simulator::ld4r(VectorFormat vform,
345 LogicVRegister dst1,
346 LogicVRegister dst2,
347 LogicVRegister dst3,
348 LogicVRegister dst4,
349 uint64_t addr) {
350 dst1.ClearForWrite(vform);
351 dst2.ClearForWrite(vform);
352 dst3.ClearForWrite(vform);
353 dst4.ClearForWrite(vform);
354 uint64_t addr2 = addr + LaneSizeInBytesFromFormat(vform);
355 uint64_t addr3 = addr2 + LaneSizeInBytesFromFormat(vform);
356 uint64_t addr4 = addr3 + LaneSizeInBytesFromFormat(vform);
357 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
358 dst1.ReadUintFromMem(vform, i, addr);
359 dst2.ReadUintFromMem(vform, i, addr2);
360 dst3.ReadUintFromMem(vform, i, addr3);
361 dst4.ReadUintFromMem(vform, i, addr4);
362 }
363 }
364
365
st1(VectorFormat vform,LogicVRegister src,uint64_t addr)366 void Simulator::st1(VectorFormat vform, LogicVRegister src, uint64_t addr) {
367 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
368 src.WriteUintToMem(vform, i, addr);
369 addr += LaneSizeInBytesFromFormat(vform);
370 }
371 }
372
373
st1(VectorFormat vform,LogicVRegister src,int index,uint64_t addr)374 void Simulator::st1(VectorFormat vform,
375 LogicVRegister src,
376 int index,
377 uint64_t addr) {
378 src.WriteUintToMem(vform, index, addr);
379 }
380
381
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,uint64_t addr)382 void Simulator::st2(VectorFormat vform,
383 LogicVRegister dst,
384 LogicVRegister dst2,
385 uint64_t addr) {
386 int esize = LaneSizeInBytesFromFormat(vform);
387 uint64_t addr2 = addr + esize;
388 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
389 dst.WriteUintToMem(vform, i, addr);
390 dst2.WriteUintToMem(vform, i, addr2);
391 addr += 2 * esize;
392 addr2 += 2 * esize;
393 }
394 }
395
396
st2(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,int index,uint64_t addr)397 void Simulator::st2(VectorFormat vform,
398 LogicVRegister dst,
399 LogicVRegister dst2,
400 int index,
401 uint64_t addr) {
402 int esize = LaneSizeInBytesFromFormat(vform);
403 dst.WriteUintToMem(vform, index, addr);
404 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
405 }
406
407
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,uint64_t addr)408 void Simulator::st3(VectorFormat vform,
409 LogicVRegister dst,
410 LogicVRegister dst2,
411 LogicVRegister dst3,
412 uint64_t addr) {
413 int esize = LaneSizeInBytesFromFormat(vform);
414 uint64_t addr2 = addr + esize;
415 uint64_t addr3 = addr2 + esize;
416 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
417 dst.WriteUintToMem(vform, i, addr);
418 dst2.WriteUintToMem(vform, i, addr2);
419 dst3.WriteUintToMem(vform, i, addr3);
420 addr += 3 * esize;
421 addr2 += 3 * esize;
422 addr3 += 3 * esize;
423 }
424 }
425
426
st3(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,int index,uint64_t addr)427 void Simulator::st3(VectorFormat vform,
428 LogicVRegister dst,
429 LogicVRegister dst2,
430 LogicVRegister dst3,
431 int index,
432 uint64_t addr) {
433 int esize = LaneSizeInBytesFromFormat(vform);
434 dst.WriteUintToMem(vform, index, addr);
435 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
436 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
437 }
438
439
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,uint64_t addr)440 void Simulator::st4(VectorFormat vform,
441 LogicVRegister dst,
442 LogicVRegister dst2,
443 LogicVRegister dst3,
444 LogicVRegister dst4,
445 uint64_t addr) {
446 int esize = LaneSizeInBytesFromFormat(vform);
447 uint64_t addr2 = addr + esize;
448 uint64_t addr3 = addr2 + esize;
449 uint64_t addr4 = addr3 + esize;
450 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
451 dst.WriteUintToMem(vform, i, addr);
452 dst2.WriteUintToMem(vform, i, addr2);
453 dst3.WriteUintToMem(vform, i, addr3);
454 dst4.WriteUintToMem(vform, i, addr4);
455 addr += 4 * esize;
456 addr2 += 4 * esize;
457 addr3 += 4 * esize;
458 addr4 += 4 * esize;
459 }
460 }
461
462
st4(VectorFormat vform,LogicVRegister dst,LogicVRegister dst2,LogicVRegister dst3,LogicVRegister dst4,int index,uint64_t addr)463 void Simulator::st4(VectorFormat vform,
464 LogicVRegister dst,
465 LogicVRegister dst2,
466 LogicVRegister dst3,
467 LogicVRegister dst4,
468 int index,
469 uint64_t addr) {
470 int esize = LaneSizeInBytesFromFormat(vform);
471 dst.WriteUintToMem(vform, index, addr);
472 dst2.WriteUintToMem(vform, index, addr + 1 * esize);
473 dst3.WriteUintToMem(vform, index, addr + 2 * esize);
474 dst4.WriteUintToMem(vform, index, addr + 3 * esize);
475 }
476
477
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)478 LogicVRegister Simulator::cmp(VectorFormat vform,
479 LogicVRegister dst,
480 const LogicVRegister& src1,
481 const LogicVRegister& src2,
482 Condition cond) {
483 dst.ClearForWrite(vform);
484 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
485 int64_t sa = src1.Int(vform, i);
486 int64_t sb = src2.Int(vform, i);
487 uint64_t ua = src1.Uint(vform, i);
488 uint64_t ub = src2.Uint(vform, i);
489 bool result = false;
490 switch (cond) {
491 case eq:
492 result = (ua == ub);
493 break;
494 case ge:
495 result = (sa >= sb);
496 break;
497 case gt:
498 result = (sa > sb);
499 break;
500 case hi:
501 result = (ua > ub);
502 break;
503 case hs:
504 result = (ua >= ub);
505 break;
506 case lt:
507 result = (sa < sb);
508 break;
509 case le:
510 result = (sa <= sb);
511 break;
512 default:
513 VIXL_UNREACHABLE();
514 break;
515 }
516 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
517 }
518 return dst;
519 }
520
521
cmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,int imm,Condition cond)522 LogicVRegister Simulator::cmp(VectorFormat vform,
523 LogicVRegister dst,
524 const LogicVRegister& src1,
525 int imm,
526 Condition cond) {
527 SimVRegister temp;
528 LogicVRegister imm_reg = dup_immediate(vform, temp, imm);
529 return cmp(vform, dst, src1, imm_reg, cond);
530 }
531
532
cmptst(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)533 LogicVRegister Simulator::cmptst(VectorFormat vform,
534 LogicVRegister dst,
535 const LogicVRegister& src1,
536 const LogicVRegister& src2) {
537 dst.ClearForWrite(vform);
538 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
539 uint64_t ua = src1.Uint(vform, i);
540 uint64_t ub = src2.Uint(vform, i);
541 dst.SetUint(vform, i, ((ua & ub) != 0) ? MaxUintFromFormat(vform) : 0);
542 }
543 return dst;
544 }
545
546
add(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)547 LogicVRegister Simulator::add(VectorFormat vform,
548 LogicVRegister dst,
549 const LogicVRegister& src1,
550 const LogicVRegister& src2) {
551 int lane_size = LaneSizeInBitsFromFormat(vform);
552 dst.ClearForWrite(vform);
553 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
554 // Test for unsigned saturation.
555 uint64_t ua = src1.UintLeftJustified(vform, i);
556 uint64_t ub = src2.UintLeftJustified(vform, i);
557 uint64_t ur = ua + ub;
558 if (ur < ua) {
559 dst.SetUnsignedSat(i, true);
560 }
561
562 // Test for signed saturation.
563 bool pos_a = (ua >> 63) == 0;
564 bool pos_b = (ub >> 63) == 0;
565 bool pos_r = (ur >> 63) == 0;
566 // If the signs of the operands are the same, but different from the result,
567 // there was an overflow.
568 if ((pos_a == pos_b) && (pos_a != pos_r)) {
569 dst.SetSignedSat(i, pos_a);
570 }
571
572 dst.SetInt(vform, i, ur >> (64 - lane_size));
573 }
574 return dst;
575 }
576
577
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)578 LogicVRegister Simulator::addp(VectorFormat vform,
579 LogicVRegister dst,
580 const LogicVRegister& src1,
581 const LogicVRegister& src2) {
582 SimVRegister temp1, temp2;
583 uzp1(vform, temp1, src1, src2);
584 uzp2(vform, temp2, src1, src2);
585 add(vform, dst, temp1, temp2);
586 return dst;
587 }
588
589
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)590 LogicVRegister Simulator::mla(VectorFormat vform,
591 LogicVRegister dst,
592 const LogicVRegister& src1,
593 const LogicVRegister& src2) {
594 SimVRegister temp;
595 mul(vform, temp, src1, src2);
596 add(vform, dst, dst, temp);
597 return dst;
598 }
599
600
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)601 LogicVRegister Simulator::mls(VectorFormat vform,
602 LogicVRegister dst,
603 const LogicVRegister& src1,
604 const LogicVRegister& src2) {
605 SimVRegister temp;
606 mul(vform, temp, src1, src2);
607 sub(vform, dst, dst, temp);
608 return dst;
609 }
610
611
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)612 LogicVRegister Simulator::mul(VectorFormat vform,
613 LogicVRegister dst,
614 const LogicVRegister& src1,
615 const LogicVRegister& src2) {
616 dst.ClearForWrite(vform);
617 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
618 dst.SetUint(vform, i, src1.Uint(vform, i) * src2.Uint(vform, i));
619 }
620 return dst;
621 }
622
623
mul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)624 LogicVRegister Simulator::mul(VectorFormat vform,
625 LogicVRegister dst,
626 const LogicVRegister& src1,
627 const LogicVRegister& src2,
628 int index) {
629 SimVRegister temp;
630 VectorFormat indexform = VectorFormatFillQ(vform);
631 return mul(vform, dst, src1, dup_element(indexform, temp, src2, index));
632 }
633
634
mla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)635 LogicVRegister Simulator::mla(VectorFormat vform,
636 LogicVRegister dst,
637 const LogicVRegister& src1,
638 const LogicVRegister& src2,
639 int index) {
640 SimVRegister temp;
641 VectorFormat indexform = VectorFormatFillQ(vform);
642 return mla(vform, dst, src1, dup_element(indexform, temp, src2, index));
643 }
644
645
mls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)646 LogicVRegister Simulator::mls(VectorFormat vform,
647 LogicVRegister dst,
648 const LogicVRegister& src1,
649 const LogicVRegister& src2,
650 int index) {
651 SimVRegister temp;
652 VectorFormat indexform = VectorFormatFillQ(vform);
653 return mls(vform, dst, src1, dup_element(indexform, temp, src2, index));
654 }
655
656
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)657 LogicVRegister Simulator::smull(VectorFormat vform,
658 LogicVRegister dst,
659 const LogicVRegister& src1,
660 const LogicVRegister& src2,
661 int index) {
662 SimVRegister temp;
663 VectorFormat indexform =
664 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
665 return smull(vform, dst, src1, dup_element(indexform, temp, src2, index));
666 }
667
668
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)669 LogicVRegister Simulator::smull2(VectorFormat vform,
670 LogicVRegister dst,
671 const LogicVRegister& src1,
672 const LogicVRegister& src2,
673 int index) {
674 SimVRegister temp;
675 VectorFormat indexform =
676 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
677 return smull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
678 }
679
680
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)681 LogicVRegister Simulator::umull(VectorFormat vform,
682 LogicVRegister dst,
683 const LogicVRegister& src1,
684 const LogicVRegister& src2,
685 int index) {
686 SimVRegister temp;
687 VectorFormat indexform =
688 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
689 return umull(vform, dst, src1, dup_element(indexform, temp, src2, index));
690 }
691
692
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)693 LogicVRegister Simulator::umull2(VectorFormat vform,
694 LogicVRegister dst,
695 const LogicVRegister& src1,
696 const LogicVRegister& src2,
697 int index) {
698 SimVRegister temp;
699 VectorFormat indexform =
700 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
701 return umull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
702 }
703
704
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)705 LogicVRegister Simulator::smlal(VectorFormat vform,
706 LogicVRegister dst,
707 const LogicVRegister& src1,
708 const LogicVRegister& src2,
709 int index) {
710 SimVRegister temp;
711 VectorFormat indexform =
712 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
713 return smlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
714 }
715
716
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)717 LogicVRegister Simulator::smlal2(VectorFormat vform,
718 LogicVRegister dst,
719 const LogicVRegister& src1,
720 const LogicVRegister& src2,
721 int index) {
722 SimVRegister temp;
723 VectorFormat indexform =
724 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
725 return smlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
726 }
727
728
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)729 LogicVRegister Simulator::umlal(VectorFormat vform,
730 LogicVRegister dst,
731 const LogicVRegister& src1,
732 const LogicVRegister& src2,
733 int index) {
734 SimVRegister temp;
735 VectorFormat indexform =
736 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
737 return umlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
738 }
739
740
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)741 LogicVRegister Simulator::umlal2(VectorFormat vform,
742 LogicVRegister dst,
743 const LogicVRegister& src1,
744 const LogicVRegister& src2,
745 int index) {
746 SimVRegister temp;
747 VectorFormat indexform =
748 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
749 return umlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
750 }
751
752
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)753 LogicVRegister Simulator::smlsl(VectorFormat vform,
754 LogicVRegister dst,
755 const LogicVRegister& src1,
756 const LogicVRegister& src2,
757 int index) {
758 SimVRegister temp;
759 VectorFormat indexform =
760 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
761 return smlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
762 }
763
764
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)765 LogicVRegister Simulator::smlsl2(VectorFormat vform,
766 LogicVRegister dst,
767 const LogicVRegister& src1,
768 const LogicVRegister& src2,
769 int index) {
770 SimVRegister temp;
771 VectorFormat indexform =
772 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
773 return smlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
774 }
775
776
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)777 LogicVRegister Simulator::umlsl(VectorFormat vform,
778 LogicVRegister dst,
779 const LogicVRegister& src1,
780 const LogicVRegister& src2,
781 int index) {
782 SimVRegister temp;
783 VectorFormat indexform =
784 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
785 return umlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
786 }
787
788
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)789 LogicVRegister Simulator::umlsl2(VectorFormat vform,
790 LogicVRegister dst,
791 const LogicVRegister& src1,
792 const LogicVRegister& src2,
793 int index) {
794 SimVRegister temp;
795 VectorFormat indexform =
796 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
797 return umlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
798 }
799
800
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)801 LogicVRegister Simulator::sqdmull(VectorFormat vform,
802 LogicVRegister dst,
803 const LogicVRegister& src1,
804 const LogicVRegister& src2,
805 int index) {
806 SimVRegister temp;
807 VectorFormat indexform =
808 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
809 return sqdmull(vform, dst, src1, dup_element(indexform, temp, src2, index));
810 }
811
812
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)813 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
814 LogicVRegister dst,
815 const LogicVRegister& src1,
816 const LogicVRegister& src2,
817 int index) {
818 SimVRegister temp;
819 VectorFormat indexform =
820 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
821 return sqdmull2(vform, dst, src1, dup_element(indexform, temp, src2, index));
822 }
823
824
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)825 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
826 LogicVRegister dst,
827 const LogicVRegister& src1,
828 const LogicVRegister& src2,
829 int index) {
830 SimVRegister temp;
831 VectorFormat indexform =
832 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
833 return sqdmlal(vform, dst, src1, dup_element(indexform, temp, src2, index));
834 }
835
836
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)837 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
838 LogicVRegister dst,
839 const LogicVRegister& src1,
840 const LogicVRegister& src2,
841 int index) {
842 SimVRegister temp;
843 VectorFormat indexform =
844 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
845 return sqdmlal2(vform, dst, src1, dup_element(indexform, temp, src2, index));
846 }
847
848
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)849 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
850 LogicVRegister dst,
851 const LogicVRegister& src1,
852 const LogicVRegister& src2,
853 int index) {
854 SimVRegister temp;
855 VectorFormat indexform =
856 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
857 return sqdmlsl(vform, dst, src1, dup_element(indexform, temp, src2, index));
858 }
859
860
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)861 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
862 LogicVRegister dst,
863 const LogicVRegister& src1,
864 const LogicVRegister& src2,
865 int index) {
866 SimVRegister temp;
867 VectorFormat indexform =
868 VectorFormatHalfWidthDoubleLanes(VectorFormatFillQ(vform));
869 return sqdmlsl2(vform, dst, src1, dup_element(indexform, temp, src2, index));
870 }
871
872
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)873 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
874 LogicVRegister dst,
875 const LogicVRegister& src1,
876 const LogicVRegister& src2,
877 int index) {
878 SimVRegister temp;
879 VectorFormat indexform = VectorFormatFillQ(vform);
880 return sqdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
881 }
882
883
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)884 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
885 LogicVRegister dst,
886 const LogicVRegister& src1,
887 const LogicVRegister& src2,
888 int index) {
889 SimVRegister temp;
890 VectorFormat indexform = VectorFormatFillQ(vform);
891 return sqrdmulh(vform, dst, src1, dup_element(indexform, temp, src2, index));
892 }
893
894
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)895 LogicVRegister Simulator::sdot(VectorFormat vform,
896 LogicVRegister dst,
897 const LogicVRegister& src1,
898 const LogicVRegister& src2,
899 int index) {
900 SimVRegister temp;
901 VectorFormat indexform = VectorFormatFillQ(vform);
902 return sdot(vform, dst, src1, dup_element(indexform, temp, src2, index));
903 }
904
905
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)906 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
907 LogicVRegister dst,
908 const LogicVRegister& src1,
909 const LogicVRegister& src2,
910 int index) {
911 SimVRegister temp;
912 VectorFormat indexform = VectorFormatFillQ(vform);
913 return sqrdmlah(vform, dst, src1, dup_element(indexform, temp, src2, index));
914 }
915
916
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)917 LogicVRegister Simulator::udot(VectorFormat vform,
918 LogicVRegister dst,
919 const LogicVRegister& src1,
920 const LogicVRegister& src2,
921 int index) {
922 SimVRegister temp;
923 VectorFormat indexform = VectorFormatFillQ(vform);
924 return udot(vform, dst, src1, dup_element(indexform, temp, src2, index));
925 }
926
927
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)928 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
929 LogicVRegister dst,
930 const LogicVRegister& src1,
931 const LogicVRegister& src2,
932 int index) {
933 SimVRegister temp;
934 VectorFormat indexform = VectorFormatFillQ(vform);
935 return sqrdmlsh(vform, dst, src1, dup_element(indexform, temp, src2, index));
936 }
937
938
PolynomialMult(uint8_t op1,uint8_t op2) const939 uint16_t Simulator::PolynomialMult(uint8_t op1, uint8_t op2) const {
940 uint16_t result = 0;
941 uint16_t extended_op2 = op2;
942 for (int i = 0; i < 8; ++i) {
943 if ((op1 >> i) & 1) {
944 result = result ^ (extended_op2 << i);
945 }
946 }
947 return result;
948 }
949
950
pmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)951 LogicVRegister Simulator::pmul(VectorFormat vform,
952 LogicVRegister dst,
953 const LogicVRegister& src1,
954 const LogicVRegister& src2) {
955 dst.ClearForWrite(vform);
956 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
957 dst.SetUint(vform,
958 i,
959 PolynomialMult(src1.Uint(vform, i), src2.Uint(vform, i)));
960 }
961 return dst;
962 }
963
964
pmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)965 LogicVRegister Simulator::pmull(VectorFormat vform,
966 LogicVRegister dst,
967 const LogicVRegister& src1,
968 const LogicVRegister& src2) {
969 VectorFormat vform_src = VectorFormatHalfWidth(vform);
970 dst.ClearForWrite(vform);
971 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
972 dst.SetUint(vform,
973 i,
974 PolynomialMult(src1.Uint(vform_src, i),
975 src2.Uint(vform_src, i)));
976 }
977 return dst;
978 }
979
980
pmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)981 LogicVRegister Simulator::pmull2(VectorFormat vform,
982 LogicVRegister dst,
983 const LogicVRegister& src1,
984 const LogicVRegister& src2) {
985 VectorFormat vform_src = VectorFormatHalfWidthDoubleLanes(vform);
986 dst.ClearForWrite(vform);
987 int lane_count = LaneCountFromFormat(vform);
988 for (int i = 0; i < lane_count; i++) {
989 dst.SetUint(vform,
990 i,
991 PolynomialMult(src1.Uint(vform_src, lane_count + i),
992 src2.Uint(vform_src, lane_count + i)));
993 }
994 return dst;
995 }
996
997
sub(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)998 LogicVRegister Simulator::sub(VectorFormat vform,
999 LogicVRegister dst,
1000 const LogicVRegister& src1,
1001 const LogicVRegister& src2) {
1002 int lane_size = LaneSizeInBitsFromFormat(vform);
1003 dst.ClearForWrite(vform);
1004 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1005 // Test for unsigned saturation.
1006 uint64_t ua = src1.UintLeftJustified(vform, i);
1007 uint64_t ub = src2.UintLeftJustified(vform, i);
1008 uint64_t ur = ua - ub;
1009 if (ub > ua) {
1010 dst.SetUnsignedSat(i, false);
1011 }
1012
1013 // Test for signed saturation.
1014 bool pos_a = (ua >> 63) == 0;
1015 bool pos_b = (ub >> 63) == 0;
1016 bool pos_r = (ur >> 63) == 0;
1017 // If the signs of the operands are different, and the sign of the first
1018 // operand doesn't match the result, there was an overflow.
1019 if ((pos_a != pos_b) && (pos_a != pos_r)) {
1020 dst.SetSignedSat(i, pos_a);
1021 }
1022
1023 dst.SetInt(vform, i, ur >> (64 - lane_size));
1024 }
1025 return dst;
1026 }
1027
1028
and_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1029 LogicVRegister Simulator::and_(VectorFormat vform,
1030 LogicVRegister dst,
1031 const LogicVRegister& src1,
1032 const LogicVRegister& src2) {
1033 dst.ClearForWrite(vform);
1034 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1035 dst.SetUint(vform, i, src1.Uint(vform, i) & src2.Uint(vform, i));
1036 }
1037 return dst;
1038 }
1039
1040
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1041 LogicVRegister Simulator::orr(VectorFormat vform,
1042 LogicVRegister dst,
1043 const LogicVRegister& src1,
1044 const LogicVRegister& src2) {
1045 dst.ClearForWrite(vform);
1046 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1047 dst.SetUint(vform, i, src1.Uint(vform, i) | src2.Uint(vform, i));
1048 }
1049 return dst;
1050 }
1051
1052
orn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1053 LogicVRegister Simulator::orn(VectorFormat vform,
1054 LogicVRegister dst,
1055 const LogicVRegister& src1,
1056 const LogicVRegister& src2) {
1057 dst.ClearForWrite(vform);
1058 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1059 dst.SetUint(vform, i, src1.Uint(vform, i) | ~src2.Uint(vform, i));
1060 }
1061 return dst;
1062 }
1063
1064
eor(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1065 LogicVRegister Simulator::eor(VectorFormat vform,
1066 LogicVRegister dst,
1067 const LogicVRegister& src1,
1068 const LogicVRegister& src2) {
1069 dst.ClearForWrite(vform);
1070 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1071 dst.SetUint(vform, i, src1.Uint(vform, i) ^ src2.Uint(vform, i));
1072 }
1073 return dst;
1074 }
1075
1076
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1077 LogicVRegister Simulator::bic(VectorFormat vform,
1078 LogicVRegister dst,
1079 const LogicVRegister& src1,
1080 const LogicVRegister& src2) {
1081 dst.ClearForWrite(vform);
1082 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1083 dst.SetUint(vform, i, src1.Uint(vform, i) & ~src2.Uint(vform, i));
1084 }
1085 return dst;
1086 }
1087
1088
bic(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)1089 LogicVRegister Simulator::bic(VectorFormat vform,
1090 LogicVRegister dst,
1091 const LogicVRegister& src,
1092 uint64_t imm) {
1093 uint64_t result[16];
1094 int laneCount = LaneCountFromFormat(vform);
1095 for (int i = 0; i < laneCount; ++i) {
1096 result[i] = src.Uint(vform, i) & ~imm;
1097 }
1098 dst.ClearForWrite(vform);
1099 for (int i = 0; i < laneCount; ++i) {
1100 dst.SetUint(vform, i, result[i]);
1101 }
1102 return dst;
1103 }
1104
1105
bif(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1106 LogicVRegister Simulator::bif(VectorFormat vform,
1107 LogicVRegister dst,
1108 const LogicVRegister& src1,
1109 const LogicVRegister& src2) {
1110 dst.ClearForWrite(vform);
1111 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1112 uint64_t operand1 = dst.Uint(vform, i);
1113 uint64_t operand2 = ~src2.Uint(vform, i);
1114 uint64_t operand3 = src1.Uint(vform, i);
1115 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1116 dst.SetUint(vform, i, result);
1117 }
1118 return dst;
1119 }
1120
1121
bit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1122 LogicVRegister Simulator::bit(VectorFormat vform,
1123 LogicVRegister dst,
1124 const LogicVRegister& src1,
1125 const LogicVRegister& src2) {
1126 dst.ClearForWrite(vform);
1127 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1128 uint64_t operand1 = dst.Uint(vform, i);
1129 uint64_t operand2 = src2.Uint(vform, i);
1130 uint64_t operand3 = src1.Uint(vform, i);
1131 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1132 dst.SetUint(vform, i, result);
1133 }
1134 return dst;
1135 }
1136
1137
bsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1138 LogicVRegister Simulator::bsl(VectorFormat vform,
1139 LogicVRegister dst,
1140 const LogicVRegister& src1,
1141 const LogicVRegister& src2) {
1142 dst.ClearForWrite(vform);
1143 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1144 uint64_t operand1 = src2.Uint(vform, i);
1145 uint64_t operand2 = dst.Uint(vform, i);
1146 uint64_t operand3 = src1.Uint(vform, i);
1147 uint64_t result = operand1 ^ ((operand1 ^ operand3) & operand2);
1148 dst.SetUint(vform, i, result);
1149 }
1150 return dst;
1151 }
1152
1153
sminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1154 LogicVRegister Simulator::sminmax(VectorFormat vform,
1155 LogicVRegister dst,
1156 const LogicVRegister& src1,
1157 const LogicVRegister& src2,
1158 bool max) {
1159 dst.ClearForWrite(vform);
1160 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1161 int64_t src1_val = src1.Int(vform, i);
1162 int64_t src2_val = src2.Int(vform, i);
1163 int64_t dst_val;
1164 if (max) {
1165 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1166 } else {
1167 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1168 }
1169 dst.SetInt(vform, i, dst_val);
1170 }
1171 return dst;
1172 }
1173
1174
smax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1175 LogicVRegister Simulator::smax(VectorFormat vform,
1176 LogicVRegister dst,
1177 const LogicVRegister& src1,
1178 const LogicVRegister& src2) {
1179 return sminmax(vform, dst, src1, src2, true);
1180 }
1181
1182
smin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1183 LogicVRegister Simulator::smin(VectorFormat vform,
1184 LogicVRegister dst,
1185 const LogicVRegister& src1,
1186 const LogicVRegister& src2) {
1187 return sminmax(vform, dst, src1, src2, false);
1188 }
1189
1190
sminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1191 LogicVRegister Simulator::sminmaxp(VectorFormat vform,
1192 LogicVRegister dst,
1193 const LogicVRegister& src1,
1194 const LogicVRegister& src2,
1195 bool max) {
1196 int lanes = LaneCountFromFormat(vform);
1197 int64_t result[kMaxLanesPerVector];
1198 const LogicVRegister* src = &src1;
1199 for (int j = 0; j < 2; j++) {
1200 for (int i = 0; i < lanes; i += 2) {
1201 int64_t first_val = src->Int(vform, i);
1202 int64_t second_val = src->Int(vform, i + 1);
1203 int64_t dst_val;
1204 if (max) {
1205 dst_val = (first_val > second_val) ? first_val : second_val;
1206 } else {
1207 dst_val = (first_val < second_val) ? first_val : second_val;
1208 }
1209 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1210 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1211 }
1212 src = &src2;
1213 }
1214 dst.SetIntArray(vform, result);
1215 return dst;
1216 }
1217
1218
smaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1219 LogicVRegister Simulator::smaxp(VectorFormat vform,
1220 LogicVRegister dst,
1221 const LogicVRegister& src1,
1222 const LogicVRegister& src2) {
1223 return sminmaxp(vform, dst, src1, src2, true);
1224 }
1225
1226
sminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1227 LogicVRegister Simulator::sminp(VectorFormat vform,
1228 LogicVRegister dst,
1229 const LogicVRegister& src1,
1230 const LogicVRegister& src2) {
1231 return sminmaxp(vform, dst, src1, src2, false);
1232 }
1233
1234
addp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1235 LogicVRegister Simulator::addp(VectorFormat vform,
1236 LogicVRegister dst,
1237 const LogicVRegister& src) {
1238 VIXL_ASSERT(vform == kFormatD);
1239
1240 uint64_t dst_val = src.Uint(kFormat2D, 0) + src.Uint(kFormat2D, 1);
1241 dst.ClearForWrite(vform);
1242 dst.SetUint(vform, 0, dst_val);
1243 return dst;
1244 }
1245
1246
addv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1247 LogicVRegister Simulator::addv(VectorFormat vform,
1248 LogicVRegister dst,
1249 const LogicVRegister& src) {
1250 VectorFormat vform_dst =
1251 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform));
1252
1253
1254 int64_t dst_val = 0;
1255 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1256 dst_val += src.Int(vform, i);
1257 }
1258
1259 dst.ClearForWrite(vform_dst);
1260 dst.SetInt(vform_dst, 0, dst_val);
1261 return dst;
1262 }
1263
1264
saddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1265 LogicVRegister Simulator::saddlv(VectorFormat vform,
1266 LogicVRegister dst,
1267 const LogicVRegister& src) {
1268 VectorFormat vform_dst =
1269 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1270
1271 int64_t dst_val = 0;
1272 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1273 dst_val += src.Int(vform, i);
1274 }
1275
1276 dst.ClearForWrite(vform_dst);
1277 dst.SetInt(vform_dst, 0, dst_val);
1278 return dst;
1279 }
1280
1281
uaddlv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1282 LogicVRegister Simulator::uaddlv(VectorFormat vform,
1283 LogicVRegister dst,
1284 const LogicVRegister& src) {
1285 VectorFormat vform_dst =
1286 ScalarFormatFromLaneSize(LaneSizeInBitsFromFormat(vform) * 2);
1287
1288 uint64_t dst_val = 0;
1289 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1290 dst_val += src.Uint(vform, i);
1291 }
1292
1293 dst.ClearForWrite(vform_dst);
1294 dst.SetUint(vform_dst, 0, dst_val);
1295 return dst;
1296 }
1297
1298
sminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1299 LogicVRegister Simulator::sminmaxv(VectorFormat vform,
1300 LogicVRegister dst,
1301 const LogicVRegister& src,
1302 bool max) {
1303 int64_t dst_val = max ? INT64_MIN : INT64_MAX;
1304 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1305 int64_t src_val = src.Int(vform, i);
1306 if (max) {
1307 dst_val = (src_val > dst_val) ? src_val : dst_val;
1308 } else {
1309 dst_val = (src_val < dst_val) ? src_val : dst_val;
1310 }
1311 }
1312 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1313 dst.SetInt(vform, 0, dst_val);
1314 return dst;
1315 }
1316
1317
smaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1318 LogicVRegister Simulator::smaxv(VectorFormat vform,
1319 LogicVRegister dst,
1320 const LogicVRegister& src) {
1321 sminmaxv(vform, dst, src, true);
1322 return dst;
1323 }
1324
1325
sminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1326 LogicVRegister Simulator::sminv(VectorFormat vform,
1327 LogicVRegister dst,
1328 const LogicVRegister& src) {
1329 sminmaxv(vform, dst, src, false);
1330 return dst;
1331 }
1332
1333
uminmax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1334 LogicVRegister Simulator::uminmax(VectorFormat vform,
1335 LogicVRegister dst,
1336 const LogicVRegister& src1,
1337 const LogicVRegister& src2,
1338 bool max) {
1339 dst.ClearForWrite(vform);
1340 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1341 uint64_t src1_val = src1.Uint(vform, i);
1342 uint64_t src2_val = src2.Uint(vform, i);
1343 uint64_t dst_val;
1344 if (max) {
1345 dst_val = (src1_val > src2_val) ? src1_val : src2_val;
1346 } else {
1347 dst_val = (src1_val < src2_val) ? src1_val : src2_val;
1348 }
1349 dst.SetUint(vform, i, dst_val);
1350 }
1351 return dst;
1352 }
1353
1354
umax(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1355 LogicVRegister Simulator::umax(VectorFormat vform,
1356 LogicVRegister dst,
1357 const LogicVRegister& src1,
1358 const LogicVRegister& src2) {
1359 return uminmax(vform, dst, src1, src2, true);
1360 }
1361
1362
umin(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1363 LogicVRegister Simulator::umin(VectorFormat vform,
1364 LogicVRegister dst,
1365 const LogicVRegister& src1,
1366 const LogicVRegister& src2) {
1367 return uminmax(vform, dst, src1, src2, false);
1368 }
1369
1370
uminmaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool max)1371 LogicVRegister Simulator::uminmaxp(VectorFormat vform,
1372 LogicVRegister dst,
1373 const LogicVRegister& src1,
1374 const LogicVRegister& src2,
1375 bool max) {
1376 int lanes = LaneCountFromFormat(vform);
1377 uint64_t result[kMaxLanesPerVector];
1378 const LogicVRegister* src = &src1;
1379 for (int j = 0; j < 2; j++) {
1380 for (int i = 0; i < LaneCountFromFormat(vform); i += 2) {
1381 uint64_t first_val = src->Uint(vform, i);
1382 uint64_t second_val = src->Uint(vform, i + 1);
1383 uint64_t dst_val;
1384 if (max) {
1385 dst_val = (first_val > second_val) ? first_val : second_val;
1386 } else {
1387 dst_val = (first_val < second_val) ? first_val : second_val;
1388 }
1389 VIXL_ASSERT(((i >> 1) + (j * lanes / 2)) < kMaxLanesPerVector);
1390 result[(i >> 1) + (j * lanes / 2)] = dst_val;
1391 }
1392 src = &src2;
1393 }
1394 dst.SetUintArray(vform, result);
1395 return dst;
1396 }
1397
1398
umaxp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1399 LogicVRegister Simulator::umaxp(VectorFormat vform,
1400 LogicVRegister dst,
1401 const LogicVRegister& src1,
1402 const LogicVRegister& src2) {
1403 return uminmaxp(vform, dst, src1, src2, true);
1404 }
1405
1406
uminp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1407 LogicVRegister Simulator::uminp(VectorFormat vform,
1408 LogicVRegister dst,
1409 const LogicVRegister& src1,
1410 const LogicVRegister& src2) {
1411 return uminmaxp(vform, dst, src1, src2, false);
1412 }
1413
1414
uminmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool max)1415 LogicVRegister Simulator::uminmaxv(VectorFormat vform,
1416 LogicVRegister dst,
1417 const LogicVRegister& src,
1418 bool max) {
1419 uint64_t dst_val = max ? 0 : UINT64_MAX;
1420 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1421 uint64_t src_val = src.Uint(vform, i);
1422 if (max) {
1423 dst_val = (src_val > dst_val) ? src_val : dst_val;
1424 } else {
1425 dst_val = (src_val < dst_val) ? src_val : dst_val;
1426 }
1427 }
1428 dst.ClearForWrite(ScalarFormatFromFormat(vform));
1429 dst.SetUint(vform, 0, dst_val);
1430 return dst;
1431 }
1432
1433
umaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1434 LogicVRegister Simulator::umaxv(VectorFormat vform,
1435 LogicVRegister dst,
1436 const LogicVRegister& src) {
1437 uminmaxv(vform, dst, src, true);
1438 return dst;
1439 }
1440
1441
uminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1442 LogicVRegister Simulator::uminv(VectorFormat vform,
1443 LogicVRegister dst,
1444 const LogicVRegister& src) {
1445 uminmaxv(vform, dst, src, false);
1446 return dst;
1447 }
1448
1449
shl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1450 LogicVRegister Simulator::shl(VectorFormat vform,
1451 LogicVRegister dst,
1452 const LogicVRegister& src,
1453 int shift) {
1454 VIXL_ASSERT(shift >= 0);
1455 SimVRegister temp;
1456 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1457 return ushl(vform, dst, src, shiftreg);
1458 }
1459
1460
sshll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1461 LogicVRegister Simulator::sshll(VectorFormat vform,
1462 LogicVRegister dst,
1463 const LogicVRegister& src,
1464 int shift) {
1465 VIXL_ASSERT(shift >= 0);
1466 SimVRegister temp1, temp2;
1467 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1468 LogicVRegister extendedreg = sxtl(vform, temp2, src);
1469 return sshl(vform, dst, extendedreg, shiftreg);
1470 }
1471
1472
sshll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1473 LogicVRegister Simulator::sshll2(VectorFormat vform,
1474 LogicVRegister dst,
1475 const LogicVRegister& src,
1476 int shift) {
1477 VIXL_ASSERT(shift >= 0);
1478 SimVRegister temp1, temp2;
1479 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1480 LogicVRegister extendedreg = sxtl2(vform, temp2, src);
1481 return sshl(vform, dst, extendedreg, shiftreg);
1482 }
1483
1484
shll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1485 LogicVRegister Simulator::shll(VectorFormat vform,
1486 LogicVRegister dst,
1487 const LogicVRegister& src) {
1488 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1489 return sshll(vform, dst, src, shift);
1490 }
1491
1492
shll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1493 LogicVRegister Simulator::shll2(VectorFormat vform,
1494 LogicVRegister dst,
1495 const LogicVRegister& src) {
1496 int shift = LaneSizeInBitsFromFormat(vform) / 2;
1497 return sshll2(vform, dst, src, shift);
1498 }
1499
1500
ushll(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1501 LogicVRegister Simulator::ushll(VectorFormat vform,
1502 LogicVRegister dst,
1503 const LogicVRegister& src,
1504 int shift) {
1505 VIXL_ASSERT(shift >= 0);
1506 SimVRegister temp1, temp2;
1507 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1508 LogicVRegister extendedreg = uxtl(vform, temp2, src);
1509 return ushl(vform, dst, extendedreg, shiftreg);
1510 }
1511
1512
ushll2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1513 LogicVRegister Simulator::ushll2(VectorFormat vform,
1514 LogicVRegister dst,
1515 const LogicVRegister& src,
1516 int shift) {
1517 VIXL_ASSERT(shift >= 0);
1518 SimVRegister temp1, temp2;
1519 LogicVRegister shiftreg = dup_immediate(vform, temp1, shift);
1520 LogicVRegister extendedreg = uxtl2(vform, temp2, src);
1521 return ushl(vform, dst, extendedreg, shiftreg);
1522 }
1523
1524
sli(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1525 LogicVRegister Simulator::sli(VectorFormat vform,
1526 LogicVRegister dst,
1527 const LogicVRegister& src,
1528 int shift) {
1529 dst.ClearForWrite(vform);
1530 int laneCount = LaneCountFromFormat(vform);
1531 for (int i = 0; i < laneCount; i++) {
1532 uint64_t src_lane = src.Uint(vform, i);
1533 uint64_t dst_lane = dst.Uint(vform, i);
1534 uint64_t shifted = src_lane << shift;
1535 uint64_t mask = MaxUintFromFormat(vform) << shift;
1536 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1537 }
1538 return dst;
1539 }
1540
1541
sqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1542 LogicVRegister Simulator::sqshl(VectorFormat vform,
1543 LogicVRegister dst,
1544 const LogicVRegister& src,
1545 int shift) {
1546 VIXL_ASSERT(shift >= 0);
1547 SimVRegister temp;
1548 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1549 return sshl(vform, dst, src, shiftreg).SignedSaturate(vform);
1550 }
1551
1552
uqshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1553 LogicVRegister Simulator::uqshl(VectorFormat vform,
1554 LogicVRegister dst,
1555 const LogicVRegister& src,
1556 int shift) {
1557 VIXL_ASSERT(shift >= 0);
1558 SimVRegister temp;
1559 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1560 return ushl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1561 }
1562
1563
sqshlu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1564 LogicVRegister Simulator::sqshlu(VectorFormat vform,
1565 LogicVRegister dst,
1566 const LogicVRegister& src,
1567 int shift) {
1568 VIXL_ASSERT(shift >= 0);
1569 SimVRegister temp;
1570 LogicVRegister shiftreg = dup_immediate(vform, temp, shift);
1571 return sshl(vform, dst, src, shiftreg).UnsignedSaturate(vform);
1572 }
1573
1574
sri(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1575 LogicVRegister Simulator::sri(VectorFormat vform,
1576 LogicVRegister dst,
1577 const LogicVRegister& src,
1578 int shift) {
1579 dst.ClearForWrite(vform);
1580 int laneCount = LaneCountFromFormat(vform);
1581 VIXL_ASSERT((shift > 0) &&
1582 (shift <= static_cast<int>(LaneSizeInBitsFromFormat(vform))));
1583 for (int i = 0; i < laneCount; i++) {
1584 uint64_t src_lane = src.Uint(vform, i);
1585 uint64_t dst_lane = dst.Uint(vform, i);
1586 uint64_t shifted;
1587 uint64_t mask;
1588 if (shift == 64) {
1589 shifted = 0;
1590 mask = 0;
1591 } else {
1592 shifted = src_lane >> shift;
1593 mask = MaxUintFromFormat(vform) >> shift;
1594 }
1595 dst.SetUint(vform, i, (dst_lane & ~mask) | shifted);
1596 }
1597 return dst;
1598 }
1599
1600
ushr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1601 LogicVRegister Simulator::ushr(VectorFormat vform,
1602 LogicVRegister dst,
1603 const LogicVRegister& src,
1604 int shift) {
1605 VIXL_ASSERT(shift >= 0);
1606 SimVRegister temp;
1607 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1608 return ushl(vform, dst, src, shiftreg);
1609 }
1610
1611
sshr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1612 LogicVRegister Simulator::sshr(VectorFormat vform,
1613 LogicVRegister dst,
1614 const LogicVRegister& src,
1615 int shift) {
1616 VIXL_ASSERT(shift >= 0);
1617 SimVRegister temp;
1618 LogicVRegister shiftreg = dup_immediate(vform, temp, -shift);
1619 return sshl(vform, dst, src, shiftreg);
1620 }
1621
1622
ssra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1623 LogicVRegister Simulator::ssra(VectorFormat vform,
1624 LogicVRegister dst,
1625 const LogicVRegister& src,
1626 int shift) {
1627 SimVRegister temp;
1628 LogicVRegister shifted_reg = sshr(vform, temp, src, shift);
1629 return add(vform, dst, dst, shifted_reg);
1630 }
1631
1632
usra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1633 LogicVRegister Simulator::usra(VectorFormat vform,
1634 LogicVRegister dst,
1635 const LogicVRegister& src,
1636 int shift) {
1637 SimVRegister temp;
1638 LogicVRegister shifted_reg = ushr(vform, temp, src, shift);
1639 return add(vform, dst, dst, shifted_reg);
1640 }
1641
1642
srsra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1643 LogicVRegister Simulator::srsra(VectorFormat vform,
1644 LogicVRegister dst,
1645 const LogicVRegister& src,
1646 int shift) {
1647 SimVRegister temp;
1648 LogicVRegister shifted_reg = sshr(vform, temp, src, shift).Round(vform);
1649 return add(vform, dst, dst, shifted_reg);
1650 }
1651
1652
ursra(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)1653 LogicVRegister Simulator::ursra(VectorFormat vform,
1654 LogicVRegister dst,
1655 const LogicVRegister& src,
1656 int shift) {
1657 SimVRegister temp;
1658 LogicVRegister shifted_reg = ushr(vform, temp, src, shift).Round(vform);
1659 return add(vform, dst, dst, shifted_reg);
1660 }
1661
1662
cls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1663 LogicVRegister Simulator::cls(VectorFormat vform,
1664 LogicVRegister dst,
1665 const LogicVRegister& src) {
1666 uint64_t result[16];
1667 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1668 int laneCount = LaneCountFromFormat(vform);
1669 for (int i = 0; i < laneCount; i++) {
1670 result[i] = CountLeadingSignBits(src.Int(vform, i), laneSizeInBits);
1671 }
1672
1673 dst.ClearForWrite(vform);
1674 for (int i = 0; i < laneCount; ++i) {
1675 dst.SetUint(vform, i, result[i]);
1676 }
1677 return dst;
1678 }
1679
1680
clz(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1681 LogicVRegister Simulator::clz(VectorFormat vform,
1682 LogicVRegister dst,
1683 const LogicVRegister& src) {
1684 uint64_t result[16];
1685 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1686 int laneCount = LaneCountFromFormat(vform);
1687 for (int i = 0; i < laneCount; i++) {
1688 result[i] = CountLeadingZeros(src.Uint(vform, i), laneSizeInBits);
1689 }
1690
1691 dst.ClearForWrite(vform);
1692 for (int i = 0; i < laneCount; ++i) {
1693 dst.SetUint(vform, i, result[i]);
1694 }
1695 return dst;
1696 }
1697
1698
cnt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1699 LogicVRegister Simulator::cnt(VectorFormat vform,
1700 LogicVRegister dst,
1701 const LogicVRegister& src) {
1702 uint64_t result[16];
1703 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
1704 int laneCount = LaneCountFromFormat(vform);
1705 for (int i = 0; i < laneCount; i++) {
1706 uint64_t value = src.Uint(vform, i);
1707 result[i] = 0;
1708 for (int j = 0; j < laneSizeInBits; j++) {
1709 result[i] += (value & 1);
1710 value >>= 1;
1711 }
1712 }
1713
1714 dst.ClearForWrite(vform);
1715 for (int i = 0; i < laneCount; ++i) {
1716 dst.SetUint(vform, i, result[i]);
1717 }
1718 return dst;
1719 }
1720
1721
sshl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1722 LogicVRegister Simulator::sshl(VectorFormat vform,
1723 LogicVRegister dst,
1724 const LogicVRegister& src1,
1725 const LogicVRegister& src2) {
1726 dst.ClearForWrite(vform);
1727 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1728 int8_t shift_val = src2.Int(vform, i);
1729 int64_t lj_src_val = src1.IntLeftJustified(vform, i);
1730
1731 // Set signed saturation state.
1732 if ((shift_val > CountLeadingSignBits(lj_src_val)) && (lj_src_val != 0)) {
1733 dst.SetSignedSat(i, lj_src_val >= 0);
1734 }
1735
1736 // Set unsigned saturation state.
1737 if (lj_src_val < 0) {
1738 dst.SetUnsignedSat(i, false);
1739 } else if ((shift_val > CountLeadingZeros(lj_src_val)) &&
1740 (lj_src_val != 0)) {
1741 dst.SetUnsignedSat(i, true);
1742 }
1743
1744 int64_t src_val = src1.Int(vform, i);
1745 bool src_is_negative = src_val < 0;
1746 if (shift_val > 63) {
1747 dst.SetInt(vform, i, 0);
1748 } else if (shift_val < -63) {
1749 dst.SetRounding(i, src_is_negative);
1750 dst.SetInt(vform, i, src_is_negative ? -1 : 0);
1751 } else {
1752 // Use unsigned types for shifts, as behaviour is undefined for signed
1753 // lhs.
1754 uint64_t usrc_val = static_cast<uint64_t>(src_val);
1755
1756 if (shift_val < 0) {
1757 // Convert to right shift.
1758 shift_val = -shift_val;
1759
1760 // Set rounding state by testing most-significant bit shifted out.
1761 // Rounding only needed on right shifts.
1762 if (((usrc_val >> (shift_val - 1)) & 1) == 1) {
1763 dst.SetRounding(i, true);
1764 }
1765
1766 usrc_val >>= shift_val;
1767
1768 if (src_is_negative) {
1769 // Simulate sign-extension.
1770 usrc_val |= (~UINT64_C(0) << (64 - shift_val));
1771 }
1772 } else {
1773 usrc_val <<= shift_val;
1774 }
1775 dst.SetUint(vform, i, usrc_val);
1776 }
1777 }
1778 return dst;
1779 }
1780
1781
ushl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)1782 LogicVRegister Simulator::ushl(VectorFormat vform,
1783 LogicVRegister dst,
1784 const LogicVRegister& src1,
1785 const LogicVRegister& src2) {
1786 dst.ClearForWrite(vform);
1787 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1788 int8_t shift_val = src2.Int(vform, i);
1789 uint64_t lj_src_val = src1.UintLeftJustified(vform, i);
1790
1791 // Set saturation state.
1792 if ((shift_val > CountLeadingZeros(lj_src_val)) && (lj_src_val != 0)) {
1793 dst.SetUnsignedSat(i, true);
1794 }
1795
1796 uint64_t src_val = src1.Uint(vform, i);
1797 if ((shift_val > 63) || (shift_val < -64)) {
1798 dst.SetUint(vform, i, 0);
1799 } else {
1800 if (shift_val < 0) {
1801 // Set rounding state. Rounding only needed on right shifts.
1802 if (((src_val >> (-shift_val - 1)) & 1) == 1) {
1803 dst.SetRounding(i, true);
1804 }
1805
1806 if (shift_val == -64) {
1807 src_val = 0;
1808 } else {
1809 src_val >>= -shift_val;
1810 }
1811 } else {
1812 src_val <<= shift_val;
1813 }
1814 dst.SetUint(vform, i, src_val);
1815 }
1816 }
1817 return dst;
1818 }
1819
1820
neg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1821 LogicVRegister Simulator::neg(VectorFormat vform,
1822 LogicVRegister dst,
1823 const LogicVRegister& src) {
1824 dst.ClearForWrite(vform);
1825 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1826 // Test for signed saturation.
1827 int64_t sa = src.Int(vform, i);
1828 if (sa == MinIntFromFormat(vform)) {
1829 dst.SetSignedSat(i, true);
1830 }
1831 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1832 }
1833 return dst;
1834 }
1835
1836
suqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1837 LogicVRegister Simulator::suqadd(VectorFormat vform,
1838 LogicVRegister dst,
1839 const LogicVRegister& src) {
1840 dst.ClearForWrite(vform);
1841 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1842 int64_t sa = dst.IntLeftJustified(vform, i);
1843 uint64_t ub = src.UintLeftJustified(vform, i);
1844 uint64_t ur = sa + ub;
1845
1846 int64_t sr;
1847 memcpy(&sr, &ur, sizeof(sr));
1848 if (sr < sa) { // Test for signed positive saturation.
1849 dst.SetInt(vform, i, MaxIntFromFormat(vform));
1850 } else {
1851 dst.SetUint(vform, i, dst.Int(vform, i) + src.Uint(vform, i));
1852 }
1853 }
1854 return dst;
1855 }
1856
1857
usqadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1858 LogicVRegister Simulator::usqadd(VectorFormat vform,
1859 LogicVRegister dst,
1860 const LogicVRegister& src) {
1861 dst.ClearForWrite(vform);
1862 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1863 uint64_t ua = dst.UintLeftJustified(vform, i);
1864 int64_t sb = src.IntLeftJustified(vform, i);
1865 uint64_t ur = ua + sb;
1866
1867 if ((sb > 0) && (ur <= ua)) {
1868 dst.SetUint(vform, i, MaxUintFromFormat(vform)); // Positive saturation.
1869 } else if ((sb < 0) && (ur >= ua)) {
1870 dst.SetUint(vform, i, 0); // Negative saturation.
1871 } else {
1872 dst.SetUint(vform, i, dst.Uint(vform, i) + src.Int(vform, i));
1873 }
1874 }
1875 return dst;
1876 }
1877
1878
abs(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)1879 LogicVRegister Simulator::abs(VectorFormat vform,
1880 LogicVRegister dst,
1881 const LogicVRegister& src) {
1882 dst.ClearForWrite(vform);
1883 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
1884 // Test for signed saturation.
1885 int64_t sa = src.Int(vform, i);
1886 if (sa == MinIntFromFormat(vform)) {
1887 dst.SetSignedSat(i, true);
1888 }
1889 if (sa < 0) {
1890 dst.SetInt(vform, i, (sa == INT64_MIN) ? sa : -sa);
1891 } else {
1892 dst.SetInt(vform, i, sa);
1893 }
1894 }
1895 return dst;
1896 }
1897
1898
extractnarrow(VectorFormat dstform,LogicVRegister dst,bool dstIsSigned,const LogicVRegister & src,bool srcIsSigned)1899 LogicVRegister Simulator::extractnarrow(VectorFormat dstform,
1900 LogicVRegister dst,
1901 bool dstIsSigned,
1902 const LogicVRegister& src,
1903 bool srcIsSigned) {
1904 bool upperhalf = false;
1905 VectorFormat srcform = kFormatUndefined;
1906 int64_t ssrc[8];
1907 uint64_t usrc[8];
1908
1909 switch (dstform) {
1910 case kFormat8B:
1911 upperhalf = false;
1912 srcform = kFormat8H;
1913 break;
1914 case kFormat16B:
1915 upperhalf = true;
1916 srcform = kFormat8H;
1917 break;
1918 case kFormat4H:
1919 upperhalf = false;
1920 srcform = kFormat4S;
1921 break;
1922 case kFormat8H:
1923 upperhalf = true;
1924 srcform = kFormat4S;
1925 break;
1926 case kFormat2S:
1927 upperhalf = false;
1928 srcform = kFormat2D;
1929 break;
1930 case kFormat4S:
1931 upperhalf = true;
1932 srcform = kFormat2D;
1933 break;
1934 case kFormatB:
1935 upperhalf = false;
1936 srcform = kFormatH;
1937 break;
1938 case kFormatH:
1939 upperhalf = false;
1940 srcform = kFormatS;
1941 break;
1942 case kFormatS:
1943 upperhalf = false;
1944 srcform = kFormatD;
1945 break;
1946 default:
1947 VIXL_UNIMPLEMENTED();
1948 }
1949
1950 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1951 ssrc[i] = src.Int(srcform, i);
1952 usrc[i] = src.Uint(srcform, i);
1953 }
1954
1955 int offset;
1956 if (upperhalf) {
1957 offset = LaneCountFromFormat(dstform) / 2;
1958 } else {
1959 offset = 0;
1960 dst.ClearForWrite(dstform);
1961 }
1962
1963 for (int i = 0; i < LaneCountFromFormat(srcform); i++) {
1964 // Test for signed saturation
1965 if (ssrc[i] > MaxIntFromFormat(dstform)) {
1966 dst.SetSignedSat(offset + i, true);
1967 } else if (ssrc[i] < MinIntFromFormat(dstform)) {
1968 dst.SetSignedSat(offset + i, false);
1969 }
1970
1971 // Test for unsigned saturation
1972 if (srcIsSigned) {
1973 if (ssrc[i] > static_cast<int64_t>(MaxUintFromFormat(dstform))) {
1974 dst.SetUnsignedSat(offset + i, true);
1975 } else if (ssrc[i] < 0) {
1976 dst.SetUnsignedSat(offset + i, false);
1977 }
1978 } else {
1979 if (usrc[i] > MaxUintFromFormat(dstform)) {
1980 dst.SetUnsignedSat(offset + i, true);
1981 }
1982 }
1983
1984 int64_t result;
1985 if (srcIsSigned) {
1986 result = ssrc[i] & MaxUintFromFormat(dstform);
1987 } else {
1988 result = usrc[i] & MaxUintFromFormat(dstform);
1989 }
1990
1991 if (dstIsSigned) {
1992 dst.SetInt(dstform, offset + i, result);
1993 } else {
1994 dst.SetUint(dstform, offset + i, result);
1995 }
1996 }
1997 return dst;
1998 }
1999
2000
xtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2001 LogicVRegister Simulator::xtn(VectorFormat vform,
2002 LogicVRegister dst,
2003 const LogicVRegister& src) {
2004 return extractnarrow(vform, dst, true, src, true);
2005 }
2006
2007
sqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2008 LogicVRegister Simulator::sqxtn(VectorFormat vform,
2009 LogicVRegister dst,
2010 const LogicVRegister& src) {
2011 return extractnarrow(vform, dst, true, src, true).SignedSaturate(vform);
2012 }
2013
2014
sqxtun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2015 LogicVRegister Simulator::sqxtun(VectorFormat vform,
2016 LogicVRegister dst,
2017 const LogicVRegister& src) {
2018 return extractnarrow(vform, dst, false, src, true).UnsignedSaturate(vform);
2019 }
2020
2021
uqxtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2022 LogicVRegister Simulator::uqxtn(VectorFormat vform,
2023 LogicVRegister dst,
2024 const LogicVRegister& src) {
2025 return extractnarrow(vform, dst, false, src, false).UnsignedSaturate(vform);
2026 }
2027
2028
absdiff(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool issigned)2029 LogicVRegister Simulator::absdiff(VectorFormat vform,
2030 LogicVRegister dst,
2031 const LogicVRegister& src1,
2032 const LogicVRegister& src2,
2033 bool issigned) {
2034 dst.ClearForWrite(vform);
2035 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2036 if (issigned) {
2037 int64_t sr = src1.Int(vform, i) - src2.Int(vform, i);
2038 sr = sr > 0 ? sr : -sr;
2039 dst.SetInt(vform, i, sr);
2040 } else {
2041 int64_t sr = src1.Uint(vform, i) - src2.Uint(vform, i);
2042 sr = sr > 0 ? sr : -sr;
2043 dst.SetUint(vform, i, sr);
2044 }
2045 }
2046 return dst;
2047 }
2048
2049
saba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2050 LogicVRegister Simulator::saba(VectorFormat vform,
2051 LogicVRegister dst,
2052 const LogicVRegister& src1,
2053 const LogicVRegister& src2) {
2054 SimVRegister temp;
2055 dst.ClearForWrite(vform);
2056 absdiff(vform, temp, src1, src2, true);
2057 add(vform, dst, dst, temp);
2058 return dst;
2059 }
2060
2061
uaba(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2062 LogicVRegister Simulator::uaba(VectorFormat vform,
2063 LogicVRegister dst,
2064 const LogicVRegister& src1,
2065 const LogicVRegister& src2) {
2066 SimVRegister temp;
2067 dst.ClearForWrite(vform);
2068 absdiff(vform, temp, src1, src2, false);
2069 add(vform, dst, dst, temp);
2070 return dst;
2071 }
2072
2073
not_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2074 LogicVRegister Simulator::not_(VectorFormat vform,
2075 LogicVRegister dst,
2076 const LogicVRegister& src) {
2077 dst.ClearForWrite(vform);
2078 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2079 dst.SetUint(vform, i, ~src.Uint(vform, i));
2080 }
2081 return dst;
2082 }
2083
2084
rbit(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2085 LogicVRegister Simulator::rbit(VectorFormat vform,
2086 LogicVRegister dst,
2087 const LogicVRegister& src) {
2088 uint64_t result[16];
2089 int laneCount = LaneCountFromFormat(vform);
2090 int laneSizeInBits = LaneSizeInBitsFromFormat(vform);
2091 uint64_t reversed_value;
2092 uint64_t value;
2093 for (int i = 0; i < laneCount; i++) {
2094 value = src.Uint(vform, i);
2095 reversed_value = 0;
2096 for (int j = 0; j < laneSizeInBits; j++) {
2097 reversed_value = (reversed_value << 1) | (value & 1);
2098 value >>= 1;
2099 }
2100 result[i] = reversed_value;
2101 }
2102
2103 dst.ClearForWrite(vform);
2104 for (int i = 0; i < laneCount; ++i) {
2105 dst.SetUint(vform, i, result[i]);
2106 }
2107 return dst;
2108 }
2109
2110
rev(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int revSize)2111 LogicVRegister Simulator::rev(VectorFormat vform,
2112 LogicVRegister dst,
2113 const LogicVRegister& src,
2114 int revSize) {
2115 uint64_t result[16];
2116 int laneCount = LaneCountFromFormat(vform);
2117 int laneSize = LaneSizeInBytesFromFormat(vform);
2118 int lanesPerLoop = revSize / laneSize;
2119 for (int i = 0; i < laneCount; i += lanesPerLoop) {
2120 for (int j = 0; j < lanesPerLoop; j++) {
2121 result[i + lanesPerLoop - 1 - j] = src.Uint(vform, i + j);
2122 }
2123 }
2124 dst.ClearForWrite(vform);
2125 for (int i = 0; i < laneCount; ++i) {
2126 dst.SetUint(vform, i, result[i]);
2127 }
2128 return dst;
2129 }
2130
2131
rev16(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2132 LogicVRegister Simulator::rev16(VectorFormat vform,
2133 LogicVRegister dst,
2134 const LogicVRegister& src) {
2135 return rev(vform, dst, src, 2);
2136 }
2137
2138
rev32(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2139 LogicVRegister Simulator::rev32(VectorFormat vform,
2140 LogicVRegister dst,
2141 const LogicVRegister& src) {
2142 return rev(vform, dst, src, 4);
2143 }
2144
2145
rev64(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2146 LogicVRegister Simulator::rev64(VectorFormat vform,
2147 LogicVRegister dst,
2148 const LogicVRegister& src) {
2149 return rev(vform, dst, src, 8);
2150 }
2151
2152
addlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,bool is_signed,bool do_accumulate)2153 LogicVRegister Simulator::addlp(VectorFormat vform,
2154 LogicVRegister dst,
2155 const LogicVRegister& src,
2156 bool is_signed,
2157 bool do_accumulate) {
2158 VectorFormat vformsrc = VectorFormatHalfWidthDoubleLanes(vform);
2159 VIXL_ASSERT(LaneSizeInBitsFromFormat(vformsrc) <= 32);
2160 VIXL_ASSERT(LaneCountFromFormat(vform) <= 8);
2161
2162 uint64_t result[8];
2163 int lane_count = LaneCountFromFormat(vform);
2164 for (int i = 0; i < lane_count; i++) {
2165 if (is_signed) {
2166 result[i] = static_cast<uint64_t>(src.Int(vformsrc, 2 * i) +
2167 src.Int(vformsrc, 2 * i + 1));
2168 } else {
2169 result[i] = src.Uint(vformsrc, 2 * i) + src.Uint(vformsrc, 2 * i + 1);
2170 }
2171 }
2172
2173 dst.ClearForWrite(vform);
2174 for (int i = 0; i < lane_count; ++i) {
2175 if (do_accumulate) {
2176 result[i] += dst.Uint(vform, i);
2177 }
2178 dst.SetUint(vform, i, result[i]);
2179 }
2180
2181 return dst;
2182 }
2183
2184
saddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2185 LogicVRegister Simulator::saddlp(VectorFormat vform,
2186 LogicVRegister dst,
2187 const LogicVRegister& src) {
2188 return addlp(vform, dst, src, true, false);
2189 }
2190
2191
uaddlp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2192 LogicVRegister Simulator::uaddlp(VectorFormat vform,
2193 LogicVRegister dst,
2194 const LogicVRegister& src) {
2195 return addlp(vform, dst, src, false, false);
2196 }
2197
2198
sadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2199 LogicVRegister Simulator::sadalp(VectorFormat vform,
2200 LogicVRegister dst,
2201 const LogicVRegister& src) {
2202 return addlp(vform, dst, src, true, true);
2203 }
2204
2205
uadalp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2206 LogicVRegister Simulator::uadalp(VectorFormat vform,
2207 LogicVRegister dst,
2208 const LogicVRegister& src) {
2209 return addlp(vform, dst, src, false, true);
2210 }
2211
2212
ext(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)2213 LogicVRegister Simulator::ext(VectorFormat vform,
2214 LogicVRegister dst,
2215 const LogicVRegister& src1,
2216 const LogicVRegister& src2,
2217 int index) {
2218 uint8_t result[16];
2219 int laneCount = LaneCountFromFormat(vform);
2220 for (int i = 0; i < laneCount - index; ++i) {
2221 result[i] = src1.Uint(vform, i + index);
2222 }
2223 for (int i = 0; i < index; ++i) {
2224 result[laneCount - index + i] = src2.Uint(vform, i);
2225 }
2226 dst.ClearForWrite(vform);
2227 for (int i = 0; i < laneCount; ++i) {
2228 dst.SetUint(vform, i, result[i]);
2229 }
2230 return dst;
2231 }
2232
2233 template <typename T>
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2234 LogicVRegister Simulator::fcadd(VectorFormat vform,
2235 LogicVRegister dst, // d
2236 const LogicVRegister& src1, // n
2237 const LogicVRegister& src2, // m
2238 int rot) {
2239 int elements = LaneCountFromFormat(vform);
2240
2241 T element1, element3;
2242 rot = (rot == 1) ? 270 : 90;
2243
2244 // Loop example:
2245 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2246 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2247
2248 for (int e = 0; e <= (elements / 2) - 1; e++) {
2249 switch (rot) {
2250 case 90:
2251 element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2252 element3 = src2.Float<T>(e * 2);
2253 break;
2254 case 270:
2255 element1 = src2.Float<T>(e * 2 + 1);
2256 element3 = FPNeg(src2.Float<T>(e * 2));
2257 break;
2258 default:
2259 VIXL_UNREACHABLE();
2260 return dst; // prevents "element(n) may be unintialized" errors
2261 }
2262 dst.ClearForWrite(vform);
2263 dst.SetFloat<T>(e * 2, FPAdd(src1.Float<T>(e * 2), element1));
2264 dst.SetFloat<T>(e * 2 + 1, FPAdd(src1.Float<T>(e * 2 + 1), element3));
2265 }
2266 return dst;
2267 }
2268
2269
fcadd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2270 LogicVRegister Simulator::fcadd(VectorFormat vform,
2271 LogicVRegister dst, // d
2272 const LogicVRegister& src1, // n
2273 const LogicVRegister& src2, // m
2274 int rot) {
2275 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2276 VIXL_UNIMPLEMENTED();
2277 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2278 fcadd<float>(vform, dst, src1, src2, rot);
2279 } else {
2280 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
2281 fcadd<double>(vform, dst, src1, src2, rot);
2282 }
2283 return dst;
2284 }
2285
2286
2287 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2288 LogicVRegister Simulator::fcmla(VectorFormat vform,
2289 LogicVRegister dst, // d
2290 const LogicVRegister& src1, // n
2291 const LogicVRegister& src2, // m
2292 int index,
2293 int rot) {
2294 int elements = LaneCountFromFormat(vform);
2295
2296 T element1, element2, element3, element4;
2297 rot *= 90;
2298
2299 // Loop example:
2300 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2301 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2302
2303 for (int e = 0; e <= (elements / 2) - 1; e++) {
2304 switch (rot) {
2305 case 0:
2306 element1 = src2.Float<T>(index * 2);
2307 element2 = src1.Float<T>(e * 2);
2308 element3 = src2.Float<T>(index * 2 + 1);
2309 element4 = src1.Float<T>(e * 2);
2310 break;
2311 case 90:
2312 element1 = FPNeg(src2.Float<T>(index * 2 + 1));
2313 element2 = src1.Float<T>(e * 2 + 1);
2314 element3 = src2.Float<T>(index * 2);
2315 element4 = src1.Float<T>(e * 2 + 1);
2316 break;
2317 case 180:
2318 element1 = FPNeg(src2.Float<T>(index * 2));
2319 element2 = src1.Float<T>(e * 2);
2320 element3 = FPNeg(src2.Float<T>(index * 2 + 1));
2321 element4 = src1.Float<T>(e * 2);
2322 break;
2323 case 270:
2324 element1 = src2.Float<T>(index * 2 + 1);
2325 element2 = src1.Float<T>(e * 2 + 1);
2326 element3 = FPNeg(src2.Float<T>(index * 2));
2327 element4 = src1.Float<T>(e * 2 + 1);
2328 break;
2329 default:
2330 VIXL_UNREACHABLE();
2331 return dst; // prevents "element(n) may be unintialized" errors
2332 }
2333 dst.ClearForWrite(vform);
2334 dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1));
2335 dst.SetFloat<T>(e * 2 + 1,
2336 FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3));
2337 }
2338 return dst;
2339 }
2340
2341
2342 template <typename T>
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2343 LogicVRegister Simulator::fcmla(VectorFormat vform,
2344 LogicVRegister dst, // d
2345 const LogicVRegister& src1, // n
2346 const LogicVRegister& src2, // m
2347 int rot) {
2348 int elements = LaneCountFromFormat(vform);
2349
2350 T element1, element2, element3, element4;
2351 rot *= 90;
2352
2353 // Loop example:
2354 // 2S --> (2/2 = 1 - 1 = 0) --> 1 x Complex Number (2x components: r+i)
2355 // 4S --> (4/2 = 2) - 1 = 1) --> 2 x Complex Number (2x2 components: r+i)
2356
2357 for (int e = 0; e <= (elements / 2) - 1; e++) {
2358 switch (rot) {
2359 case 0:
2360 element1 = src2.Float<T>(e * 2);
2361 element2 = src1.Float<T>(e * 2);
2362 element3 = src2.Float<T>(e * 2 + 1);
2363 element4 = src1.Float<T>(e * 2);
2364 break;
2365 case 90:
2366 element1 = FPNeg(src2.Float<T>(e * 2 + 1));
2367 element2 = src1.Float<T>(e * 2 + 1);
2368 element3 = src2.Float<T>(e * 2);
2369 element4 = src1.Float<T>(e * 2 + 1);
2370 break;
2371 case 180:
2372 element1 = FPNeg(src2.Float<T>(e * 2));
2373 element2 = src1.Float<T>(e * 2);
2374 element3 = FPNeg(src2.Float<T>(e * 2 + 1));
2375 element4 = src1.Float<T>(e * 2);
2376 break;
2377 case 270:
2378 element1 = src2.Float<T>(e * 2 + 1);
2379 element2 = src1.Float<T>(e * 2 + 1);
2380 element3 = FPNeg(src2.Float<T>(e * 2));
2381 element4 = src1.Float<T>(e * 2 + 1);
2382 break;
2383 default:
2384 VIXL_UNREACHABLE();
2385 return dst; // prevents "element(n) may be unintialized" errors
2386 }
2387 dst.ClearForWrite(vform);
2388 dst.SetFloat<T>(e * 2, FPMulAdd(dst.Float<T>(e * 2), element2, element1));
2389 dst.SetFloat<T>(e * 2 + 1,
2390 FPMulAdd(dst.Float<T>(e * 2 + 1), element4, element3));
2391 }
2392 return dst;
2393 }
2394
2395
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int rot)2396 LogicVRegister Simulator::fcmla(VectorFormat vform,
2397 LogicVRegister dst, // d
2398 const LogicVRegister& src1, // n
2399 const LogicVRegister& src2, // m
2400 int rot) {
2401 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2402 VIXL_UNIMPLEMENTED();
2403 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2404 fcmla<float>(vform, dst, src1, src2, rot);
2405 } else {
2406 fcmla<double>(vform, dst, src1, src2, rot);
2407 }
2408 return dst;
2409 }
2410
2411
fcmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index,int rot)2412 LogicVRegister Simulator::fcmla(VectorFormat vform,
2413 LogicVRegister dst, // d
2414 const LogicVRegister& src1, // n
2415 const LogicVRegister& src2, // m
2416 int index,
2417 int rot) {
2418 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
2419 VIXL_UNIMPLEMENTED();
2420 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
2421 fcmla<float>(vform, dst, src1, src2, index, rot);
2422 } else {
2423 fcmla<double>(vform, dst, src1, src2, index, rot);
2424 }
2425 return dst;
2426 }
2427
2428
dup_element(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int src_index)2429 LogicVRegister Simulator::dup_element(VectorFormat vform,
2430 LogicVRegister dst,
2431 const LogicVRegister& src,
2432 int src_index) {
2433 int laneCount = LaneCountFromFormat(vform);
2434 uint64_t value = src.Uint(vform, src_index);
2435 dst.ClearForWrite(vform);
2436 for (int i = 0; i < laneCount; ++i) {
2437 dst.SetUint(vform, i, value);
2438 }
2439 return dst;
2440 }
2441
2442
dup_immediate(VectorFormat vform,LogicVRegister dst,uint64_t imm)2443 LogicVRegister Simulator::dup_immediate(VectorFormat vform,
2444 LogicVRegister dst,
2445 uint64_t imm) {
2446 int laneCount = LaneCountFromFormat(vform);
2447 uint64_t value = imm & MaxUintFromFormat(vform);
2448 dst.ClearForWrite(vform);
2449 for (int i = 0; i < laneCount; ++i) {
2450 dst.SetUint(vform, i, value);
2451 }
2452 return dst;
2453 }
2454
2455
ins_element(VectorFormat vform,LogicVRegister dst,int dst_index,const LogicVRegister & src,int src_index)2456 LogicVRegister Simulator::ins_element(VectorFormat vform,
2457 LogicVRegister dst,
2458 int dst_index,
2459 const LogicVRegister& src,
2460 int src_index) {
2461 dst.SetUint(vform, dst_index, src.Uint(vform, src_index));
2462 return dst;
2463 }
2464
2465
ins_immediate(VectorFormat vform,LogicVRegister dst,int dst_index,uint64_t imm)2466 LogicVRegister Simulator::ins_immediate(VectorFormat vform,
2467 LogicVRegister dst,
2468 int dst_index,
2469 uint64_t imm) {
2470 uint64_t value = imm & MaxUintFromFormat(vform);
2471 dst.SetUint(vform, dst_index, value);
2472 return dst;
2473 }
2474
2475
movi(VectorFormat vform,LogicVRegister dst,uint64_t imm)2476 LogicVRegister Simulator::movi(VectorFormat vform,
2477 LogicVRegister dst,
2478 uint64_t imm) {
2479 int laneCount = LaneCountFromFormat(vform);
2480 dst.ClearForWrite(vform);
2481 for (int i = 0; i < laneCount; ++i) {
2482 dst.SetUint(vform, i, imm);
2483 }
2484 return dst;
2485 }
2486
2487
mvni(VectorFormat vform,LogicVRegister dst,uint64_t imm)2488 LogicVRegister Simulator::mvni(VectorFormat vform,
2489 LogicVRegister dst,
2490 uint64_t imm) {
2491 int laneCount = LaneCountFromFormat(vform);
2492 dst.ClearForWrite(vform);
2493 for (int i = 0; i < laneCount; ++i) {
2494 dst.SetUint(vform, i, ~imm);
2495 }
2496 return dst;
2497 }
2498
2499
orr(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,uint64_t imm)2500 LogicVRegister Simulator::orr(VectorFormat vform,
2501 LogicVRegister dst,
2502 const LogicVRegister& src,
2503 uint64_t imm) {
2504 uint64_t result[16];
2505 int laneCount = LaneCountFromFormat(vform);
2506 for (int i = 0; i < laneCount; ++i) {
2507 result[i] = src.Uint(vform, i) | imm;
2508 }
2509 dst.ClearForWrite(vform);
2510 for (int i = 0; i < laneCount; ++i) {
2511 dst.SetUint(vform, i, result[i]);
2512 }
2513 return dst;
2514 }
2515
2516
uxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2517 LogicVRegister Simulator::uxtl(VectorFormat vform,
2518 LogicVRegister dst,
2519 const LogicVRegister& src) {
2520 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2521
2522 dst.ClearForWrite(vform);
2523 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2524 dst.SetUint(vform, i, src.Uint(vform_half, i));
2525 }
2526 return dst;
2527 }
2528
2529
sxtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2530 LogicVRegister Simulator::sxtl(VectorFormat vform,
2531 LogicVRegister dst,
2532 const LogicVRegister& src) {
2533 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2534
2535 dst.ClearForWrite(vform);
2536 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2537 dst.SetInt(vform, i, src.Int(vform_half, i));
2538 }
2539 return dst;
2540 }
2541
2542
uxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2543 LogicVRegister Simulator::uxtl2(VectorFormat vform,
2544 LogicVRegister dst,
2545 const LogicVRegister& src) {
2546 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2547 int lane_count = LaneCountFromFormat(vform);
2548
2549 dst.ClearForWrite(vform);
2550 for (int i = 0; i < lane_count; i++) {
2551 dst.SetUint(vform, i, src.Uint(vform_half, lane_count + i));
2552 }
2553 return dst;
2554 }
2555
2556
sxtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)2557 LogicVRegister Simulator::sxtl2(VectorFormat vform,
2558 LogicVRegister dst,
2559 const LogicVRegister& src) {
2560 VectorFormat vform_half = VectorFormatHalfWidth(vform);
2561 int lane_count = LaneCountFromFormat(vform);
2562
2563 dst.ClearForWrite(vform);
2564 for (int i = 0; i < lane_count; i++) {
2565 dst.SetInt(vform, i, src.Int(vform_half, lane_count + i));
2566 }
2567 return dst;
2568 }
2569
2570
shrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2571 LogicVRegister Simulator::shrn(VectorFormat vform,
2572 LogicVRegister dst,
2573 const LogicVRegister& src,
2574 int shift) {
2575 SimVRegister temp;
2576 VectorFormat vform_src = VectorFormatDoubleWidth(vform);
2577 VectorFormat vform_dst = vform;
2578 LogicVRegister shifted_src = ushr(vform_src, temp, src, shift);
2579 return extractnarrow(vform_dst, dst, false, shifted_src, false);
2580 }
2581
2582
shrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2583 LogicVRegister Simulator::shrn2(VectorFormat vform,
2584 LogicVRegister dst,
2585 const LogicVRegister& src,
2586 int shift) {
2587 SimVRegister temp;
2588 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2589 VectorFormat vformdst = vform;
2590 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift);
2591 return extractnarrow(vformdst, dst, false, shifted_src, false);
2592 }
2593
2594
rshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2595 LogicVRegister Simulator::rshrn(VectorFormat vform,
2596 LogicVRegister dst,
2597 const LogicVRegister& src,
2598 int shift) {
2599 SimVRegister temp;
2600 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2601 VectorFormat vformdst = vform;
2602 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2603 return extractnarrow(vformdst, dst, false, shifted_src, false);
2604 }
2605
2606
rshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2607 LogicVRegister Simulator::rshrn2(VectorFormat vform,
2608 LogicVRegister dst,
2609 const LogicVRegister& src,
2610 int shift) {
2611 SimVRegister temp;
2612 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2613 VectorFormat vformdst = vform;
2614 LogicVRegister shifted_src = ushr(vformsrc, temp, src, shift).Round(vformsrc);
2615 return extractnarrow(vformdst, dst, false, shifted_src, false);
2616 }
2617
2618
Table(VectorFormat vform,LogicVRegister dst,const LogicVRegister & ind,bool zero_out_of_bounds,const LogicVRegister * tab1,const LogicVRegister * tab2,const LogicVRegister * tab3,const LogicVRegister * tab4)2619 LogicVRegister Simulator::Table(VectorFormat vform,
2620 LogicVRegister dst,
2621 const LogicVRegister& ind,
2622 bool zero_out_of_bounds,
2623 const LogicVRegister* tab1,
2624 const LogicVRegister* tab2,
2625 const LogicVRegister* tab3,
2626 const LogicVRegister* tab4) {
2627 VIXL_ASSERT(tab1 != NULL);
2628 const LogicVRegister* tab[4] = {tab1, tab2, tab3, tab4};
2629 uint64_t result[kMaxLanesPerVector];
2630 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2631 result[i] = zero_out_of_bounds ? 0 : dst.Uint(kFormat16B, i);
2632 }
2633 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
2634 uint64_t j = ind.Uint(vform, i);
2635 int tab_idx = static_cast<int>(j >> 4);
2636 int j_idx = static_cast<int>(j & 15);
2637 if ((tab_idx < 4) && (tab[tab_idx] != NULL)) {
2638 result[i] = tab[tab_idx]->Uint(kFormat16B, j_idx);
2639 }
2640 }
2641 dst.SetUintArray(vform, result);
2642 return dst;
2643 }
2644
2645
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2646 LogicVRegister Simulator::tbl(VectorFormat vform,
2647 LogicVRegister dst,
2648 const LogicVRegister& tab,
2649 const LogicVRegister& ind) {
2650 return Table(vform, dst, ind, true, &tab);
2651 }
2652
2653
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2654 LogicVRegister Simulator::tbl(VectorFormat vform,
2655 LogicVRegister dst,
2656 const LogicVRegister& tab,
2657 const LogicVRegister& tab2,
2658 const LogicVRegister& ind) {
2659 return Table(vform, dst, ind, true, &tab, &tab2);
2660 }
2661
2662
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2663 LogicVRegister Simulator::tbl(VectorFormat vform,
2664 LogicVRegister dst,
2665 const LogicVRegister& tab,
2666 const LogicVRegister& tab2,
2667 const LogicVRegister& tab3,
2668 const LogicVRegister& ind) {
2669 return Table(vform, dst, ind, true, &tab, &tab2, &tab3);
2670 }
2671
2672
tbl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2673 LogicVRegister Simulator::tbl(VectorFormat vform,
2674 LogicVRegister dst,
2675 const LogicVRegister& tab,
2676 const LogicVRegister& tab2,
2677 const LogicVRegister& tab3,
2678 const LogicVRegister& tab4,
2679 const LogicVRegister& ind) {
2680 return Table(vform, dst, ind, true, &tab, &tab2, &tab3, &tab4);
2681 }
2682
2683
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & ind)2684 LogicVRegister Simulator::tbx(VectorFormat vform,
2685 LogicVRegister dst,
2686 const LogicVRegister& tab,
2687 const LogicVRegister& ind) {
2688 return Table(vform, dst, ind, false, &tab);
2689 }
2690
2691
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & ind)2692 LogicVRegister Simulator::tbx(VectorFormat vform,
2693 LogicVRegister dst,
2694 const LogicVRegister& tab,
2695 const LogicVRegister& tab2,
2696 const LogicVRegister& ind) {
2697 return Table(vform, dst, ind, false, &tab, &tab2);
2698 }
2699
2700
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & ind)2701 LogicVRegister Simulator::tbx(VectorFormat vform,
2702 LogicVRegister dst,
2703 const LogicVRegister& tab,
2704 const LogicVRegister& tab2,
2705 const LogicVRegister& tab3,
2706 const LogicVRegister& ind) {
2707 return Table(vform, dst, ind, false, &tab, &tab2, &tab3);
2708 }
2709
2710
tbx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & tab,const LogicVRegister & tab2,const LogicVRegister & tab3,const LogicVRegister & tab4,const LogicVRegister & ind)2711 LogicVRegister Simulator::tbx(VectorFormat vform,
2712 LogicVRegister dst,
2713 const LogicVRegister& tab,
2714 const LogicVRegister& tab2,
2715 const LogicVRegister& tab3,
2716 const LogicVRegister& tab4,
2717 const LogicVRegister& ind) {
2718 return Table(vform, dst, ind, false, &tab, &tab2, &tab3, &tab4);
2719 }
2720
2721
uqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2722 LogicVRegister Simulator::uqshrn(VectorFormat vform,
2723 LogicVRegister dst,
2724 const LogicVRegister& src,
2725 int shift) {
2726 return shrn(vform, dst, src, shift).UnsignedSaturate(vform);
2727 }
2728
2729
uqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2730 LogicVRegister Simulator::uqshrn2(VectorFormat vform,
2731 LogicVRegister dst,
2732 const LogicVRegister& src,
2733 int shift) {
2734 return shrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2735 }
2736
2737
uqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2738 LogicVRegister Simulator::uqrshrn(VectorFormat vform,
2739 LogicVRegister dst,
2740 const LogicVRegister& src,
2741 int shift) {
2742 return rshrn(vform, dst, src, shift).UnsignedSaturate(vform);
2743 }
2744
2745
uqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2746 LogicVRegister Simulator::uqrshrn2(VectorFormat vform,
2747 LogicVRegister dst,
2748 const LogicVRegister& src,
2749 int shift) {
2750 return rshrn2(vform, dst, src, shift).UnsignedSaturate(vform);
2751 }
2752
2753
sqshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2754 LogicVRegister Simulator::sqshrn(VectorFormat vform,
2755 LogicVRegister dst,
2756 const LogicVRegister& src,
2757 int shift) {
2758 SimVRegister temp;
2759 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2760 VectorFormat vformdst = vform;
2761 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2762 return sqxtn(vformdst, dst, shifted_src);
2763 }
2764
2765
sqshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2766 LogicVRegister Simulator::sqshrn2(VectorFormat vform,
2767 LogicVRegister dst,
2768 const LogicVRegister& src,
2769 int shift) {
2770 SimVRegister temp;
2771 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2772 VectorFormat vformdst = vform;
2773 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2774 return sqxtn(vformdst, dst, shifted_src);
2775 }
2776
2777
sqrshrn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2778 LogicVRegister Simulator::sqrshrn(VectorFormat vform,
2779 LogicVRegister dst,
2780 const LogicVRegister& src,
2781 int shift) {
2782 SimVRegister temp;
2783 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2784 VectorFormat vformdst = vform;
2785 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2786 return sqxtn(vformdst, dst, shifted_src);
2787 }
2788
2789
sqrshrn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2790 LogicVRegister Simulator::sqrshrn2(VectorFormat vform,
2791 LogicVRegister dst,
2792 const LogicVRegister& src,
2793 int shift) {
2794 SimVRegister temp;
2795 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2796 VectorFormat vformdst = vform;
2797 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2798 return sqxtn(vformdst, dst, shifted_src);
2799 }
2800
2801
sqshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2802 LogicVRegister Simulator::sqshrun(VectorFormat vform,
2803 LogicVRegister dst,
2804 const LogicVRegister& src,
2805 int shift) {
2806 SimVRegister temp;
2807 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2808 VectorFormat vformdst = vform;
2809 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2810 return sqxtun(vformdst, dst, shifted_src);
2811 }
2812
2813
sqshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2814 LogicVRegister Simulator::sqshrun2(VectorFormat vform,
2815 LogicVRegister dst,
2816 const LogicVRegister& src,
2817 int shift) {
2818 SimVRegister temp;
2819 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2820 VectorFormat vformdst = vform;
2821 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift);
2822 return sqxtun(vformdst, dst, shifted_src);
2823 }
2824
2825
sqrshrun(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2826 LogicVRegister Simulator::sqrshrun(VectorFormat vform,
2827 LogicVRegister dst,
2828 const LogicVRegister& src,
2829 int shift) {
2830 SimVRegister temp;
2831 VectorFormat vformsrc = VectorFormatDoubleWidth(vform);
2832 VectorFormat vformdst = vform;
2833 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2834 return sqxtun(vformdst, dst, shifted_src);
2835 }
2836
2837
sqrshrun2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int shift)2838 LogicVRegister Simulator::sqrshrun2(VectorFormat vform,
2839 LogicVRegister dst,
2840 const LogicVRegister& src,
2841 int shift) {
2842 SimVRegister temp;
2843 VectorFormat vformsrc = VectorFormatDoubleWidth(VectorFormatHalfLanes(vform));
2844 VectorFormat vformdst = vform;
2845 LogicVRegister shifted_src = sshr(vformsrc, temp, src, shift).Round(vformsrc);
2846 return sqxtun(vformdst, dst, shifted_src);
2847 }
2848
2849
uaddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2850 LogicVRegister Simulator::uaddl(VectorFormat vform,
2851 LogicVRegister dst,
2852 const LogicVRegister& src1,
2853 const LogicVRegister& src2) {
2854 SimVRegister temp1, temp2;
2855 uxtl(vform, temp1, src1);
2856 uxtl(vform, temp2, src2);
2857 add(vform, dst, temp1, temp2);
2858 return dst;
2859 }
2860
2861
uaddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2862 LogicVRegister Simulator::uaddl2(VectorFormat vform,
2863 LogicVRegister dst,
2864 const LogicVRegister& src1,
2865 const LogicVRegister& src2) {
2866 SimVRegister temp1, temp2;
2867 uxtl2(vform, temp1, src1);
2868 uxtl2(vform, temp2, src2);
2869 add(vform, dst, temp1, temp2);
2870 return dst;
2871 }
2872
2873
uaddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2874 LogicVRegister Simulator::uaddw(VectorFormat vform,
2875 LogicVRegister dst,
2876 const LogicVRegister& src1,
2877 const LogicVRegister& src2) {
2878 SimVRegister temp;
2879 uxtl(vform, temp, src2);
2880 add(vform, dst, src1, temp);
2881 return dst;
2882 }
2883
2884
uaddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2885 LogicVRegister Simulator::uaddw2(VectorFormat vform,
2886 LogicVRegister dst,
2887 const LogicVRegister& src1,
2888 const LogicVRegister& src2) {
2889 SimVRegister temp;
2890 uxtl2(vform, temp, src2);
2891 add(vform, dst, src1, temp);
2892 return dst;
2893 }
2894
2895
saddl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2896 LogicVRegister Simulator::saddl(VectorFormat vform,
2897 LogicVRegister dst,
2898 const LogicVRegister& src1,
2899 const LogicVRegister& src2) {
2900 SimVRegister temp1, temp2;
2901 sxtl(vform, temp1, src1);
2902 sxtl(vform, temp2, src2);
2903 add(vform, dst, temp1, temp2);
2904 return dst;
2905 }
2906
2907
saddl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2908 LogicVRegister Simulator::saddl2(VectorFormat vform,
2909 LogicVRegister dst,
2910 const LogicVRegister& src1,
2911 const LogicVRegister& src2) {
2912 SimVRegister temp1, temp2;
2913 sxtl2(vform, temp1, src1);
2914 sxtl2(vform, temp2, src2);
2915 add(vform, dst, temp1, temp2);
2916 return dst;
2917 }
2918
2919
saddw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2920 LogicVRegister Simulator::saddw(VectorFormat vform,
2921 LogicVRegister dst,
2922 const LogicVRegister& src1,
2923 const LogicVRegister& src2) {
2924 SimVRegister temp;
2925 sxtl(vform, temp, src2);
2926 add(vform, dst, src1, temp);
2927 return dst;
2928 }
2929
2930
saddw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2931 LogicVRegister Simulator::saddw2(VectorFormat vform,
2932 LogicVRegister dst,
2933 const LogicVRegister& src1,
2934 const LogicVRegister& src2) {
2935 SimVRegister temp;
2936 sxtl2(vform, temp, src2);
2937 add(vform, dst, src1, temp);
2938 return dst;
2939 }
2940
2941
usubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2942 LogicVRegister Simulator::usubl(VectorFormat vform,
2943 LogicVRegister dst,
2944 const LogicVRegister& src1,
2945 const LogicVRegister& src2) {
2946 SimVRegister temp1, temp2;
2947 uxtl(vform, temp1, src1);
2948 uxtl(vform, temp2, src2);
2949 sub(vform, dst, temp1, temp2);
2950 return dst;
2951 }
2952
2953
usubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2954 LogicVRegister Simulator::usubl2(VectorFormat vform,
2955 LogicVRegister dst,
2956 const LogicVRegister& src1,
2957 const LogicVRegister& src2) {
2958 SimVRegister temp1, temp2;
2959 uxtl2(vform, temp1, src1);
2960 uxtl2(vform, temp2, src2);
2961 sub(vform, dst, temp1, temp2);
2962 return dst;
2963 }
2964
2965
usubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2966 LogicVRegister Simulator::usubw(VectorFormat vform,
2967 LogicVRegister dst,
2968 const LogicVRegister& src1,
2969 const LogicVRegister& src2) {
2970 SimVRegister temp;
2971 uxtl(vform, temp, src2);
2972 sub(vform, dst, src1, temp);
2973 return dst;
2974 }
2975
2976
usubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2977 LogicVRegister Simulator::usubw2(VectorFormat vform,
2978 LogicVRegister dst,
2979 const LogicVRegister& src1,
2980 const LogicVRegister& src2) {
2981 SimVRegister temp;
2982 uxtl2(vform, temp, src2);
2983 sub(vform, dst, src1, temp);
2984 return dst;
2985 }
2986
2987
ssubl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)2988 LogicVRegister Simulator::ssubl(VectorFormat vform,
2989 LogicVRegister dst,
2990 const LogicVRegister& src1,
2991 const LogicVRegister& src2) {
2992 SimVRegister temp1, temp2;
2993 sxtl(vform, temp1, src1);
2994 sxtl(vform, temp2, src2);
2995 sub(vform, dst, temp1, temp2);
2996 return dst;
2997 }
2998
2999
ssubl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3000 LogicVRegister Simulator::ssubl2(VectorFormat vform,
3001 LogicVRegister dst,
3002 const LogicVRegister& src1,
3003 const LogicVRegister& src2) {
3004 SimVRegister temp1, temp2;
3005 sxtl2(vform, temp1, src1);
3006 sxtl2(vform, temp2, src2);
3007 sub(vform, dst, temp1, temp2);
3008 return dst;
3009 }
3010
3011
ssubw(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3012 LogicVRegister Simulator::ssubw(VectorFormat vform,
3013 LogicVRegister dst,
3014 const LogicVRegister& src1,
3015 const LogicVRegister& src2) {
3016 SimVRegister temp;
3017 sxtl(vform, temp, src2);
3018 sub(vform, dst, src1, temp);
3019 return dst;
3020 }
3021
3022
ssubw2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3023 LogicVRegister Simulator::ssubw2(VectorFormat vform,
3024 LogicVRegister dst,
3025 const LogicVRegister& src1,
3026 const LogicVRegister& src2) {
3027 SimVRegister temp;
3028 sxtl2(vform, temp, src2);
3029 sub(vform, dst, src1, temp);
3030 return dst;
3031 }
3032
3033
uabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3034 LogicVRegister Simulator::uabal(VectorFormat vform,
3035 LogicVRegister dst,
3036 const LogicVRegister& src1,
3037 const LogicVRegister& src2) {
3038 SimVRegister temp1, temp2;
3039 uxtl(vform, temp1, src1);
3040 uxtl(vform, temp2, src2);
3041 uaba(vform, dst, temp1, temp2);
3042 return dst;
3043 }
3044
3045
uabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3046 LogicVRegister Simulator::uabal2(VectorFormat vform,
3047 LogicVRegister dst,
3048 const LogicVRegister& src1,
3049 const LogicVRegister& src2) {
3050 SimVRegister temp1, temp2;
3051 uxtl2(vform, temp1, src1);
3052 uxtl2(vform, temp2, src2);
3053 uaba(vform, dst, temp1, temp2);
3054 return dst;
3055 }
3056
3057
sabal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3058 LogicVRegister Simulator::sabal(VectorFormat vform,
3059 LogicVRegister dst,
3060 const LogicVRegister& src1,
3061 const LogicVRegister& src2) {
3062 SimVRegister temp1, temp2;
3063 sxtl(vform, temp1, src1);
3064 sxtl(vform, temp2, src2);
3065 saba(vform, dst, temp1, temp2);
3066 return dst;
3067 }
3068
3069
sabal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3070 LogicVRegister Simulator::sabal2(VectorFormat vform,
3071 LogicVRegister dst,
3072 const LogicVRegister& src1,
3073 const LogicVRegister& src2) {
3074 SimVRegister temp1, temp2;
3075 sxtl2(vform, temp1, src1);
3076 sxtl2(vform, temp2, src2);
3077 saba(vform, dst, temp1, temp2);
3078 return dst;
3079 }
3080
3081
uabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3082 LogicVRegister Simulator::uabdl(VectorFormat vform,
3083 LogicVRegister dst,
3084 const LogicVRegister& src1,
3085 const LogicVRegister& src2) {
3086 SimVRegister temp1, temp2;
3087 uxtl(vform, temp1, src1);
3088 uxtl(vform, temp2, src2);
3089 absdiff(vform, dst, temp1, temp2, false);
3090 return dst;
3091 }
3092
3093
uabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3094 LogicVRegister Simulator::uabdl2(VectorFormat vform,
3095 LogicVRegister dst,
3096 const LogicVRegister& src1,
3097 const LogicVRegister& src2) {
3098 SimVRegister temp1, temp2;
3099 uxtl2(vform, temp1, src1);
3100 uxtl2(vform, temp2, src2);
3101 absdiff(vform, dst, temp1, temp2, false);
3102 return dst;
3103 }
3104
3105
sabdl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3106 LogicVRegister Simulator::sabdl(VectorFormat vform,
3107 LogicVRegister dst,
3108 const LogicVRegister& src1,
3109 const LogicVRegister& src2) {
3110 SimVRegister temp1, temp2;
3111 sxtl(vform, temp1, src1);
3112 sxtl(vform, temp2, src2);
3113 absdiff(vform, dst, temp1, temp2, true);
3114 return dst;
3115 }
3116
3117
sabdl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3118 LogicVRegister Simulator::sabdl2(VectorFormat vform,
3119 LogicVRegister dst,
3120 const LogicVRegister& src1,
3121 const LogicVRegister& src2) {
3122 SimVRegister temp1, temp2;
3123 sxtl2(vform, temp1, src1);
3124 sxtl2(vform, temp2, src2);
3125 absdiff(vform, dst, temp1, temp2, true);
3126 return dst;
3127 }
3128
3129
umull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3130 LogicVRegister Simulator::umull(VectorFormat vform,
3131 LogicVRegister dst,
3132 const LogicVRegister& src1,
3133 const LogicVRegister& src2) {
3134 SimVRegister temp1, temp2;
3135 uxtl(vform, temp1, src1);
3136 uxtl(vform, temp2, src2);
3137 mul(vform, dst, temp1, temp2);
3138 return dst;
3139 }
3140
3141
umull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3142 LogicVRegister Simulator::umull2(VectorFormat vform,
3143 LogicVRegister dst,
3144 const LogicVRegister& src1,
3145 const LogicVRegister& src2) {
3146 SimVRegister temp1, temp2;
3147 uxtl2(vform, temp1, src1);
3148 uxtl2(vform, temp2, src2);
3149 mul(vform, dst, temp1, temp2);
3150 return dst;
3151 }
3152
3153
smull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3154 LogicVRegister Simulator::smull(VectorFormat vform,
3155 LogicVRegister dst,
3156 const LogicVRegister& src1,
3157 const LogicVRegister& src2) {
3158 SimVRegister temp1, temp2;
3159 sxtl(vform, temp1, src1);
3160 sxtl(vform, temp2, src2);
3161 mul(vform, dst, temp1, temp2);
3162 return dst;
3163 }
3164
3165
smull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3166 LogicVRegister Simulator::smull2(VectorFormat vform,
3167 LogicVRegister dst,
3168 const LogicVRegister& src1,
3169 const LogicVRegister& src2) {
3170 SimVRegister temp1, temp2;
3171 sxtl2(vform, temp1, src1);
3172 sxtl2(vform, temp2, src2);
3173 mul(vform, dst, temp1, temp2);
3174 return dst;
3175 }
3176
3177
umlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3178 LogicVRegister Simulator::umlsl(VectorFormat vform,
3179 LogicVRegister dst,
3180 const LogicVRegister& src1,
3181 const LogicVRegister& src2) {
3182 SimVRegister temp1, temp2;
3183 uxtl(vform, temp1, src1);
3184 uxtl(vform, temp2, src2);
3185 mls(vform, dst, temp1, temp2);
3186 return dst;
3187 }
3188
3189
umlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3190 LogicVRegister Simulator::umlsl2(VectorFormat vform,
3191 LogicVRegister dst,
3192 const LogicVRegister& src1,
3193 const LogicVRegister& src2) {
3194 SimVRegister temp1, temp2;
3195 uxtl2(vform, temp1, src1);
3196 uxtl2(vform, temp2, src2);
3197 mls(vform, dst, temp1, temp2);
3198 return dst;
3199 }
3200
3201
smlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3202 LogicVRegister Simulator::smlsl(VectorFormat vform,
3203 LogicVRegister dst,
3204 const LogicVRegister& src1,
3205 const LogicVRegister& src2) {
3206 SimVRegister temp1, temp2;
3207 sxtl(vform, temp1, src1);
3208 sxtl(vform, temp2, src2);
3209 mls(vform, dst, temp1, temp2);
3210 return dst;
3211 }
3212
3213
smlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3214 LogicVRegister Simulator::smlsl2(VectorFormat vform,
3215 LogicVRegister dst,
3216 const LogicVRegister& src1,
3217 const LogicVRegister& src2) {
3218 SimVRegister temp1, temp2;
3219 sxtl2(vform, temp1, src1);
3220 sxtl2(vform, temp2, src2);
3221 mls(vform, dst, temp1, temp2);
3222 return dst;
3223 }
3224
3225
umlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3226 LogicVRegister Simulator::umlal(VectorFormat vform,
3227 LogicVRegister dst,
3228 const LogicVRegister& src1,
3229 const LogicVRegister& src2) {
3230 SimVRegister temp1, temp2;
3231 uxtl(vform, temp1, src1);
3232 uxtl(vform, temp2, src2);
3233 mla(vform, dst, temp1, temp2);
3234 return dst;
3235 }
3236
3237
umlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3238 LogicVRegister Simulator::umlal2(VectorFormat vform,
3239 LogicVRegister dst,
3240 const LogicVRegister& src1,
3241 const LogicVRegister& src2) {
3242 SimVRegister temp1, temp2;
3243 uxtl2(vform, temp1, src1);
3244 uxtl2(vform, temp2, src2);
3245 mla(vform, dst, temp1, temp2);
3246 return dst;
3247 }
3248
3249
smlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3250 LogicVRegister Simulator::smlal(VectorFormat vform,
3251 LogicVRegister dst,
3252 const LogicVRegister& src1,
3253 const LogicVRegister& src2) {
3254 SimVRegister temp1, temp2;
3255 sxtl(vform, temp1, src1);
3256 sxtl(vform, temp2, src2);
3257 mla(vform, dst, temp1, temp2);
3258 return dst;
3259 }
3260
3261
smlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3262 LogicVRegister Simulator::smlal2(VectorFormat vform,
3263 LogicVRegister dst,
3264 const LogicVRegister& src1,
3265 const LogicVRegister& src2) {
3266 SimVRegister temp1, temp2;
3267 sxtl2(vform, temp1, src1);
3268 sxtl2(vform, temp2, src2);
3269 mla(vform, dst, temp1, temp2);
3270 return dst;
3271 }
3272
3273
sqdmlal(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3274 LogicVRegister Simulator::sqdmlal(VectorFormat vform,
3275 LogicVRegister dst,
3276 const LogicVRegister& src1,
3277 const LogicVRegister& src2) {
3278 SimVRegister temp;
3279 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3280 return add(vform, dst, dst, product).SignedSaturate(vform);
3281 }
3282
3283
sqdmlal2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3284 LogicVRegister Simulator::sqdmlal2(VectorFormat vform,
3285 LogicVRegister dst,
3286 const LogicVRegister& src1,
3287 const LogicVRegister& src2) {
3288 SimVRegister temp;
3289 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3290 return add(vform, dst, dst, product).SignedSaturate(vform);
3291 }
3292
3293
sqdmlsl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3294 LogicVRegister Simulator::sqdmlsl(VectorFormat vform,
3295 LogicVRegister dst,
3296 const LogicVRegister& src1,
3297 const LogicVRegister& src2) {
3298 SimVRegister temp;
3299 LogicVRegister product = sqdmull(vform, temp, src1, src2);
3300 return sub(vform, dst, dst, product).SignedSaturate(vform);
3301 }
3302
3303
sqdmlsl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3304 LogicVRegister Simulator::sqdmlsl2(VectorFormat vform,
3305 LogicVRegister dst,
3306 const LogicVRegister& src1,
3307 const LogicVRegister& src2) {
3308 SimVRegister temp;
3309 LogicVRegister product = sqdmull2(vform, temp, src1, src2);
3310 return sub(vform, dst, dst, product).SignedSaturate(vform);
3311 }
3312
3313
sqdmull(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3314 LogicVRegister Simulator::sqdmull(VectorFormat vform,
3315 LogicVRegister dst,
3316 const LogicVRegister& src1,
3317 const LogicVRegister& src2) {
3318 SimVRegister temp;
3319 LogicVRegister product = smull(vform, temp, src1, src2);
3320 return add(vform, dst, product, product).SignedSaturate(vform);
3321 }
3322
3323
sqdmull2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3324 LogicVRegister Simulator::sqdmull2(VectorFormat vform,
3325 LogicVRegister dst,
3326 const LogicVRegister& src1,
3327 const LogicVRegister& src2) {
3328 SimVRegister temp;
3329 LogicVRegister product = smull2(vform, temp, src1, src2);
3330 return add(vform, dst, product, product).SignedSaturate(vform);
3331 }
3332
3333
sqrdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3334 LogicVRegister Simulator::sqrdmulh(VectorFormat vform,
3335 LogicVRegister dst,
3336 const LogicVRegister& src1,
3337 const LogicVRegister& src2,
3338 bool round) {
3339 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3340 // To avoid this, we use (src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3341 // which is same as (2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3342
3343 int esize = LaneSizeInBitsFromFormat(vform);
3344 int round_const = round ? (1 << (esize - 2)) : 0;
3345 int64_t product;
3346
3347 dst.ClearForWrite(vform);
3348 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3349 product = src1.Int(vform, i) * src2.Int(vform, i);
3350 product += round_const;
3351 product = product >> (esize - 1);
3352
3353 if (product > MaxIntFromFormat(vform)) {
3354 product = MaxIntFromFormat(vform);
3355 } else if (product < MinIntFromFormat(vform)) {
3356 product = MinIntFromFormat(vform);
3357 }
3358 dst.SetInt(vform, i, product);
3359 }
3360 return dst;
3361 }
3362
3363
dot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool is_signed)3364 LogicVRegister Simulator::dot(VectorFormat vform,
3365 LogicVRegister dst,
3366 const LogicVRegister& src1,
3367 const LogicVRegister& src2,
3368 bool is_signed) {
3369 VectorFormat quarter_vform =
3370 VectorFormatHalfWidthDoubleLanes(VectorFormatHalfWidthDoubleLanes(vform));
3371
3372 dst.ClearForWrite(vform);
3373 for (int e = 0; e < LaneCountFromFormat(vform); e++) {
3374 int64_t result = 0;
3375 int64_t element1, element2;
3376 for (int i = 0; i < 4; i++) {
3377 int index = 4 * e + i;
3378 if (is_signed) {
3379 element1 = src1.Int(quarter_vform, index);
3380 element2 = src2.Int(quarter_vform, index);
3381 } else {
3382 element1 = src1.Uint(quarter_vform, index);
3383 element2 = src2.Uint(quarter_vform, index);
3384 }
3385 result += element1 * element2;
3386 }
3387
3388 result += dst.Int(vform, e);
3389 dst.SetInt(vform, e, result);
3390 }
3391 return dst;
3392 }
3393
3394
sdot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3395 LogicVRegister Simulator::sdot(VectorFormat vform,
3396 LogicVRegister dst,
3397 const LogicVRegister& src1,
3398 const LogicVRegister& src2) {
3399 return dot(vform, dst, src1, src2, true);
3400 }
3401
3402
udot(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3403 LogicVRegister Simulator::udot(VectorFormat vform,
3404 LogicVRegister dst,
3405 const LogicVRegister& src1,
3406 const LogicVRegister& src2) {
3407 return dot(vform, dst, src1, src2, false);
3408 }
3409
3410
sqrdmlash(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round,bool sub_op)3411 LogicVRegister Simulator::sqrdmlash(VectorFormat vform,
3412 LogicVRegister dst,
3413 const LogicVRegister& src1,
3414 const LogicVRegister& src2,
3415 bool round,
3416 bool sub_op) {
3417 // 2 * INT_32_MIN * INT_32_MIN causes int64_t to overflow.
3418 // To avoid this, we use:
3419 // (dst << (esize - 1) + src1 * src2 + 1 << (esize - 2)) >> (esize - 1)
3420 // which is same as:
3421 // (dst << esize + 2 * src1 * src2 + 1 << (esize - 1)) >> esize.
3422
3423 int esize = LaneSizeInBitsFromFormat(vform);
3424 int round_const = round ? (1 << (esize - 2)) : 0;
3425 int64_t accum;
3426
3427 dst.ClearForWrite(vform);
3428 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
3429 accum = dst.Int(vform, i) << (esize - 1);
3430 if (sub_op) {
3431 accum -= src1.Int(vform, i) * src2.Int(vform, i);
3432 } else {
3433 accum += src1.Int(vform, i) * src2.Int(vform, i);
3434 }
3435 accum += round_const;
3436 accum = accum >> (esize - 1);
3437
3438 if (accum > MaxIntFromFormat(vform)) {
3439 accum = MaxIntFromFormat(vform);
3440 } else if (accum < MinIntFromFormat(vform)) {
3441 accum = MinIntFromFormat(vform);
3442 }
3443 dst.SetInt(vform, i, accum);
3444 }
3445 return dst;
3446 }
3447
3448
sqrdmlah(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3449 LogicVRegister Simulator::sqrdmlah(VectorFormat vform,
3450 LogicVRegister dst,
3451 const LogicVRegister& src1,
3452 const LogicVRegister& src2,
3453 bool round) {
3454 return sqrdmlash(vform, dst, src1, src2, round, false);
3455 }
3456
3457
sqrdmlsh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,bool round)3458 LogicVRegister Simulator::sqrdmlsh(VectorFormat vform,
3459 LogicVRegister dst,
3460 const LogicVRegister& src1,
3461 const LogicVRegister& src2,
3462 bool round) {
3463 return sqrdmlash(vform, dst, src1, src2, round, true);
3464 }
3465
3466
sqdmulh(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3467 LogicVRegister Simulator::sqdmulh(VectorFormat vform,
3468 LogicVRegister dst,
3469 const LogicVRegister& src1,
3470 const LogicVRegister& src2) {
3471 return sqrdmulh(vform, dst, src1, src2, false);
3472 }
3473
3474
addhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3475 LogicVRegister Simulator::addhn(VectorFormat vform,
3476 LogicVRegister dst,
3477 const LogicVRegister& src1,
3478 const LogicVRegister& src2) {
3479 SimVRegister temp;
3480 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3481 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3482 return dst;
3483 }
3484
3485
addhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3486 LogicVRegister Simulator::addhn2(VectorFormat vform,
3487 LogicVRegister dst,
3488 const LogicVRegister& src1,
3489 const LogicVRegister& src2) {
3490 SimVRegister temp;
3491 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3492 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3493 return dst;
3494 }
3495
3496
raddhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3497 LogicVRegister Simulator::raddhn(VectorFormat vform,
3498 LogicVRegister dst,
3499 const LogicVRegister& src1,
3500 const LogicVRegister& src2) {
3501 SimVRegister temp;
3502 add(VectorFormatDoubleWidth(vform), temp, src1, src2);
3503 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3504 return dst;
3505 }
3506
3507
raddhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3508 LogicVRegister Simulator::raddhn2(VectorFormat vform,
3509 LogicVRegister dst,
3510 const LogicVRegister& src1,
3511 const LogicVRegister& src2) {
3512 SimVRegister temp;
3513 add(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3514 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3515 return dst;
3516 }
3517
3518
subhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3519 LogicVRegister Simulator::subhn(VectorFormat vform,
3520 LogicVRegister dst,
3521 const LogicVRegister& src1,
3522 const LogicVRegister& src2) {
3523 SimVRegister temp;
3524 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3525 shrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3526 return dst;
3527 }
3528
3529
subhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3530 LogicVRegister Simulator::subhn2(VectorFormat vform,
3531 LogicVRegister dst,
3532 const LogicVRegister& src1,
3533 const LogicVRegister& src2) {
3534 SimVRegister temp;
3535 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3536 shrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3537 return dst;
3538 }
3539
3540
rsubhn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3541 LogicVRegister Simulator::rsubhn(VectorFormat vform,
3542 LogicVRegister dst,
3543 const LogicVRegister& src1,
3544 const LogicVRegister& src2) {
3545 SimVRegister temp;
3546 sub(VectorFormatDoubleWidth(vform), temp, src1, src2);
3547 rshrn(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3548 return dst;
3549 }
3550
3551
rsubhn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3552 LogicVRegister Simulator::rsubhn2(VectorFormat vform,
3553 LogicVRegister dst,
3554 const LogicVRegister& src1,
3555 const LogicVRegister& src2) {
3556 SimVRegister temp;
3557 sub(VectorFormatDoubleWidth(VectorFormatHalfLanes(vform)), temp, src1, src2);
3558 rshrn2(vform, dst, temp, LaneSizeInBitsFromFormat(vform));
3559 return dst;
3560 }
3561
3562
trn1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3563 LogicVRegister Simulator::trn1(VectorFormat vform,
3564 LogicVRegister dst,
3565 const LogicVRegister& src1,
3566 const LogicVRegister& src2) {
3567 uint64_t result[16];
3568 int laneCount = LaneCountFromFormat(vform);
3569 int pairs = laneCount / 2;
3570 for (int i = 0; i < pairs; ++i) {
3571 result[2 * i] = src1.Uint(vform, 2 * i);
3572 result[(2 * i) + 1] = src2.Uint(vform, 2 * i);
3573 }
3574
3575 dst.ClearForWrite(vform);
3576 for (int i = 0; i < laneCount; ++i) {
3577 dst.SetUint(vform, i, result[i]);
3578 }
3579 return dst;
3580 }
3581
3582
trn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3583 LogicVRegister Simulator::trn2(VectorFormat vform,
3584 LogicVRegister dst,
3585 const LogicVRegister& src1,
3586 const LogicVRegister& src2) {
3587 uint64_t result[16];
3588 int laneCount = LaneCountFromFormat(vform);
3589 int pairs = laneCount / 2;
3590 for (int i = 0; i < pairs; ++i) {
3591 result[2 * i] = src1.Uint(vform, (2 * i) + 1);
3592 result[(2 * i) + 1] = src2.Uint(vform, (2 * i) + 1);
3593 }
3594
3595 dst.ClearForWrite(vform);
3596 for (int i = 0; i < laneCount; ++i) {
3597 dst.SetUint(vform, i, result[i]);
3598 }
3599 return dst;
3600 }
3601
3602
zip1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3603 LogicVRegister Simulator::zip1(VectorFormat vform,
3604 LogicVRegister dst,
3605 const LogicVRegister& src1,
3606 const LogicVRegister& src2) {
3607 uint64_t result[16];
3608 int laneCount = LaneCountFromFormat(vform);
3609 int pairs = laneCount / 2;
3610 for (int i = 0; i < pairs; ++i) {
3611 result[2 * i] = src1.Uint(vform, i);
3612 result[(2 * i) + 1] = src2.Uint(vform, i);
3613 }
3614
3615 dst.ClearForWrite(vform);
3616 for (int i = 0; i < laneCount; ++i) {
3617 dst.SetUint(vform, i, result[i]);
3618 }
3619 return dst;
3620 }
3621
3622
zip2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3623 LogicVRegister Simulator::zip2(VectorFormat vform,
3624 LogicVRegister dst,
3625 const LogicVRegister& src1,
3626 const LogicVRegister& src2) {
3627 uint64_t result[16];
3628 int laneCount = LaneCountFromFormat(vform);
3629 int pairs = laneCount / 2;
3630 for (int i = 0; i < pairs; ++i) {
3631 result[2 * i] = src1.Uint(vform, pairs + i);
3632 result[(2 * i) + 1] = src2.Uint(vform, pairs + i);
3633 }
3634
3635 dst.ClearForWrite(vform);
3636 for (int i = 0; i < laneCount; ++i) {
3637 dst.SetUint(vform, i, result[i]);
3638 }
3639 return dst;
3640 }
3641
3642
uzp1(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3643 LogicVRegister Simulator::uzp1(VectorFormat vform,
3644 LogicVRegister dst,
3645 const LogicVRegister& src1,
3646 const LogicVRegister& src2) {
3647 uint64_t result[32];
3648 int laneCount = LaneCountFromFormat(vform);
3649 for (int i = 0; i < laneCount; ++i) {
3650 result[i] = src1.Uint(vform, i);
3651 result[laneCount + i] = src2.Uint(vform, i);
3652 }
3653
3654 dst.ClearForWrite(vform);
3655 for (int i = 0; i < laneCount; ++i) {
3656 dst.SetUint(vform, i, result[2 * i]);
3657 }
3658 return dst;
3659 }
3660
3661
uzp2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)3662 LogicVRegister Simulator::uzp2(VectorFormat vform,
3663 LogicVRegister dst,
3664 const LogicVRegister& src1,
3665 const LogicVRegister& src2) {
3666 uint64_t result[32];
3667 int laneCount = LaneCountFromFormat(vform);
3668 for (int i = 0; i < laneCount; ++i) {
3669 result[i] = src1.Uint(vform, i);
3670 result[laneCount + i] = src2.Uint(vform, i);
3671 }
3672
3673 dst.ClearForWrite(vform);
3674 for (int i = 0; i < laneCount; ++i) {
3675 dst.SetUint(vform, i, result[(2 * i) + 1]);
3676 }
3677 return dst;
3678 }
3679
3680
3681 template <typename T>
FPNeg(T op)3682 T Simulator::FPNeg(T op) {
3683 return -op;
3684 }
3685
3686 template <typename T>
FPAdd(T op1,T op2)3687 T Simulator::FPAdd(T op1, T op2) {
3688 T result = FPProcessNaNs(op1, op2);
3689 if (IsNaN(result)) {
3690 return result;
3691 }
3692
3693 if (IsInf(op1) && IsInf(op2) && (op1 != op2)) {
3694 // inf + -inf returns the default NaN.
3695 FPProcessException();
3696 return FPDefaultNaN<T>();
3697 } else {
3698 // Other cases should be handled by standard arithmetic.
3699 return op1 + op2;
3700 }
3701 }
3702
3703
3704 template <typename T>
FPSub(T op1,T op2)3705 T Simulator::FPSub(T op1, T op2) {
3706 // NaNs should be handled elsewhere.
3707 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
3708
3709 if (IsInf(op1) && IsInf(op2) && (op1 == op2)) {
3710 // inf - inf returns the default NaN.
3711 FPProcessException();
3712 return FPDefaultNaN<T>();
3713 } else {
3714 // Other cases should be handled by standard arithmetic.
3715 return op1 - op2;
3716 }
3717 }
3718
3719
3720 template <typename T>
FPMul(T op1,T op2)3721 T Simulator::FPMul(T op1, T op2) {
3722 // NaNs should be handled elsewhere.
3723 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
3724
3725 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
3726 // inf * 0.0 returns the default NaN.
3727 FPProcessException();
3728 return FPDefaultNaN<T>();
3729 } else {
3730 // Other cases should be handled by standard arithmetic.
3731 return op1 * op2;
3732 }
3733 }
3734
3735
3736 template <typename T>
FPMulx(T op1,T op2)3737 T Simulator::FPMulx(T op1, T op2) {
3738 if ((IsInf(op1) && (op2 == 0.0)) || (IsInf(op2) && (op1 == 0.0))) {
3739 // inf * 0.0 returns +/-2.0.
3740 T two = 2.0;
3741 return copysign(1.0, op1) * copysign(1.0, op2) * two;
3742 }
3743 return FPMul(op1, op2);
3744 }
3745
3746
3747 template <typename T>
FPMulAdd(T a,T op1,T op2)3748 T Simulator::FPMulAdd(T a, T op1, T op2) {
3749 T result = FPProcessNaNs3(a, op1, op2);
3750
3751 T sign_a = copysign(1.0, a);
3752 T sign_prod = copysign(1.0, op1) * copysign(1.0, op2);
3753 bool isinf_prod = IsInf(op1) || IsInf(op2);
3754 bool operation_generates_nan =
3755 (IsInf(op1) && (op2 == 0.0)) || // inf * 0.0
3756 (IsInf(op2) && (op1 == 0.0)) || // 0.0 * inf
3757 (IsInf(a) && isinf_prod && (sign_a != sign_prod)); // inf - inf
3758
3759 if (IsNaN(result)) {
3760 // Generated NaNs override quiet NaNs propagated from a.
3761 if (operation_generates_nan && IsQuietNaN(a)) {
3762 FPProcessException();
3763 return FPDefaultNaN<T>();
3764 } else {
3765 return result;
3766 }
3767 }
3768
3769 // If the operation would produce a NaN, return the default NaN.
3770 if (operation_generates_nan) {
3771 FPProcessException();
3772 return FPDefaultNaN<T>();
3773 }
3774
3775 // Work around broken fma implementations for exact zero results: The sign of
3776 // exact 0.0 results is positive unless both a and op1 * op2 are negative.
3777 if (((op1 == 0.0) || (op2 == 0.0)) && (a == 0.0)) {
3778 return ((sign_a < T(0.0)) && (sign_prod < T(0.0))) ? -0.0 : 0.0;
3779 }
3780
3781 result = FusedMultiplyAdd(op1, op2, a);
3782 VIXL_ASSERT(!IsNaN(result));
3783
3784 // Work around broken fma implementations for rounded zero results: If a is
3785 // 0.0, the sign of the result is the sign of op1 * op2 before rounding.
3786 if ((a == 0.0) && (result == 0.0)) {
3787 return copysign(0.0, sign_prod);
3788 }
3789
3790 return result;
3791 }
3792
3793
3794 template <typename T>
FPDiv(T op1,T op2)3795 T Simulator::FPDiv(T op1, T op2) {
3796 // NaNs should be handled elsewhere.
3797 VIXL_ASSERT(!IsNaN(op1) && !IsNaN(op2));
3798
3799 if ((IsInf(op1) && IsInf(op2)) || ((op1 == 0.0) && (op2 == 0.0))) {
3800 // inf / inf and 0.0 / 0.0 return the default NaN.
3801 FPProcessException();
3802 return FPDefaultNaN<T>();
3803 } else {
3804 if (op2 == 0.0) {
3805 FPProcessException();
3806 if (!IsNaN(op1)) {
3807 double op1_sign = copysign(1.0, op1);
3808 double op2_sign = copysign(1.0, op2);
3809 return static_cast<T>(op1_sign * op2_sign * kFP64PositiveInfinity);
3810 }
3811 }
3812
3813 // Other cases should be handled by standard arithmetic.
3814 return op1 / op2;
3815 }
3816 }
3817
3818
3819 template <typename T>
FPSqrt(T op)3820 T Simulator::FPSqrt(T op) {
3821 if (IsNaN(op)) {
3822 return FPProcessNaN(op);
3823 } else if (op < T(0.0)) {
3824 FPProcessException();
3825 return FPDefaultNaN<T>();
3826 } else {
3827 return sqrt(op);
3828 }
3829 }
3830
3831
3832 template <typename T>
FPMax(T a,T b)3833 T Simulator::FPMax(T a, T b) {
3834 T result = FPProcessNaNs(a, b);
3835 if (IsNaN(result)) return result;
3836
3837 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3838 // a and b are zero, and the sign differs: return +0.0.
3839 return 0.0;
3840 } else {
3841 return (a > b) ? a : b;
3842 }
3843 }
3844
3845
3846 template <typename T>
FPMaxNM(T a,T b)3847 T Simulator::FPMaxNM(T a, T b) {
3848 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3849 a = kFP64NegativeInfinity;
3850 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3851 b = kFP64NegativeInfinity;
3852 }
3853
3854 T result = FPProcessNaNs(a, b);
3855 return IsNaN(result) ? result : FPMax(a, b);
3856 }
3857
3858
3859 template <typename T>
FPMin(T a,T b)3860 T Simulator::FPMin(T a, T b) {
3861 T result = FPProcessNaNs(a, b);
3862 if (IsNaN(result)) return result;
3863
3864 if ((a == 0.0) && (b == 0.0) && (copysign(1.0, a) != copysign(1.0, b))) {
3865 // a and b are zero, and the sign differs: return -0.0.
3866 return -0.0;
3867 } else {
3868 return (a < b) ? a : b;
3869 }
3870 }
3871
3872
3873 template <typename T>
FPMinNM(T a,T b)3874 T Simulator::FPMinNM(T a, T b) {
3875 if (IsQuietNaN(a) && !IsQuietNaN(b)) {
3876 a = kFP64PositiveInfinity;
3877 } else if (!IsQuietNaN(a) && IsQuietNaN(b)) {
3878 b = kFP64PositiveInfinity;
3879 }
3880
3881 T result = FPProcessNaNs(a, b);
3882 return IsNaN(result) ? result : FPMin(a, b);
3883 }
3884
3885
3886 template <typename T>
FPRecipStepFused(T op1,T op2)3887 T Simulator::FPRecipStepFused(T op1, T op2) {
3888 const T two = 2.0;
3889 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
3890 return two;
3891 } else if (IsInf(op1) || IsInf(op2)) {
3892 // Return +inf if signs match, otherwise -inf.
3893 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3894 : kFP64NegativeInfinity;
3895 } else {
3896 return FusedMultiplyAdd(op1, op2, two);
3897 }
3898 }
3899
// Returns true when `value` is a normal floating-point number, i.e. it is not
// zero, subnormal, infinite or NaN.
template <typename T>
bool IsNormal(T value) {
  return std::fpclassify(value) == FP_NORMAL;
}
3904
3905 template <>
IsNormal(SimFloat16 value)3906 bool IsNormal(SimFloat16 value) {
3907 uint16_t rawbits = Float16ToRawbits(value);
3908 uint16_t exp_mask = 0x7c00;
3909 // Check that the exponent is neither all zeroes or all ones.
3910 return ((rawbits & exp_mask) != 0) && ((~rawbits & exp_mask) != 0);
3911 }
3912
3913
3914 template <typename T>
FPRSqrtStepFused(T op1,T op2)3915 T Simulator::FPRSqrtStepFused(T op1, T op2) {
3916 const T one_point_five = 1.5;
3917 const T two = 2.0;
3918
3919 if ((IsInf(op1) && (op2 == 0.0)) || ((op1 == 0.0) && (IsInf(op2)))) {
3920 return one_point_five;
3921 } else if (IsInf(op1) || IsInf(op2)) {
3922 // Return +inf if signs match, otherwise -inf.
3923 return ((op1 >= 0.0) == (op2 >= 0.0)) ? kFP64PositiveInfinity
3924 : kFP64NegativeInfinity;
3925 } else {
3926 // The multiply-add-halve operation must be fully fused, so avoid interim
3927 // rounding by checking which operand can be losslessly divided by two
3928 // before doing the multiply-add.
3929 if (IsNormal(op1 / two)) {
3930 return FusedMultiplyAdd(op1 / two, op2, one_point_five);
3931 } else if (IsNormal(op2 / two)) {
3932 return FusedMultiplyAdd(op1, op2 / two, one_point_five);
3933 } else {
3934 // Neither operand is normal after halving: the result is dominated by
3935 // the addition term, so just return that.
3936 return one_point_five;
3937 }
3938 }
3939 }
3940
FPToFixedJS(double value)3941 int32_t Simulator::FPToFixedJS(double value) {
3942 // The Z-flag is set when the conversion from double precision floating-point
3943 // to 32-bit integer is exact. If the source value is +/-Infinity, -0.0, NaN,
3944 // outside the bounds of a 32-bit integer, or isn't an exact integer then the
3945 // Z-flag is unset.
3946 int Z = 1;
3947 int32_t result;
3948
3949 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
3950 (value == kFP64NegativeInfinity)) {
3951 // +/- zero and infinity all return zero, however -0 and +/- Infinity also
3952 // unset the Z-flag.
3953 result = 0.0;
3954 if ((value != 0.0) || std::signbit(value)) {
3955 Z = 0;
3956 }
3957 } else if (std::isnan(value)) {
3958 // NaN values unset the Z-flag and set the result to 0.
3959 FPProcessNaN(value);
3960 result = 0;
3961 Z = 0;
3962 } else {
3963 // All other values are converted to an integer representation, rounded
3964 // toward zero.
3965 double int_result = std::floor(value);
3966 double error = value - int_result;
3967
3968 if ((error != 0.0) && (int_result < 0.0)) {
3969 int_result++;
3970 }
3971
3972 // Constrain the value into the range [INT32_MIN, INT32_MAX]. We can almost
3973 // write a one-liner with std::round, but the behaviour on ties is incorrect
3974 // for our purposes.
3975 double mod_const = static_cast<double>(UINT64_C(1) << 32);
3976 double mod_error =
3977 (int_result / mod_const) - std::floor(int_result / mod_const);
3978 double constrained;
3979 if (mod_error == 0.5) {
3980 constrained = INT32_MIN;
3981 } else {
3982 constrained = int_result - mod_const * round(int_result / mod_const);
3983 }
3984
3985 VIXL_ASSERT(std::floor(constrained) == constrained);
3986 VIXL_ASSERT(constrained >= INT32_MIN);
3987 VIXL_ASSERT(constrained <= INT32_MAX);
3988
3989 // Take the bottom 32 bits of the result as a 32-bit integer.
3990 result = static_cast<int32_t>(constrained);
3991
3992 if ((int_result < INT32_MIN) || (int_result > INT32_MAX) ||
3993 (error != 0.0)) {
3994 // If the integer result is out of range or the conversion isn't exact,
3995 // take exception and unset the Z-flag.
3996 FPProcessException();
3997 Z = 0;
3998 }
3999 }
4000
4001 ReadNzcv().SetN(0);
4002 ReadNzcv().SetZ(Z);
4003 ReadNzcv().SetC(0);
4004 ReadNzcv().SetV(0);
4005
4006 return result;
4007 }
4008
4009
FPRoundInt(double value,FPRounding round_mode)4010 double Simulator::FPRoundInt(double value, FPRounding round_mode) {
4011 if ((value == 0.0) || (value == kFP64PositiveInfinity) ||
4012 (value == kFP64NegativeInfinity)) {
4013 return value;
4014 } else if (IsNaN(value)) {
4015 return FPProcessNaN(value);
4016 }
4017
4018 double int_result = std::floor(value);
4019 double error = value - int_result;
4020 switch (round_mode) {
4021 case FPTieAway: {
4022 // Take care of correctly handling the range ]-0.5, -0.0], which must
4023 // yield -0.0.
4024 if ((-0.5 < value) && (value < 0.0)) {
4025 int_result = -0.0;
4026
4027 } else if ((error > 0.5) || ((error == 0.5) && (int_result >= 0.0))) {
4028 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4029 // result is positive, round up.
4030 int_result++;
4031 }
4032 break;
4033 }
4034 case FPTieEven: {
4035 // Take care of correctly handling the range [-0.5, -0.0], which must
4036 // yield -0.0.
4037 if ((-0.5 <= value) && (value < 0.0)) {
4038 int_result = -0.0;
4039
4040 // If the error is greater than 0.5, or is equal to 0.5 and the integer
4041 // result is odd, round up.
4042 } else if ((error > 0.5) ||
4043 ((error == 0.5) && (std::fmod(int_result, 2) != 0))) {
4044 int_result++;
4045 }
4046 break;
4047 }
4048 case FPZero: {
4049 // If value>0 then we take floor(value)
4050 // otherwise, ceil(value).
4051 if (value < 0) {
4052 int_result = ceil(value);
4053 }
4054 break;
4055 }
4056 case FPNegativeInfinity: {
4057 // We always use floor(value).
4058 break;
4059 }
4060 case FPPositiveInfinity: {
4061 // Take care of correctly handling the range ]-1.0, -0.0], which must
4062 // yield -0.0.
4063 if ((-1.0 < value) && (value < 0.0)) {
4064 int_result = -0.0;
4065
4066 // If the error is non-zero, round up.
4067 } else if (error > 0.0) {
4068 int_result++;
4069 }
4070 break;
4071 }
4072 default:
4073 VIXL_UNIMPLEMENTED();
4074 }
4075 return int_result;
4076 }
4077
4078
FPToInt16(double value,FPRounding rmode)4079 int16_t Simulator::FPToInt16(double value, FPRounding rmode) {
4080 value = FPRoundInt(value, rmode);
4081 if (value >= kHMaxInt) {
4082 return kHMaxInt;
4083 } else if (value < kHMinInt) {
4084 return kHMinInt;
4085 }
4086 return IsNaN(value) ? 0 : static_cast<int16_t>(value);
4087 }
4088
4089
FPToInt32(double value,FPRounding rmode)4090 int32_t Simulator::FPToInt32(double value, FPRounding rmode) {
4091 value = FPRoundInt(value, rmode);
4092 if (value >= kWMaxInt) {
4093 return kWMaxInt;
4094 } else if (value < kWMinInt) {
4095 return kWMinInt;
4096 }
4097 return IsNaN(value) ? 0 : static_cast<int32_t>(value);
4098 }
4099
4100
FPToInt64(double value,FPRounding rmode)4101 int64_t Simulator::FPToInt64(double value, FPRounding rmode) {
4102 value = FPRoundInt(value, rmode);
4103 if (value >= kXMaxInt) {
4104 return kXMaxInt;
4105 } else if (value < kXMinInt) {
4106 return kXMinInt;
4107 }
4108 return IsNaN(value) ? 0 : static_cast<int64_t>(value);
4109 }
4110
4111
FPToUInt16(double value,FPRounding rmode)4112 uint16_t Simulator::FPToUInt16(double value, FPRounding rmode) {
4113 value = FPRoundInt(value, rmode);
4114 if (value >= kHMaxUInt) {
4115 return kHMaxUInt;
4116 } else if (value < 0.0) {
4117 return 0;
4118 }
4119 return IsNaN(value) ? 0 : static_cast<uint16_t>(value);
4120 }
4121
4122
FPToUInt32(double value,FPRounding rmode)4123 uint32_t Simulator::FPToUInt32(double value, FPRounding rmode) {
4124 value = FPRoundInt(value, rmode);
4125 if (value >= kWMaxUInt) {
4126 return kWMaxUInt;
4127 } else if (value < 0.0) {
4128 return 0;
4129 }
4130 return IsNaN(value) ? 0 : static_cast<uint32_t>(value);
4131 }
4132
4133
FPToUInt64(double value,FPRounding rmode)4134 uint64_t Simulator::FPToUInt64(double value, FPRounding rmode) {
4135 value = FPRoundInt(value, rmode);
4136 if (value >= kXMaxUInt) {
4137 return kXMaxUInt;
4138 } else if (value < 0.0) {
4139 return 0;
4140 }
4141 return IsNaN(value) ? 0 : static_cast<uint64_t>(value);
4142 }
4143
4144
// For every (FN, OP, PROCNAN) entry of NEON_FP3SAME_LIST this defines:
//  - a templated lane-wise FN<T> that applies OP to each pair of lanes,
//    first running FPProcessNaNs() when PROCNAN is set and skipping OP if
//    that produced a NaN;
//  - a non-template FN that dispatches to FN<T> on the lane size of `vform`.
#define DEFINE_NEON_FP_VECTOR_OP(FN, OP, PROCNAN)                    \
  template <typename T>                                              \
  LogicVRegister Simulator::FN(VectorFormat vform,                   \
                               LogicVRegister dst,                   \
                               const LogicVRegister& src1,           \
                               const LogicVRegister& src2) {         \
    dst.ClearForWrite(vform);                                        \
    const int lane_count = LaneCountFromFormat(vform);               \
    for (int lane = 0; lane < lane_count; lane++) {                  \
      T lhs = src1.Float<T>(lane);                                   \
      T rhs = src2.Float<T>(lane);                                   \
      T lane_result;                                                 \
      if (!(PROCNAN)) {                                              \
        lane_result = OP(lhs, rhs);                                  \
      } else {                                                       \
        lane_result = FPProcessNaNs(lhs, rhs);                       \
        if (!IsNaN(lane_result)) {                                   \
          lane_result = OP(lhs, rhs);                                \
        }                                                            \
      }                                                              \
      dst.SetFloat(lane, lane_result);                               \
    }                                                                \
    return dst;                                                      \
  }                                                                  \
                                                                     \
  LogicVRegister Simulator::FN(VectorFormat vform,                   \
                               LogicVRegister dst,                   \
                               const LogicVRegister& src1,           \
                               const LogicVRegister& src2) {         \
    const unsigned lane_bits = LaneSizeInBitsFromFormat(vform);      \
    if (lane_bits == kHRegSize) {                                    \
      FN<SimFloat16>(vform, dst, src1, src2);                        \
      return dst;                                                    \
    }                                                                \
    if (lane_bits == kSRegSize) {                                    \
      FN<float>(vform, dst, src1, src2);                             \
      return dst;                                                    \
    }                                                                \
    VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);       \
    FN<double>(vform, dst, src1, src2);                              \
    return dst;                                                      \
  }
NEON_FP3SAME_LIST(DEFINE_NEON_FP_VECTOR_OP)
#undef DEFINE_NEON_FP_VECTOR_OP
4185
4186
4187 LogicVRegister Simulator::fnmul(VectorFormat vform,
4188 LogicVRegister dst,
4189 const LogicVRegister& src1,
4190 const LogicVRegister& src2) {
4191 SimVRegister temp;
4192 LogicVRegister product = fmul(vform, temp, src1, src2);
4193 return fneg(vform, dst, product);
4194 }
4195
4196
4197 template <typename T>
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4198 LogicVRegister Simulator::frecps(VectorFormat vform,
4199 LogicVRegister dst,
4200 const LogicVRegister& src1,
4201 const LogicVRegister& src2) {
4202 dst.ClearForWrite(vform);
4203 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4204 T op1 = -src1.Float<T>(i);
4205 T op2 = src2.Float<T>(i);
4206 T result = FPProcessNaNs(op1, op2);
4207 dst.SetFloat(i, IsNaN(result) ? result : FPRecipStepFused(op1, op2));
4208 }
4209 return dst;
4210 }
4211
4212
frecps(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4213 LogicVRegister Simulator::frecps(VectorFormat vform,
4214 LogicVRegister dst,
4215 const LogicVRegister& src1,
4216 const LogicVRegister& src2) {
4217 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4218 frecps<SimFloat16>(vform, dst, src1, src2);
4219 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4220 frecps<float>(vform, dst, src1, src2);
4221 } else {
4222 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4223 frecps<double>(vform, dst, src1, src2);
4224 }
4225 return dst;
4226 }
4227
4228
4229 template <typename T>
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4230 LogicVRegister Simulator::frsqrts(VectorFormat vform,
4231 LogicVRegister dst,
4232 const LogicVRegister& src1,
4233 const LogicVRegister& src2) {
4234 dst.ClearForWrite(vform);
4235 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4236 T op1 = -src1.Float<T>(i);
4237 T op2 = src2.Float<T>(i);
4238 T result = FPProcessNaNs(op1, op2);
4239 dst.SetFloat(i, IsNaN(result) ? result : FPRSqrtStepFused(op1, op2));
4240 }
4241 return dst;
4242 }
4243
4244
frsqrts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4245 LogicVRegister Simulator::frsqrts(VectorFormat vform,
4246 LogicVRegister dst,
4247 const LogicVRegister& src1,
4248 const LogicVRegister& src2) {
4249 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4250 frsqrts<SimFloat16>(vform, dst, src1, src2);
4251 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4252 frsqrts<float>(vform, dst, src1, src2);
4253 } else {
4254 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4255 frsqrts<double>(vform, dst, src1, src2);
4256 }
4257 return dst;
4258 }
4259
4260
4261 template <typename T>
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4262 LogicVRegister Simulator::fcmp(VectorFormat vform,
4263 LogicVRegister dst,
4264 const LogicVRegister& src1,
4265 const LogicVRegister& src2,
4266 Condition cond) {
4267 dst.ClearForWrite(vform);
4268 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4269 bool result = false;
4270 T op1 = src1.Float<T>(i);
4271 T op2 = src2.Float<T>(i);
4272 T nan_result = FPProcessNaNs(op1, op2);
4273 if (!IsNaN(nan_result)) {
4274 switch (cond) {
4275 case eq:
4276 result = (op1 == op2);
4277 break;
4278 case ge:
4279 result = (op1 >= op2);
4280 break;
4281 case gt:
4282 result = (op1 > op2);
4283 break;
4284 case le:
4285 result = (op1 <= op2);
4286 break;
4287 case lt:
4288 result = (op1 < op2);
4289 break;
4290 default:
4291 VIXL_UNREACHABLE();
4292 break;
4293 }
4294 }
4295 dst.SetUint(vform, i, result ? MaxUintFromFormat(vform) : 0);
4296 }
4297 return dst;
4298 }
4299
4300
fcmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4301 LogicVRegister Simulator::fcmp(VectorFormat vform,
4302 LogicVRegister dst,
4303 const LogicVRegister& src1,
4304 const LogicVRegister& src2,
4305 Condition cond) {
4306 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4307 fcmp<SimFloat16>(vform, dst, src1, src2, cond);
4308 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4309 fcmp<float>(vform, dst, src1, src2, cond);
4310 } else {
4311 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4312 fcmp<double>(vform, dst, src1, src2, cond);
4313 }
4314 return dst;
4315 }
4316
4317
fcmp_zero(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,Condition cond)4318 LogicVRegister Simulator::fcmp_zero(VectorFormat vform,
4319 LogicVRegister dst,
4320 const LogicVRegister& src,
4321 Condition cond) {
4322 SimVRegister temp;
4323 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4324 LogicVRegister zero_reg =
4325 dup_immediate(vform, temp, Float16ToRawbits(SimFloat16(0.0)));
4326 fcmp<SimFloat16>(vform, dst, src, zero_reg, cond);
4327 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4328 LogicVRegister zero_reg = dup_immediate(vform, temp, FloatToRawbits(0.0));
4329 fcmp<float>(vform, dst, src, zero_reg, cond);
4330 } else {
4331 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4332 LogicVRegister zero_reg = dup_immediate(vform, temp, DoubleToRawbits(0.0));
4333 fcmp<double>(vform, dst, src, zero_reg, cond);
4334 }
4335 return dst;
4336 }
4337
4338
fabscmp(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,Condition cond)4339 LogicVRegister Simulator::fabscmp(VectorFormat vform,
4340 LogicVRegister dst,
4341 const LogicVRegister& src1,
4342 const LogicVRegister& src2,
4343 Condition cond) {
4344 SimVRegister temp1, temp2;
4345 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4346 LogicVRegister abs_src1 = fabs_<SimFloat16>(vform, temp1, src1);
4347 LogicVRegister abs_src2 = fabs_<SimFloat16>(vform, temp2, src2);
4348 fcmp<SimFloat16>(vform, dst, abs_src1, abs_src2, cond);
4349 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4350 LogicVRegister abs_src1 = fabs_<float>(vform, temp1, src1);
4351 LogicVRegister abs_src2 = fabs_<float>(vform, temp2, src2);
4352 fcmp<float>(vform, dst, abs_src1, abs_src2, cond);
4353 } else {
4354 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4355 LogicVRegister abs_src1 = fabs_<double>(vform, temp1, src1);
4356 LogicVRegister abs_src2 = fabs_<double>(vform, temp2, src2);
4357 fcmp<double>(vform, dst, abs_src1, abs_src2, cond);
4358 }
4359 return dst;
4360 }
4361
4362
4363 template <typename T>
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4364 LogicVRegister Simulator::fmla(VectorFormat vform,
4365 LogicVRegister dst,
4366 const LogicVRegister& src1,
4367 const LogicVRegister& src2) {
4368 dst.ClearForWrite(vform);
4369 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4370 T op1 = src1.Float<T>(i);
4371 T op2 = src2.Float<T>(i);
4372 T acc = dst.Float<T>(i);
4373 T result = FPMulAdd(acc, op1, op2);
4374 dst.SetFloat(i, result);
4375 }
4376 return dst;
4377 }
4378
4379
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4380 LogicVRegister Simulator::fmla(VectorFormat vform,
4381 LogicVRegister dst,
4382 const LogicVRegister& src1,
4383 const LogicVRegister& src2) {
4384 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4385 fmla<SimFloat16>(vform, dst, src1, src2);
4386 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4387 fmla<float>(vform, dst, src1, src2);
4388 } else {
4389 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4390 fmla<double>(vform, dst, src1, src2);
4391 }
4392 return dst;
4393 }
4394
4395
4396 template <typename T>
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4397 LogicVRegister Simulator::fmls(VectorFormat vform,
4398 LogicVRegister dst,
4399 const LogicVRegister& src1,
4400 const LogicVRegister& src2) {
4401 dst.ClearForWrite(vform);
4402 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4403 T op1 = -src1.Float<T>(i);
4404 T op2 = src2.Float<T>(i);
4405 T acc = dst.Float<T>(i);
4406 T result = FPMulAdd(acc, op1, op2);
4407 dst.SetFloat(i, result);
4408 }
4409 return dst;
4410 }
4411
4412
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4413 LogicVRegister Simulator::fmls(VectorFormat vform,
4414 LogicVRegister dst,
4415 const LogicVRegister& src1,
4416 const LogicVRegister& src2) {
4417 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4418 fmls<SimFloat16>(vform, dst, src1, src2);
4419 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4420 fmls<float>(vform, dst, src1, src2);
4421 } else {
4422 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4423 fmls<double>(vform, dst, src1, src2);
4424 }
4425 return dst;
4426 }
4427
4428
4429 template <typename T>
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4430 LogicVRegister Simulator::fneg(VectorFormat vform,
4431 LogicVRegister dst,
4432 const LogicVRegister& src) {
4433 dst.ClearForWrite(vform);
4434 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4435 T op = src.Float<T>(i);
4436 op = -op;
4437 dst.SetFloat(i, op);
4438 }
4439 return dst;
4440 }
4441
4442
fneg(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4443 LogicVRegister Simulator::fneg(VectorFormat vform,
4444 LogicVRegister dst,
4445 const LogicVRegister& src) {
4446 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4447 fneg<SimFloat16>(vform, dst, src);
4448 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4449 fneg<float>(vform, dst, src);
4450 } else {
4451 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4452 fneg<double>(vform, dst, src);
4453 }
4454 return dst;
4455 }
4456
4457
4458 template <typename T>
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4459 LogicVRegister Simulator::fabs_(VectorFormat vform,
4460 LogicVRegister dst,
4461 const LogicVRegister& src) {
4462 dst.ClearForWrite(vform);
4463 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4464 T op = src.Float<T>(i);
4465 if (copysign(1.0, op) < 0.0) {
4466 op = -op;
4467 }
4468 dst.SetFloat(i, op);
4469 }
4470 return dst;
4471 }
4472
4473
fabs_(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4474 LogicVRegister Simulator::fabs_(VectorFormat vform,
4475 LogicVRegister dst,
4476 const LogicVRegister& src) {
4477 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4478 fabs_<SimFloat16>(vform, dst, src);
4479 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4480 fabs_<float>(vform, dst, src);
4481 } else {
4482 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4483 fabs_<double>(vform, dst, src);
4484 }
4485 return dst;
4486 }
4487
4488
fabd(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2)4489 LogicVRegister Simulator::fabd(VectorFormat vform,
4490 LogicVRegister dst,
4491 const LogicVRegister& src1,
4492 const LogicVRegister& src2) {
4493 SimVRegister temp;
4494 fsub(vform, temp, src1, src2);
4495 fabs_(vform, dst, temp);
4496 return dst;
4497 }
4498
4499
fsqrt(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4500 LogicVRegister Simulator::fsqrt(VectorFormat vform,
4501 LogicVRegister dst,
4502 const LogicVRegister& src) {
4503 dst.ClearForWrite(vform);
4504 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4505 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4506 SimFloat16 result = FPSqrt(src.Float<SimFloat16>(i));
4507 dst.SetFloat(i, result);
4508 }
4509 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4510 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4511 float result = FPSqrt(src.Float<float>(i));
4512 dst.SetFloat(i, result);
4513 }
4514 } else {
4515 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4516 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4517 double result = FPSqrt(src.Float<double>(i));
4518 dst.SetFloat(i, result);
4519 }
4520 }
4521 return dst;
4522 }
4523
4524
// For every (FNP, FN, OP) entry of NEON_FPPAIRWISE_LIST this defines:
//  - a vector form of FNP that splits the concatenation of src1 and src2 into
//    its even-indexed (uzp1) and odd-indexed (uzp2) elements and applies FN
//    to the two halves, so that each result lane combines one adjacent pair;
//  - a scalar form of FNP that applies OP to lanes 0 and 1 of `src` and
//    writes the result to lane 0 of dst.
#define DEFINE_NEON_FP_PAIR_OP(FNP, FN, OP)                                   \
  LogicVRegister Simulator::FNP(VectorFormat vform,                           \
                                LogicVRegister dst,                           \
                                const LogicVRegister& src1,                   \
                                const LogicVRegister& src2) {                 \
    SimVRegister even_elements, odd_elements;                                 \
    uzp1(vform, even_elements, src1, src2);                                   \
    uzp2(vform, odd_elements, src1, src2);                                    \
    FN(vform, dst, even_elements, odd_elements);                              \
    return dst;                                                               \
  }                                                                           \
                                                                              \
  LogicVRegister Simulator::FNP(VectorFormat vform,                           \
                                LogicVRegister dst,                           \
                                const LogicVRegister& src) {                  \
    if (vform == kFormatH) {                                                  \
      SimFloat16 reduced(                                                     \
          OP(SimFloat16(RawbitsToFloat16(src.Uint(vform, 0))),                \
             SimFloat16(RawbitsToFloat16(src.Uint(vform, 1)))));              \
      dst.SetUint(vform, 0, Float16ToRawbits(reduced));                       \
    } else if (vform == kFormatS) {                                           \
      float reduced = OP(src.Float<float>(0), src.Float<float>(1));           \
      dst.SetFloat(0, reduced);                                               \
    } else {                                                                  \
      VIXL_ASSERT(vform == kFormatD);                                         \
      double reduced = OP(src.Float<double>(0), src.Float<double>(1));        \
      dst.SetFloat(0, reduced);                                               \
    }                                                                         \
    dst.ClearForWrite(vform);                                                 \
    return dst;                                                               \
  }
NEON_FPPAIRWISE_LIST(DEFINE_NEON_FP_PAIR_OP)
#undef DEFINE_NEON_FP_PAIR_OP
4557
4558 template <typename T>
4559 LogicVRegister Simulator::fminmaxv(VectorFormat vform,
4560 LogicVRegister dst,
4561 const LogicVRegister& src,
4562 typename TFPMinMaxOp<T>::type Op) {
4563 VIXL_ASSERT((vform == kFormat4H) || (vform == kFormat8H) ||
4564 (vform == kFormat4S));
4565 USE(vform);
4566 T result1 = (this->*Op)(src.Float<T>(0), src.Float<T>(1));
4567 T result2 = (this->*Op)(src.Float<T>(2), src.Float<T>(3));
4568 if (vform == kFormat8H) {
4569 T result3 = (this->*Op)(src.Float<T>(4), src.Float<T>(5));
4570 T result4 = (this->*Op)(src.Float<T>(6), src.Float<T>(7));
4571 result1 = (this->*Op)(result1, result3);
4572 result2 = (this->*Op)(result2, result4);
4573 }
4574 T result = (this->*Op)(result1, result2);
4575 dst.ClearForWrite(ScalarFormatFromFormat(vform));
4576 dst.SetFloat<T>(0, result);
4577 return dst;
4578 }
4579
4580
fmaxv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4581 LogicVRegister Simulator::fmaxv(VectorFormat vform,
4582 LogicVRegister dst,
4583 const LogicVRegister& src) {
4584 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4585 return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMax<SimFloat16>);
4586 } else {
4587 return fminmaxv<float>(vform, dst, src, &Simulator::FPMax<float>);
4588 }
4589 }
4590
4591
fminv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4592 LogicVRegister Simulator::fminv(VectorFormat vform,
4593 LogicVRegister dst,
4594 const LogicVRegister& src) {
4595 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4596 return fminmaxv<SimFloat16>(vform, dst, src, &Simulator::FPMin<SimFloat16>);
4597 } else {
4598 return fminmaxv<float>(vform, dst, src, &Simulator::FPMin<float>);
4599 }
4600 }
4601
4602
fmaxnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4603 LogicVRegister Simulator::fmaxnmv(VectorFormat vform,
4604 LogicVRegister dst,
4605 const LogicVRegister& src) {
4606 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4607 return fminmaxv<SimFloat16>(vform,
4608 dst,
4609 src,
4610 &Simulator::FPMaxNM<SimFloat16>);
4611 } else {
4612 return fminmaxv<float>(vform, dst, src, &Simulator::FPMaxNM<float>);
4613 }
4614 }
4615
4616
fminnmv(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4617 LogicVRegister Simulator::fminnmv(VectorFormat vform,
4618 LogicVRegister dst,
4619 const LogicVRegister& src) {
4620 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4621 return fminmaxv<SimFloat16>(vform,
4622 dst,
4623 src,
4624 &Simulator::FPMinNM<SimFloat16>);
4625 } else {
4626 return fminmaxv<float>(vform, dst, src, &Simulator::FPMinNM<float>);
4627 }
4628 }
4629
4630
fmul(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4631 LogicVRegister Simulator::fmul(VectorFormat vform,
4632 LogicVRegister dst,
4633 const LogicVRegister& src1,
4634 const LogicVRegister& src2,
4635 int index) {
4636 dst.ClearForWrite(vform);
4637 SimVRegister temp;
4638 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4639 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
4640 fmul<SimFloat16>(vform, dst, src1, index_reg);
4641 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4642 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4643 fmul<float>(vform, dst, src1, index_reg);
4644 } else {
4645 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4646 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4647 fmul<double>(vform, dst, src1, index_reg);
4648 }
4649 return dst;
4650 }
4651
4652
fmla(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4653 LogicVRegister Simulator::fmla(VectorFormat vform,
4654 LogicVRegister dst,
4655 const LogicVRegister& src1,
4656 const LogicVRegister& src2,
4657 int index) {
4658 dst.ClearForWrite(vform);
4659 SimVRegister temp;
4660 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4661 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
4662 fmla<SimFloat16>(vform, dst, src1, index_reg);
4663 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4664 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4665 fmla<float>(vform, dst, src1, index_reg);
4666 } else {
4667 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4668 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4669 fmla<double>(vform, dst, src1, index_reg);
4670 }
4671 return dst;
4672 }
4673
4674
fmls(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4675 LogicVRegister Simulator::fmls(VectorFormat vform,
4676 LogicVRegister dst,
4677 const LogicVRegister& src1,
4678 const LogicVRegister& src2,
4679 int index) {
4680 dst.ClearForWrite(vform);
4681 SimVRegister temp;
4682 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4683 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
4684 fmls<SimFloat16>(vform, dst, src1, index_reg);
4685 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4686 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4687 fmls<float>(vform, dst, src1, index_reg);
4688 } else {
4689 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4690 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4691 fmls<double>(vform, dst, src1, index_reg);
4692 }
4693 return dst;
4694 }
4695
4696
fmulx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src1,const LogicVRegister & src2,int index)4697 LogicVRegister Simulator::fmulx(VectorFormat vform,
4698 LogicVRegister dst,
4699 const LogicVRegister& src1,
4700 const LogicVRegister& src2,
4701 int index) {
4702 dst.ClearForWrite(vform);
4703 SimVRegister temp;
4704 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4705 LogicVRegister index_reg = dup_element(kFormat8H, temp, src2, index);
4706 fmulx<SimFloat16>(vform, dst, src1, index_reg);
4707 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4708 LogicVRegister index_reg = dup_element(kFormat4S, temp, src2, index);
4709 fmulx<float>(vform, dst, src1, index_reg);
4710 } else {
4711 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4712 LogicVRegister index_reg = dup_element(kFormat2D, temp, src2, index);
4713 fmulx<double>(vform, dst, src1, index_reg);
4714 }
4715 return dst;
4716 }
4717
4718
frint(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,bool inexact_exception)4719 LogicVRegister Simulator::frint(VectorFormat vform,
4720 LogicVRegister dst,
4721 const LogicVRegister& src,
4722 FPRounding rounding_mode,
4723 bool inexact_exception) {
4724 dst.ClearForWrite(vform);
4725 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4726 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4727 SimFloat16 input = src.Float<SimFloat16>(i);
4728 SimFloat16 rounded = FPRoundInt(input, rounding_mode);
4729 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
4730 FPProcessException();
4731 }
4732 dst.SetFloat<SimFloat16>(i, rounded);
4733 }
4734 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4735 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4736 float input = src.Float<float>(i);
4737 float rounded = FPRoundInt(input, rounding_mode);
4738 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
4739 FPProcessException();
4740 }
4741 dst.SetFloat<float>(i, rounded);
4742 }
4743 } else {
4744 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4745 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4746 double input = src.Float<double>(i);
4747 double rounded = FPRoundInt(input, rounding_mode);
4748 if (inexact_exception && !IsNaN(input) && (input != rounded)) {
4749 FPProcessException();
4750 }
4751 dst.SetFloat<double>(i, rounded);
4752 }
4753 }
4754 return dst;
4755 }
4756
4757
fcvts(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4758 LogicVRegister Simulator::fcvts(VectorFormat vform,
4759 LogicVRegister dst,
4760 const LogicVRegister& src,
4761 FPRounding rounding_mode,
4762 int fbits) {
4763 dst.ClearForWrite(vform);
4764 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4765 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4766 SimFloat16 op =
4767 static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits);
4768 dst.SetInt(vform, i, FPToInt16(op, rounding_mode));
4769 }
4770 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4771 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4772 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4773 dst.SetInt(vform, i, FPToInt32(op, rounding_mode));
4774 }
4775 } else {
4776 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4777 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4778 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4779 dst.SetInt(vform, i, FPToInt64(op, rounding_mode));
4780 }
4781 }
4782 return dst;
4783 }
4784
4785
fcvtu(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding rounding_mode,int fbits)4786 LogicVRegister Simulator::fcvtu(VectorFormat vform,
4787 LogicVRegister dst,
4788 const LogicVRegister& src,
4789 FPRounding rounding_mode,
4790 int fbits) {
4791 dst.ClearForWrite(vform);
4792 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4793 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4794 SimFloat16 op =
4795 static_cast<double>(src.Float<SimFloat16>(i)) * std::pow(2.0, fbits);
4796 dst.SetUint(vform, i, FPToUInt16(op, rounding_mode));
4797 }
4798 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4799 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4800 float op = src.Float<float>(i) * std::pow(2.0f, fbits);
4801 dst.SetUint(vform, i, FPToUInt32(op, rounding_mode));
4802 }
4803 } else {
4804 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4805 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4806 double op = src.Float<double>(i) * std::pow(2.0, fbits);
4807 dst.SetUint(vform, i, FPToUInt64(op, rounding_mode));
4808 }
4809 }
4810 return dst;
4811 }
4812
4813
fcvtl(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4814 LogicVRegister Simulator::fcvtl(VectorFormat vform,
4815 LogicVRegister dst,
4816 const LogicVRegister& src) {
4817 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4818 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4819 // TODO: Full support for SimFloat16 in SimRegister(s).
4820 dst.SetFloat(i,
4821 FPToFloat(RawbitsToFloat16(src.Float<uint16_t>(i)),
4822 ReadDN()));
4823 }
4824 } else {
4825 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4826 for (int i = LaneCountFromFormat(vform) - 1; i >= 0; i--) {
4827 dst.SetFloat(i, FPToDouble(src.Float<float>(i), ReadDN()));
4828 }
4829 }
4830 return dst;
4831 }
4832
4833
fcvtl2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4834 LogicVRegister Simulator::fcvtl2(VectorFormat vform,
4835 LogicVRegister dst,
4836 const LogicVRegister& src) {
4837 int lane_count = LaneCountFromFormat(vform);
4838 if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
4839 for (int i = 0; i < lane_count; i++) {
4840 // TODO: Full support for SimFloat16 in SimRegister(s).
4841 dst.SetFloat(i,
4842 FPToFloat(RawbitsToFloat16(
4843 src.Float<uint16_t>(i + lane_count)),
4844 ReadDN()));
4845 }
4846 } else {
4847 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
4848 for (int i = 0; i < lane_count; i++) {
4849 dst.SetFloat(i, FPToDouble(src.Float<float>(i + lane_count), ReadDN()));
4850 }
4851 }
4852 return dst;
4853 }
4854
4855
fcvtn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4856 LogicVRegister Simulator::fcvtn(VectorFormat vform,
4857 LogicVRegister dst,
4858 const LogicVRegister& src) {
4859 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4860 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4861 dst.SetFloat(i,
4862 Float16ToRawbits(
4863 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
4864 }
4865 } else {
4866 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4867 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4868 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
4869 }
4870 }
4871 return dst;
4872 }
4873
4874
fcvtn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4875 LogicVRegister Simulator::fcvtn2(VectorFormat vform,
4876 LogicVRegister dst,
4877 const LogicVRegister& src) {
4878 int lane_count = LaneCountFromFormat(vform) / 2;
4879 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
4880 for (int i = lane_count - 1; i >= 0; i--) {
4881 dst.SetFloat(i + lane_count,
4882 Float16ToRawbits(
4883 FPToFloat16(src.Float<float>(i), FPTieEven, ReadDN())));
4884 }
4885 } else {
4886 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4887 for (int i = lane_count - 1; i >= 0; i--) {
4888 dst.SetFloat(i + lane_count,
4889 FPToFloat(src.Float<double>(i), FPTieEven, ReadDN()));
4890 }
4891 }
4892 return dst;
4893 }
4894
4895
fcvtxn(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4896 LogicVRegister Simulator::fcvtxn(VectorFormat vform,
4897 LogicVRegister dst,
4898 const LogicVRegister& src) {
4899 dst.ClearForWrite(vform);
4900 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4901 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
4902 dst.SetFloat(i, FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
4903 }
4904 return dst;
4905 }
4906
4907
fcvtxn2(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)4908 LogicVRegister Simulator::fcvtxn2(VectorFormat vform,
4909 LogicVRegister dst,
4910 const LogicVRegister& src) {
4911 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kSRegSize);
4912 int lane_count = LaneCountFromFormat(vform) / 2;
4913 for (int i = lane_count - 1; i >= 0; i--) {
4914 dst.SetFloat(i + lane_count,
4915 FPToFloat(src.Float<double>(i), FPRoundOdd, ReadDN()));
4916 }
4917 return dst;
4918 }
4919
4920
4921 // Based on reference C function recip_sqrt_estimate from ARM ARM.
recip_sqrt_estimate(double a)4922 double Simulator::recip_sqrt_estimate(double a) {
4923 int q0, q1, s;
4924 double r;
4925 if (a < 0.5) {
4926 q0 = static_cast<int>(a * 512.0);
4927 r = 1.0 / sqrt((static_cast<double>(q0) + 0.5) / 512.0);
4928 } else {
4929 q1 = static_cast<int>(a * 256.0);
4930 r = 1.0 / sqrt((static_cast<double>(q1) + 0.5) / 256.0);
4931 }
4932 s = static_cast<int>(256.0 * r + 0.5);
4933 return static_cast<double>(s) / 256.0;
4934 }
4935
4936
Bits(uint64_t val,int start_bit,int end_bit)4937 static inline uint64_t Bits(uint64_t val, int start_bit, int end_bit) {
4938 return ExtractUnsignedBitfield64(start_bit, end_bit, val);
4939 }
4940
4941
4942 template <typename T>
FPRecipSqrtEstimate(T op)4943 T Simulator::FPRecipSqrtEstimate(T op) {
4944 if (IsNaN(op)) {
4945 return FPProcessNaN(op);
4946 } else if (op == 0.0) {
4947 if (copysign(1.0, op) < 0.0) {
4948 return kFP64NegativeInfinity;
4949 } else {
4950 return kFP64PositiveInfinity;
4951 }
4952 } else if (copysign(1.0, op) < 0.0) {
4953 FPProcessException();
4954 return FPDefaultNaN<T>();
4955 } else if (IsInf(op)) {
4956 return 0.0;
4957 } else {
4958 uint64_t fraction;
4959 int exp, result_exp;
4960
4961 if (IsFloat16<T>()) {
4962 exp = Float16Exp(op);
4963 fraction = Float16Mantissa(op);
4964 fraction <<= 42;
4965 } else if (IsFloat32<T>()) {
4966 exp = FloatExp(op);
4967 fraction = FloatMantissa(op);
4968 fraction <<= 29;
4969 } else {
4970 VIXL_ASSERT(IsFloat64<T>());
4971 exp = DoubleExp(op);
4972 fraction = DoubleMantissa(op);
4973 }
4974
4975 if (exp == 0) {
4976 while (Bits(fraction, 51, 51) == 0) {
4977 fraction = Bits(fraction, 50, 0) << 1;
4978 exp -= 1;
4979 }
4980 fraction = Bits(fraction, 50, 0) << 1;
4981 }
4982
4983 double scaled;
4984 if (Bits(exp, 0, 0) == 0) {
4985 scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
4986 } else {
4987 scaled = DoublePack(0, 1021, Bits(fraction, 51, 44) << 44);
4988 }
4989
4990 if (IsFloat16<T>()) {
4991 result_exp = (44 - exp) / 2;
4992 } else if (IsFloat32<T>()) {
4993 result_exp = (380 - exp) / 2;
4994 } else {
4995 VIXL_ASSERT(IsFloat64<T>());
4996 result_exp = (3068 - exp) / 2;
4997 }
4998
4999 uint64_t estimate = DoubleToRawbits(recip_sqrt_estimate(scaled));
5000
5001 if (IsFloat16<T>()) {
5002 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
5003 uint16_t est_bits = static_cast<uint16_t>(Bits(estimate, 51, 42));
5004 return Float16Pack(0, exp_bits, est_bits);
5005 } else if (IsFloat32<T>()) {
5006 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
5007 uint32_t est_bits = static_cast<uint32_t>(Bits(estimate, 51, 29));
5008 return FloatPack(0, exp_bits, est_bits);
5009 } else {
5010 VIXL_ASSERT(IsFloat64<T>());
5011 return DoublePack(0, Bits(result_exp, 10, 0), Bits(estimate, 51, 0));
5012 }
5013 }
5014 }
5015
5016
frsqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5017 LogicVRegister Simulator::frsqrte(VectorFormat vform,
5018 LogicVRegister dst,
5019 const LogicVRegister& src) {
5020 dst.ClearForWrite(vform);
5021 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5022 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5023 SimFloat16 input = src.Float<SimFloat16>(i);
5024 dst.SetFloat(i, FPRecipSqrtEstimate<SimFloat16>(input));
5025 }
5026 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5027 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5028 float input = src.Float<float>(i);
5029 dst.SetFloat(i, FPRecipSqrtEstimate<float>(input));
5030 }
5031 } else {
5032 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5033 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5034 double input = src.Float<double>(i);
5035 dst.SetFloat(i, FPRecipSqrtEstimate<double>(input));
5036 }
5037 }
5038 return dst;
5039 }
5040
5041 template <typename T>
FPRecipEstimate(T op,FPRounding rounding)5042 T Simulator::FPRecipEstimate(T op, FPRounding rounding) {
5043 uint32_t sign;
5044
5045 if (IsFloat16<T>()) {
5046 sign = Float16Sign(op);
5047 } else if (IsFloat32<T>()) {
5048 sign = FloatSign(op);
5049 } else {
5050 VIXL_ASSERT(IsFloat64<T>());
5051 sign = DoubleSign(op);
5052 }
5053
5054 if (IsNaN(op)) {
5055 return FPProcessNaN(op);
5056 } else if (IsInf(op)) {
5057 return (sign == 1) ? -0.0 : 0.0;
5058 } else if (op == 0.0) {
5059 FPProcessException(); // FPExc_DivideByZero exception.
5060 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
5061 } else if ((IsFloat16<T>() && (std::fabs(op) < std::pow(2.0, -16.0))) ||
5062 (IsFloat32<T>() && (std::fabs(op) < std::pow(2.0, -128.0))) ||
5063 (IsFloat64<T>() && (std::fabs(op) < std::pow(2.0, -1024.0)))) {
5064 bool overflow_to_inf = false;
5065 switch (rounding) {
5066 case FPTieEven:
5067 overflow_to_inf = true;
5068 break;
5069 case FPPositiveInfinity:
5070 overflow_to_inf = (sign == 0);
5071 break;
5072 case FPNegativeInfinity:
5073 overflow_to_inf = (sign == 1);
5074 break;
5075 case FPZero:
5076 overflow_to_inf = false;
5077 break;
5078 default:
5079 break;
5080 }
5081 FPProcessException(); // FPExc_Overflow and FPExc_Inexact.
5082 if (overflow_to_inf) {
5083 return (sign == 1) ? kFP64NegativeInfinity : kFP64PositiveInfinity;
5084 } else {
5085 // Return FPMaxNormal(sign).
5086 if (IsFloat16<T>()) {
5087 return Float16Pack(sign, 0x1f, 0x3ff);
5088 } else if (IsFloat32<T>()) {
5089 return FloatPack(sign, 0xfe, 0x07fffff);
5090 } else {
5091 VIXL_ASSERT(IsFloat64<T>());
5092 return DoublePack(sign, 0x7fe, 0x0fffffffffffffl);
5093 }
5094 }
5095 } else {
5096 uint64_t fraction;
5097 int exp, result_exp;
5098 uint32_t sign;
5099
5100 if (IsFloat16<T>()) {
5101 sign = Float16Sign(op);
5102 exp = Float16Exp(op);
5103 fraction = Float16Mantissa(op);
5104 fraction <<= 42;
5105 } else if (IsFloat32<T>()) {
5106 sign = FloatSign(op);
5107 exp = FloatExp(op);
5108 fraction = FloatMantissa(op);
5109 fraction <<= 29;
5110 } else {
5111 VIXL_ASSERT(IsFloat64<T>());
5112 sign = DoubleSign(op);
5113 exp = DoubleExp(op);
5114 fraction = DoubleMantissa(op);
5115 }
5116
5117 if (exp == 0) {
5118 if (Bits(fraction, 51, 51) == 0) {
5119 exp -= 1;
5120 fraction = Bits(fraction, 49, 0) << 2;
5121 } else {
5122 fraction = Bits(fraction, 50, 0) << 1;
5123 }
5124 }
5125
5126 double scaled = DoublePack(0, 1022, Bits(fraction, 51, 44) << 44);
5127
5128 if (IsFloat16<T>()) {
5129 result_exp = (29 - exp); // In range 29-30 = -1 to 29+1 = 30.
5130 } else if (IsFloat32<T>()) {
5131 result_exp = (253 - exp); // In range 253-254 = -1 to 253+1 = 254.
5132 } else {
5133 VIXL_ASSERT(IsFloat64<T>());
5134 result_exp = (2045 - exp); // In range 2045-2046 = -1 to 2045+1 = 2046.
5135 }
5136
5137 double estimate = recip_estimate(scaled);
5138
5139 fraction = DoubleMantissa(estimate);
5140 if (result_exp == 0) {
5141 fraction = (UINT64_C(1) << 51) | Bits(fraction, 51, 1);
5142 } else if (result_exp == -1) {
5143 fraction = (UINT64_C(1) << 50) | Bits(fraction, 51, 2);
5144 result_exp = 0;
5145 }
5146 if (IsFloat16<T>()) {
5147 uint16_t exp_bits = static_cast<uint16_t>(Bits(result_exp, 4, 0));
5148 uint16_t frac_bits = static_cast<uint16_t>(Bits(fraction, 51, 42));
5149 return Float16Pack(sign, exp_bits, frac_bits);
5150 } else if (IsFloat32<T>()) {
5151 uint32_t exp_bits = static_cast<uint32_t>(Bits(result_exp, 7, 0));
5152 uint32_t frac_bits = static_cast<uint32_t>(Bits(fraction, 51, 29));
5153 return FloatPack(sign, exp_bits, frac_bits);
5154 } else {
5155 VIXL_ASSERT(IsFloat64<T>());
5156 return DoublePack(sign, Bits(result_exp, 10, 0), Bits(fraction, 51, 0));
5157 }
5158 }
5159 }
5160
5161
frecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,FPRounding round)5162 LogicVRegister Simulator::frecpe(VectorFormat vform,
5163 LogicVRegister dst,
5164 const LogicVRegister& src,
5165 FPRounding round) {
5166 dst.ClearForWrite(vform);
5167 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5168 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5169 SimFloat16 input = src.Float<SimFloat16>(i);
5170 dst.SetFloat(i, FPRecipEstimate<SimFloat16>(input, round));
5171 }
5172 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5173 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5174 float input = src.Float<float>(i);
5175 dst.SetFloat(i, FPRecipEstimate<float>(input, round));
5176 }
5177 } else {
5178 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5179 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5180 double input = src.Float<double>(i);
5181 dst.SetFloat(i, FPRecipEstimate<double>(input, round));
5182 }
5183 }
5184 return dst;
5185 }
5186
5187
ursqrte(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5188 LogicVRegister Simulator::ursqrte(VectorFormat vform,
5189 LogicVRegister dst,
5190 const LogicVRegister& src) {
5191 dst.ClearForWrite(vform);
5192 uint64_t operand;
5193 uint32_t result;
5194 double dp_operand, dp_result;
5195 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5196 operand = src.Uint(vform, i);
5197 if (operand <= 0x3FFFFFFF) {
5198 result = 0xFFFFFFFF;
5199 } else {
5200 dp_operand = operand * std::pow(2.0, -32);
5201 dp_result = recip_sqrt_estimate(dp_operand) * std::pow(2.0, 31);
5202 result = static_cast<uint32_t>(dp_result);
5203 }
5204 dst.SetUint(vform, i, result);
5205 }
5206 return dst;
5207 }
5208
5209
5210 // Based on reference C function recip_estimate from ARM ARM.
recip_estimate(double a)5211 double Simulator::recip_estimate(double a) {
5212 int q, s;
5213 double r;
5214 q = static_cast<int>(a * 512.0);
5215 r = 1.0 / ((static_cast<double>(q) + 0.5) / 512.0);
5216 s = static_cast<int>(256.0 * r + 0.5);
5217 return static_cast<double>(s) / 256.0;
5218 }
5219
5220
urecpe(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5221 LogicVRegister Simulator::urecpe(VectorFormat vform,
5222 LogicVRegister dst,
5223 const LogicVRegister& src) {
5224 dst.ClearForWrite(vform);
5225 uint64_t operand;
5226 uint32_t result;
5227 double dp_operand, dp_result;
5228 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5229 operand = src.Uint(vform, i);
5230 if (operand <= 0x7FFFFFFF) {
5231 result = 0xFFFFFFFF;
5232 } else {
5233 dp_operand = operand * std::pow(2.0, -32);
5234 dp_result = recip_estimate(dp_operand) * std::pow(2.0, 31);
5235 result = static_cast<uint32_t>(dp_result);
5236 }
5237 dst.SetUint(vform, i, result);
5238 }
5239 return dst;
5240 }
5241
5242 template <typename T>
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5243 LogicVRegister Simulator::frecpx(VectorFormat vform,
5244 LogicVRegister dst,
5245 const LogicVRegister& src) {
5246 dst.ClearForWrite(vform);
5247 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5248 T op = src.Float<T>(i);
5249 T result;
5250 if (IsNaN(op)) {
5251 result = FPProcessNaN(op);
5252 } else {
5253 int exp;
5254 uint32_t sign;
5255 if (IsFloat16<T>()) {
5256 sign = Float16Sign(op);
5257 exp = Float16Exp(op);
5258 exp = (exp == 0) ? (0x1F - 1) : static_cast<int>(Bits(~exp, 4, 0));
5259 result = Float16Pack(sign, exp, 0);
5260 } else if (IsFloat32<T>()) {
5261 sign = FloatSign(op);
5262 exp = FloatExp(op);
5263 exp = (exp == 0) ? (0xFF - 1) : static_cast<int>(Bits(~exp, 7, 0));
5264 result = FloatPack(sign, exp, 0);
5265 } else {
5266 VIXL_ASSERT(IsFloat64<T>());
5267 sign = DoubleSign(op);
5268 exp = DoubleExp(op);
5269 exp = (exp == 0) ? (0x7FF - 1) : static_cast<int>(Bits(~exp, 10, 0));
5270 result = DoublePack(sign, exp, 0);
5271 }
5272 }
5273 dst.SetFloat(i, result);
5274 }
5275 return dst;
5276 }
5277
5278
frecpx(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src)5279 LogicVRegister Simulator::frecpx(VectorFormat vform,
5280 LogicVRegister dst,
5281 const LogicVRegister& src) {
5282 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5283 frecpx<SimFloat16>(vform, dst, src);
5284 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5285 frecpx<float>(vform, dst, src);
5286 } else {
5287 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5288 frecpx<double>(vform, dst, src);
5289 }
5290 return dst;
5291 }
5292
scvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)5293 LogicVRegister Simulator::scvtf(VectorFormat vform,
5294 LogicVRegister dst,
5295 const LogicVRegister& src,
5296 int fbits,
5297 FPRounding round) {
5298 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5299 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5300 SimFloat16 result = FixedToFloat16(src.Int(kFormatH, i), fbits, round);
5301 dst.SetFloat<SimFloat16>(i, result);
5302 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5303 float result = FixedToFloat(src.Int(kFormatS, i), fbits, round);
5304 dst.SetFloat<float>(i, result);
5305 } else {
5306 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5307 double result = FixedToDouble(src.Int(kFormatD, i), fbits, round);
5308 dst.SetFloat<double>(i, result);
5309 }
5310 }
5311 return dst;
5312 }
5313
5314
ucvtf(VectorFormat vform,LogicVRegister dst,const LogicVRegister & src,int fbits,FPRounding round)5315 LogicVRegister Simulator::ucvtf(VectorFormat vform,
5316 LogicVRegister dst,
5317 const LogicVRegister& src,
5318 int fbits,
5319 FPRounding round) {
5320 for (int i = 0; i < LaneCountFromFormat(vform); i++) {
5321 if (LaneSizeInBitsFromFormat(vform) == kHRegSize) {
5322 SimFloat16 result = UFixedToFloat16(src.Uint(kFormatH, i), fbits, round);
5323 dst.SetFloat<SimFloat16>(i, result);
5324 } else if (LaneSizeInBitsFromFormat(vform) == kSRegSize) {
5325 float result = UFixedToFloat(src.Uint(kFormatS, i), fbits, round);
5326 dst.SetFloat<float>(i, result);
5327 } else {
5328 VIXL_ASSERT(LaneSizeInBitsFromFormat(vform) == kDRegSize);
5329 double result = UFixedToDouble(src.Uint(kFormatD, i), fbits, round);
5330 dst.SetFloat<double>(i, result);
5331 }
5332 }
5333 return dst;
5334 }
5335
5336
5337 } // namespace aarch64
5338 } // namespace vixl
5339
5340 #endif // VIXL_INCLUDE_SIMULATOR_AARCH64
5341