// NOTE(review): code-viewer navigation chrome ("Home", "Line#", "Scopes#",
// "Navigate", "Raw", "Download") removed — it is not part of the source file.
1 /*
2  * Copyright (C) 2019 The Android Open Source Project
3  *
4  * Licensed under the Apache License, Version 2.0 (the "License");
5  * you may not use this file except in compliance with the License.
6  * You may obtain a copy of the License at
7  *
8  *      http://www.apache.org/licenses/LICENSE-2.0
9  *
10  * Unless required by applicable law or agreed to in writing, software
11  * distributed under the License is distributed on an "AS IS" BASIS,
12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
13  * See the License for the specific language governing permissions and
14  * limitations under the License.
15  */
16 
17 #include "gtest/gtest.h"
18 
19 #include <cstdint>
20 #include <initializer_list>
21 #include <limits>
22 
23 #include "utility.h"
24 
25 namespace {
26 
TEST(Arm64InsnTest, UnsignedBitfieldMoveNoShift) {
  // UBFM with immr=0, imms=63 selects all 64 source bits with no rotation,
  // so the instruction degenerates to a plain register copy.
  uint64_t source = 0x3952247371907021ULL;
  uint64_t dest;

  asm("ubfm %0, %1, #0, #63" : "=r"(dest) : "r"(source));

  ASSERT_EQ(dest, 0x3952247371907021ULL);
}
35 
TEST(Arm64InsnTest, BitfieldLeftInsertion) {
  // BFM with immr=40, imms=15: bits [15:0] of the source land at bits
  // [39:24] of the destination; all other destination bits are preserved.
  uint64_t source = 0x389522868478abcdULL;
  uint64_t dest = 0x1101044682325271ULL;

  asm("bfm %0, %1, #40, #15" : "=r"(dest) : "r"(source), "0"(dest));

  ASSERT_EQ(dest, 0x110104abcd325271ULL);
}
44 
TEST(Arm64InsnTest, BitfieldRightInsertion) {
  // BFM with immr=4, imms=39: bits [39:4] of the source are copied into
  // bits [35:0] of the destination (an insert-from-shifted-right form);
  // the destination's upper bits are preserved.
  uint64_t source = 0x3276561809377344ULL;
  uint64_t dest = 0x1668039626579787ULL;

  asm("bfm %0, %1, #4, #39" : "=r"(dest) : "r"(source), "0"(dest));

  ASSERT_EQ(dest, 0x1668039180937734ULL);
}
53 
TEST(Arm64InsnTest, MoveImmToFp32) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantissa = 1111
  //
  // This immediate corresponds to 32-bit floating point value:
  //
  // 0 011111 00 1111 0000000000000000000
  // | |      |  |    |
  // | |      |  |    +- 19 zeros
  // | |      |  +------ mantissa
  // | |      +--------- lower exponent
  // | +---------------- upper exponent (custom extended to 6 bits)
  // +------------------ sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(124-127) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x40000000U, 0U));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x40780000U, 0U));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x41800000U, 0U));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f80000U, 0U));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e000000U, 0U));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e780000U, 0U));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f800000U, 0U));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff80000U, 0U));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000U, 0U));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000U, 0U));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000U, 0U));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000U, 0U));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000U, 0U));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000U, 0U));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000U, 0U));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000U, 0U));
}
160 
TEST(Arm64InsnTest, MoveImmToFp64) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // For example, we decompose imm8 = 0b01001111 into:
  //
  // - sign = 0 (positive)
  // - upper exponent = 1
  // - lower exponent = 00
  // - mantissa = 1111
  //
  // This immediate corresponds to 64-bit floating point value:
  //
  // 0 011111111 00 1111 000000000000000000000000000000000000000000000000
  // | |         |  |    |
  // | |         |  |    +- 48 zeros
  // | |         |  +------ mantissa
  // | |         +--------- lower exponent
  // | +------------------- upper exponent (custom extended to 9 bits)
  // +--------------------- sign
  //
  // Thus we have:
  //
  //   1.11110000... * 2^(1020-1023) = 0.2421875
  //
  // where 1.11110000... is in binary.
  //
  // See VFPExpandImm in the ARM Architecture Manual for details.
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0U));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0U));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0U));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0U));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0U));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0U));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0U));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0U));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0U));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0U));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0U));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0U));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0U));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0U));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0U));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0U));
}
267 
TEST(Arm64InsnTest, MoveImmToF32x4) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.
  //
  // The vector form broadcasts the expanded 32-bit value into all four lanes,
  // so each expected result repeats the scalar encoding four times.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000040000000ULL, 0x4000000040000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x4078000040780000ULL, 0x4078000040780000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4180000041800000ULL, 0x4180000041800000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x41f8000041f80000ULL, 0x41f8000041f80000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3e0000003e000000ULL, 0x3e0000003e000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3e7800003e780000ULL, 0x3e7800003e780000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3f8000003f800000ULL, 0x3f8000003f800000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3ff800003ff80000ULL, 0x3ff800003ff80000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc0000000c0000000ULL, 0xc0000000c0000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc0780000c0780000ULL, 0xc0780000c0780000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc1800000c1800000ULL, 0xc1800000c1800000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc1f80000c1f80000ULL, 0xc1f80000c1f80000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbe000000be000000ULL, 0xbe000000be000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbe780000be780000ULL, 0xbe780000be780000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbf800000bf800000ULL, 0xbf800000bf800000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbff80000bff80000ULL, 0xbff80000bff80000ULL));
}
349 
TEST(Arm64InsnTest, MoveImmToF64x2) {
  // The tests below verify that fmov works with various immediates.
  // Specifically, the instruction has an 8-bit immediate field consisting of
  // the following four subfields:
  //
  // - sign (one bit)
  // - upper exponent (one bit)
  // - lower exponent (two bits)
  // - mantissa (four bits)
  //
  // We enumerate all possible 8-bit immediate encodings of the form:
  //
  //   {0,1}{0,1}{00,11}{0000,1111}
  //
  // to verify that the decoder correctly splits the immediate into the
  // subfields and reconstructs the intended floating-point value.
  //
  // The vector form broadcasts the expanded 64-bit value into both lanes,
  // so each expected result repeats the scalar encoding twice.

  // imm8 = 0b00000000
  __uint128_t res1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.0e+00")();
  ASSERT_EQ(res1, MakeUInt128(0x4000000000000000ULL, 0x4000000000000000ULL));

  // imm8 = 0b00001111
  __uint128_t res2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.8750e+00")();
  ASSERT_EQ(res2, MakeUInt128(0x400f000000000000ULL, 0x400f000000000000ULL));

  // imm8 = 0b00110000
  __uint128_t res3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.60e+01")();
  ASSERT_EQ(res3, MakeUInt128(0x4030000000000000ULL, 0x4030000000000000ULL));

  // imm8 = 0b00111111
  __uint128_t res4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.10e+01")();
  ASSERT_EQ(res4, MakeUInt128(0x403f000000000000ULL, 0x403f000000000000ULL));

  // imm8 = 0b01000000
  __uint128_t res5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.250e-01")();
  ASSERT_EQ(res5, MakeUInt128(0x3fc0000000000000ULL, 0x3fc0000000000000ULL));

  // imm8 = 0b01001111
  __uint128_t res6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.4218750e-01")();
  ASSERT_EQ(res6, MakeUInt128(0x3fcf000000000000ULL, 0x3fcf000000000000ULL));

  // imm8 = 0b01110000
  __uint128_t res7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.0e+00")();
  ASSERT_EQ(res7, MakeUInt128(0x3ff0000000000000ULL, 0x3ff0000000000000ULL));

  // imm8 = 0b01111111
  __uint128_t res8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.93750e+00")();
  ASSERT_EQ(res8, MakeUInt128(0x3fff000000000000ULL, 0x3fff000000000000ULL));

  // imm8 = 0b10000000
  __uint128_t res9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.0e+00")();
  ASSERT_EQ(res9, MakeUInt128(0xc000000000000000ULL, 0xc000000000000000ULL));

  // imm8 = 0b10001111
  __uint128_t res10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.8750e+00")();
  ASSERT_EQ(res10, MakeUInt128(0xc00f000000000000ULL, 0xc00f000000000000ULL));

  // imm8 = 0b10110000
  __uint128_t res11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.60e+01")();
  ASSERT_EQ(res11, MakeUInt128(0xc030000000000000ULL, 0xc030000000000000ULL));

  // imm8 = 0b10111111
  __uint128_t res12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.10e+01")();
  ASSERT_EQ(res12, MakeUInt128(0xc03f000000000000ULL, 0xc03f000000000000ULL));

  // imm8 = 0b11000000
  __uint128_t res13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.250e-01")();
  ASSERT_EQ(res13, MakeUInt128(0xbfc0000000000000ULL, 0xbfc0000000000000ULL));

  // imm8 = 0b11001111
  __uint128_t res14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.4218750e-01")();
  ASSERT_EQ(res14, MakeUInt128(0xbfcf000000000000ULL, 0xbfcf000000000000ULL));

  // imm8 = 0b11110000
  __uint128_t res15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.0e+00")();
  ASSERT_EQ(res15, MakeUInt128(0xbff0000000000000ULL, 0xbff0000000000000ULL));

  // imm8 = 0b11111111
  __uint128_t res16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.93750e+00")();
  ASSERT_EQ(res16, MakeUInt128(0xbfff000000000000ULL, 0xbfff000000000000ULL));
}
431 
TEST(Arm64InsnTest, MoveFpRegToReg) {
  // FMOV (SIMD&FP to general): copy a lane of a vector register into a
  // general-purpose register without any conversion.
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);
  uint64_t gpr = 0xffffeeeeddddccccULL;

  // Upper 64-bit element (d[1]).
  asm("fmov %0, %1.d[1]" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x3333cccc4444ddddULL);

  // Low 64-bit element.
  asm("fmov %0, %d1" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x1111aaaa2222bbbbULL);

  // Low 32-bit element; the W destination zero-extends into X.
  asm("fmov %w0, %s1" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x2222bbbbULL);
}
448 
TEST(Arm64InsnTest, MoveRegToFpReg) {
  // FMOV (general to SIMD&FP): an element write (d[1]) preserves the other
  // lanes, while scalar writes clear the rest of the vector register.
  uint64_t gpr = 0xffffeeeeddddccccULL;
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // Write only the upper 64-bit element.
  asm("fmov %0.d[1], %1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaa2222bbbbULL, 0xffffeeeeddddccccULL));

  // Write the low double; the upper half is zeroed.
  asm("fmov %d0, %1" : "=w"(vec) : "r"(gpr));
  ASSERT_EQ(vec, MakeUInt128(0xffffeeeeddddccccULL, 0x0));

  // Write the low single; everything above bit 31 is zeroed.
  asm("fmov %s0, %w1" : "=w"(vec) : "r"(gpr));
  ASSERT_EQ(vec, MakeUInt128(0xddddccccULL, 0x0));
}
465 
TEST(Arm64InsnTest, MoveFpRegToFpReg) {
  // FMOV (register): a scalar-to-scalar copy zeroes the unused upper bits
  // of the destination SIMD&FP register.
  __uint128_t dst;

  __uint128_t double_src =
      MakeUInt128(0x402e9eb851eb851fULL, 0xdeadbeefaabbccddULL);  // 15.31 in double
  asm("fmov %d0, %d1" : "=w"(dst) : "w"(double_src));
  ASSERT_EQ(dst, MakeUInt128(0x402e9eb851eb851fULL, 0ULL));

  __uint128_t single_src =
      MakeUInt128(0xaabbccdd40e51eb8ULL, 0x0011223344556677ULL);  // 7.16 in float
  asm("fmov %s0, %s1" : "=w"(dst) : "w"(single_src));
  ASSERT_EQ(dst, MakeUInt128(0x40e51eb8ULL, 0ULL));
}
479 
TEST(Arm64InsnTest, InsertRegPartIntoSimd128) {
  // MOV (general, element): insert part of a GPR into a single vector lane,
  // leaving every other lane untouched.
  uint64_t gpr = 0xffffeeeeddddccccULL;
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // Low byte of the GPR into lane b[3].
  asm("mov %0.b[3], %w1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaacc22bbbbULL, 0x3333cccc4444ddddULL));

  // Full 64-bit GPR into lane d[1].
  asm("mov %0.d[1], %1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaacc22bbbbULL, 0xffffeeeeddddccccULL));
}
492 
TEST(Arm64InsnTest, DuplicateRegIntoSimd128) {
  // DUP (general): broadcast the low byte of a GPR into all sixteen lanes.
  __uint128_t result = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("dup %0.16b, %w1")(0xabU);
  ASSERT_EQ(result, MakeUInt128(0xababababababababULL, 0xababababababababULL));
}
497 
TEST(Arm64InsnTest, MoveSimd128ElemToRegSigned) {
  // SMOV: move a vector element to a general register with sign extension.
  uint64_t out = 0;
  __uint128_t vec = MakeUInt128(0x9796959493929190ULL, 0x9f9e9d9c9b9a99ULL);

  // 32-bit lanes, sign-extended to 64 bits.
  asm("smov %0, %1.s[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xffffffff93929190ULL);

  asm("smov %0, %1.s[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xffffffff9c9b9a99ULL);

  // 16-bit lanes, sign-extended to 32 bits (W destination zero-extends).
  asm("smov %w0, %1.h[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x00000000ffff9190ULL);

  asm("smov %w0, %1.h[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x00000000ffff9594ULL);

  // 8-bit lanes, sign-extended to 32 bits.
  asm("smov %w0, %1.b[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x00000000ffffff90ULL);

  asm("smov %w0, %1.b[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x00000000ffffff92ULL);
}
523 
TEST(Arm64InsnTest, MoveSimd128ElemToRegUnsigned) {
  // UMOV: move a vector element to a general register with zero extension.
  uint64_t out = 0;
  __uint128_t vec = MakeUInt128(0xaaaabbbbcccceeeeULL, 0xffff000011112222ULL);

  // 64-bit lanes.
  asm("umov %0, %1.d[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xaaaabbbbcccceeeeULL);

  asm("umov %0, %1.d[1]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xffff000011112222ULL);

  // 32-bit lanes.
  asm("umov %w0, %1.s[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xcccceeeeULL);

  asm("umov %w0, %1.s[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x11112222ULL);

  // 16-bit lanes.
  asm("umov %w0, %1.h[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xeeeeULL);

  asm("umov %w0, %1.h[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xbbbbULL);

  // 8-bit lanes.
  asm("umov %w0, %1.b[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xeeULL);

  asm("umov %w0, %1.b[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xccULL);
}
556 
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4) {
  // SMLAL (by element): multiply the low four 16-bit lanes of arg1 by
  // element h[1] of arg2, widen to 32 bits, and accumulate into arg3
  // (the macro feeds arg3 in as the initial value of %0).
  __uint128_t arg1 = MakeUInt128(0x9463229563989898ULL, 0x9358211674562701ULL);
  __uint128_t arg2 = MakeUInt128(0x0218356462201349ULL, 0x6715188190973038ULL);
  __uint128_t arg3 = MakeUInt128(0x1198004973407239ULL, 0x6103685406643193ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x37c4a3494b9db539ULL, 0x37c3dab413a58e33ULL));
}
565 
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper) {
  // SMLAL2 (by element): same as SMLAL but sources the upper four 16-bit
  // lanes of arg1; element h[1] of arg2 is the multiplier.
  __uint128_t arg1 = MakeUInt128(0x9478221818528624ULL, 0x0851400666044332ULL);
  __uint128_t arg2 = MakeUInt128(0x5888569867054315ULL, 0x4706965747458550ULL);
  __uint128_t arg3 = MakeUInt128(0x3323233421073015ULL, 0x4594051655379068ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5c30bd483c119e0fULL, 0x48ecc5ab6efb3a86ULL));
}
574 
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper2) {
  // SMLAL2 (by element) with the highest selectable lane index, h[7], to
  // exercise the element-index decoding at its upper bound.
  __uint128_t arg1 = MakeUInt128(0x9968262824727064ULL, 0x1336222178923903ULL);
  __uint128_t arg2 = MakeUInt128(0x1760854289437339ULL, 0x3561889165125042ULL);
  __uint128_t arg3 = MakeUInt128(0x4404008952719837ULL, 0x8738648058472689ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[7]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5d27e9db5e54d15aULL, 0x8b39d9f65f64ea0aULL));
}
583 
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4) {
  // SMLSL (by element): like SMLAL, but the widened products are
  // subtracted from the accumulator (arg3) instead of added.
  __uint128_t arg1 = MakeUInt128(0x9143447886360410ULL, 0x3182350736502778ULL);
  __uint128_t arg2 = MakeUInt128(0x5908975782727313ULL, 0x0504889398900992ULL);
  __uint128_t arg3 = MakeUInt128(0x3913503373250855ULL, 0x9826558670892426ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.4s, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xfd58202775231935ULL, 0x61d69fb0921db6b6ULL));
}
592 
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4Upper) {
  // SMLSL2 (by element): multiply-subtract sourcing the upper four 16-bit
  // lanes of arg1.
  __uint128_t arg1 = MakeUInt128(0x9320199199688285ULL, 0x1718395366913452ULL);
  __uint128_t arg2 = MakeUInt128(0x2244470804592396ULL, 0x6028171565515656ULL);
  __uint128_t arg3 = MakeUInt128(0x6611135982311225ULL, 0x0628905854914509ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.4s, %1.8h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x645326f0814d99a3ULL, 0x05c4290053980b2eULL));
}
601 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4) {
  // UMLAL (by element): unsigned variant of SMLAL — low four 16-bit lanes
  // of arg1 times element h[1] of arg2, widened and added into arg3.
  __uint128_t arg1 = MakeUInt128(0x9027601834840306ULL, 0x8113818551059797ULL);
  __uint128_t arg2 = MakeUInt128(0x0566400750942608ULL, 0x7885735796037324ULL);
  __uint128_t arg3 = MakeUInt128(0x5141467867036880ULL, 0x9880609716425849ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.4s, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x61c8e2c867f707f8ULL, 0xc5dfe72334816629ULL));
}
610 
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4Upper) {
  // UMLAL2 (by element): unsigned multiply-add sourcing the upper four
  // 16-bit lanes of arg1.
  __uint128_t arg1 = MakeUInt128(0x9454236828860613ULL, 0x4084148637767009ULL);
  __uint128_t arg2 = MakeUInt128(0x6120715124914043ULL, 0x0272538607648236ULL);
  __uint128_t arg3 = MakeUInt128(0x3414334623518975ULL, 0x7664521641376796ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.4s, %1.8h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x3c00351c3352428eULL, 0x7f9b6cda4425df7cULL));
}
619 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4) {
  // UMLSL (by element): unsigned widened products subtracted from the
  // accumulator (arg3).
  __uint128_t arg1 = MakeUInt128(0x9128009282525619ULL, 0x0205263016391147ULL);
  __uint128_t arg2 = MakeUInt128(0x7247331485739107ULL, 0x7758744253876117ULL);
  __uint128_t arg3 = MakeUInt128(0x4657867116941477ULL, 0x6421441111263583ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.4s, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x0268619be9b26a3cULL, 0x1876471910da19edULL));
}
628 
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4Upper) {
  // UMLSL2 (by element): unsigned multiply-subtract sourcing the upper
  // four 16-bit lanes of arg1.
  __uint128_t arg1 = MakeUInt128(0x9420757136275167ULL, 0x4573189189456283ULL);
  __uint128_t arg2 = MakeUInt128(0x5257044133543758ULL, 0x5753426986994725ULL);
  __uint128_t arg3 = MakeUInt128(0x4703165661399199ULL, 0x9682628247270641ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.4s, %1.8h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x2b7d4cb24d79259dULL, 0x8895afc6423a13adULL));
}
637 
TEST(Arm64InsnTest, AsmConvertI32F32) {
  // SCVTF (scalar, W source): signed 32-bit integer to single precision.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1");
  ASSERT_EQ(cvt(21), MakeUInt128(0x41a80000U, 0U));
}
642 
TEST(Arm64InsnTest, AsmConvertU32F32) {
  // UCVTF (scalar, W source): unsigned 32-bit integer to single precision.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1");

  ASSERT_EQ(cvt(29), MakeUInt128(0x41e80000U, 0U));

  // The MSB must be treated as magnitude, not as a sign bit.
  ASSERT_EQ(cvt(1U << 31), MakeUInt128(0x4f000000U, 0U));
}
651 
TEST(Arm64InsnTest, AsmConvertU32F32FromSimdReg) {
  // UCVTF (scalar, SIMD&FP source): unsigned int32 held in an S register.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1");

  ASSERT_EQ(cvt(28), MakeUInt128(0x41e00000U, 0U));

  // The MSB must be treated as magnitude, not as a sign bit.
  ASSERT_EQ(cvt(1U << 31), MakeUInt128(0x4f000000U, 0U));
}
660 
TEST(Arm64InsnTest, AsmConvertI32F64) {
  // SCVTF (scalar, W source): signed 32-bit integer to double precision.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1");
  ASSERT_EQ(cvt(21), MakeUInt128(0x4035000000000000ULL, 0U));
}
665 
TEST(Arm64InsnTest, AsmConvertU32F64) {
  // UCVTF (scalar, W source): unsigned 32-bit integer to double precision.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1");

  ASSERT_EQ(cvt(18), MakeUInt128(0x4032000000000000ULL, 0U));

  // The MSB must be treated as magnitude, not as a sign bit.
  ASSERT_EQ(cvt(1U << 31), MakeUInt128(0x41e0000000000000ULL, 0U));
}
674 
TEST(Arm64InsnTest, AsmConvertI64F32) {
  // SCVTF (scalar, X source): signed 64-bit integer to single precision.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1");
  ASSERT_EQ(cvt(11), MakeUInt128(0x41300000U, 0U));
}
679 
TEST(Arm64InsnTest, AsmConvertU64F32) {
  // UCVTF (scalar, X source): unsigned 64-bit integer to single precision.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1");

  ASSERT_EQ(cvt(3), MakeUInt128(0x40400000U, 0U));

  // The MSB must be treated as magnitude, not as a sign bit.
  ASSERT_EQ(cvt(1ULL << 63), MakeUInt128(0x5f000000U, 0U));
}
688 
TEST(Arm64InsnTest, AsmConvertI64F64) {
  // SCVTF (scalar, X source): signed 64-bit integer to double precision.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1");
  ASSERT_EQ(cvt(137), MakeUInt128(0x4061200000000000ULL, 0U));
}
693 
TEST(Arm64InsnTest, AsmConvertI32F32FromSimdReg) {
  // SCVTF (scalar, SIMD&FP source): signed int32 held in an S register.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1");
  ASSERT_EQ(cvt(1109), MakeUInt128(0x448aa000ULL, 0U));
}
698 
TEST(Arm64InsnTest, AsmConvertI64F64FromSimdReg) {
  // SCVTF (scalar, SIMD&FP source): signed int64 held in a D register.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1");
  ASSERT_EQ(cvt(123), MakeUInt128(0x405ec00000000000ULL, 0U));
}
703 
TEST(Arm64InsnTest, AsmConvertI32x4F32x4) {
  // SCVTF (vector): per-lane signed int32 to float conversion.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s");
  __uint128_t ints = MakeUInt128(0x0000003500000014ULL, 0x0000005400000009ULL);
  ASSERT_EQ(cvt(ints), MakeUInt128(0x4254000041a00000ULL, 0x42a8000041100000ULL));
}
709 
TEST(Arm64InsnTest, AsmConvertI64x2F64x2) {
  // SCVTF (vector): per-lane signed int64 to double conversion; the low
  // lane is negative to exercise the sign handling.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d");
  __uint128_t ints = MakeUInt128(static_cast<int64_t>(-9), 17U);
  ASSERT_EQ(cvt(ints), MakeUInt128(0xc022000000000000ULL, 0x4031000000000000ULL));
}
715 
TEST(Arm64InsnTest, AsmConvertU32x4F32x4) {
  // UCVTF (vector): per-lane unsigned int32 to float; one lane has the MSB
  // set to verify it is not treated as a sign bit.
  constexpr auto cvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s");
  __uint128_t ints = MakeUInt128(0x8000000000000019ULL, 0x0000005800000010ULL);
  ASSERT_EQ(cvt(ints), MakeUInt128(0x4f00000041c80000ULL, 0x42b0000041800000ULL));
}
721 
TEST(Arm64InsnTest,AsmConvertU64x2F64x2)722 TEST(Arm64InsnTest, AsmConvertU64x2F64x2) {
723   constexpr auto AsmConvertU64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d");
724   __uint128_t arg = MakeUInt128(1ULL << 63, 29U);
725   ASSERT_EQ(AsmConvertU64F64(arg), MakeUInt128(0x43e0000000000000ULL, 0x403d000000000000ULL));
726 }
727 
TEST(Arm64InsnTest,AsmConvertU64F64)728 TEST(Arm64InsnTest, AsmConvertU64F64) {
729   constexpr auto AsmConvertU64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1");
730 
731   ASSERT_EQ(AsmConvertU64F64(49), MakeUInt128(0x4048800000000000ULL, 0U));
732 
733   // Test that the topmost bit isn't treated as the sign.
734   ASSERT_EQ(AsmConvertU64F64(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));
735 }
736 
TEST(Arm64InsnTest,AsmConvertU64F64FromSimdReg)737 TEST(Arm64InsnTest, AsmConvertU64F64FromSimdReg) {
738   constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1");
739 
740   ASSERT_EQ(AsmUcvtf(47), MakeUInt128(0x4047800000000000ULL, 0U));
741 
742   // Test that the topmost bit isn't treated as the sign.
743   ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));
744 }
745 
TEST(Arm64InsnTest, AsmConvertLiterals) {
  // Verify that the compiler encodes the floating-point literals used in the
  // conversion tests below exactly as expected.
  // The set {+-7.50, +-6.75, +-6.50, +-6.25} contains both halfway values
  // (x.50) and non-halfway values (x.25/x.75) of each sign, which lets the
  // float->int tests distinguish every rounding mode from the others.
  ASSERT_EQ(bit_cast<uint32_t>(-7.50f), 0xc0f00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.75f), 0xc0d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.50f), 0xc0d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(-6.25f), 0xc0c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.25f), 0x40c80000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.50f), 0x40d00000U);
  ASSERT_EQ(bit_cast<uint32_t>(6.75f), 0x40d80000U);
  ASSERT_EQ(bit_cast<uint32_t>(7.50f), 0x40f00000U);

  // Same literal values in float64 encoding.
  ASSERT_EQ(bit_cast<uint64_t>(-7.50), 0xc01e000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.75), 0xc01b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.50), 0xc01a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(-6.25), 0xc019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.25), 0x4019000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.50), 0x401a000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(6.75), 0x401b000000000000ULL);
  ASSERT_EQ(bit_cast<uint64_t>(7.50), 0x401e000000000000ULL);
}
767 
768 template <typename IntType, typename FuncType>
TestConvertF32ToInt(FuncType AsmFunc,std::initializer_list<int> expected)769 void TestConvertF32ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
770   // Note that bit_cast isn't a constexpr.
771   static const uint32_t kConvertF32ToIntInputs[] = {
772       bit_cast<uint32_t>(-7.50f),
773       bit_cast<uint32_t>(-6.75f),
774       bit_cast<uint32_t>(-6.50f),
775       bit_cast<uint32_t>(-6.25f),
776       bit_cast<uint32_t>(6.25f),
777       bit_cast<uint32_t>(6.50f),
778       bit_cast<uint32_t>(6.75f),
779       bit_cast<uint32_t>(7.50f),
780   };
781 
782   const size_t kConvertF32ToIntInputsSize = sizeof(kConvertF32ToIntInputs) / sizeof(uint32_t);
783   ASSERT_EQ(kConvertF32ToIntInputsSize, expected.size());
784 
785   auto expected_it = expected.begin();
786   for (size_t input_it = 0; input_it < kConvertF32ToIntInputsSize; input_it++) {
787     ASSERT_EQ(AsmFunc(kConvertF32ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
788   }
789 }
790 
// fcvt{a,m,n,p,z}{s,u}: float32 (%s1) -> integer in a general-purpose
// register (%w0 = 32-bit, %x0 = 64-bit).  Rounding-mode suffixes:
//   a = to nearest, ties away from zero    m = toward -infinity
//   n = to nearest, ties to even           p = toward +infinity
//   z = toward zero (truncate)
// Suffix 's' converts to signed, 'u' to unsigned; the unsigned forms map
// every negative input to 0.  Inputs come from TestConvertF32ToInt:
// {-7.50, -6.75, -6.50, -6.25, 6.25, 6.50, 6.75, 7.50}.

TEST(Arm64InsnTest, AsmConvertF32I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

// Same five rounding modes, 64-bit (%x0) destinations.

TEST(Arm64InsnTest, AsmConvertF32I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
890 
891 template <typename IntType, typename FuncType>
TestConvertF64ToInt(FuncType AsmFunc,std::initializer_list<int> expected)892 void TestConvertF64ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
893   // Note that bit_cast isn't a constexpr.
894   static const uint64_t kConvertF64ToIntInputs[] = {
895       bit_cast<uint64_t>(-7.50),
896       bit_cast<uint64_t>(-6.75),
897       bit_cast<uint64_t>(-6.50),
898       bit_cast<uint64_t>(-6.25),
899       bit_cast<uint64_t>(6.25),
900       bit_cast<uint64_t>(6.50),
901       bit_cast<uint64_t>(6.75),
902       bit_cast<uint64_t>(7.50),
903   };
904 
905   const size_t kConvertF64ToIntInputsSize = sizeof(kConvertF64ToIntInputs) / sizeof(uint64_t);
906   ASSERT_EQ(kConvertF64ToIntInputsSize, expected.size());
907 
908   auto expected_it = expected.begin();
909   for (size_t input_it = 0; input_it < kConvertF64ToIntInputsSize; input_it++) {
910     ASSERT_EQ(AsmFunc(kConvertF64ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
911   }
912 }
913 
// fcvt{a,m,n,p,z}{s,u}: float64 (%d1) -> integer in a general-purpose
// register (%w0 = 32-bit, %x0 = 64-bit).  Rounding-mode suffixes:
//   a = to nearest, ties away from zero    m = toward -infinity
//   n = to nearest, ties to even           p = toward +infinity
//   z = toward zero (truncate)
// Suffix 's' converts to signed, 'u' to unsigned; the unsigned forms map
// every negative input to 0.  Inputs come from TestConvertF64ToInt:
// {-7.50, -6.75, -6.50, -6.25, 6.25, 6.50, 6.75, 7.50}.

TEST(Arm64InsnTest, AsmConvertF64I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

// Same five rounding modes, 64-bit (%x0) destinations.

TEST(Arm64InsnTest, AsmConvertF64I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1013 
// Same fcvt* rounding-mode matrix as above, but for the SIMD-scalar forms,
// where both source and destination are SIMD registers (%s0/%s1 for float32,
// %d0/%d1 for float64).  Rounding-mode suffixes: a = ties away, m = toward
// -infinity, n = ties to even, p = toward +infinity, z = toward zero; the
// 'u' (unsigned) forms map every negative input to 0.

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

// Float64 SIMD-scalar (%d0, %d1) variants.

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1113 
// Vector fcvt* (four float32 lanes -> four int32 lanes, .4s form), one test
// per rounding mode: a = ties away, m = toward -infinity, n = ties to even,
// p = toward +infinity, z = toward zero.  The 'u' forms map every negative
// lane to 0.  Expected values are packed little-endian, two lanes per
// 64-bit half (e.g. 0xfffffff8 is int32 -8).

TEST(Arm64InsnTest, AsmConvertF32I32x4TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffff9fffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1193 
TEST(Arm64InsnTest,AsmConvertF64I64x4TieAway)1194 TEST(Arm64InsnTest, AsmConvertF64I64x4TieAway) {
1195   constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.2d, %1.2d");
1196   __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
1197   ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
1198   __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
1199   ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
1200   __uint128_t arg3 = MakeF64x2(6.25, 6.50);
1201   ASSERT_EQ(AsmFcvtas(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
1202   __uint128_t arg4 = MakeF64x2(6.75, 7.50);
1203   ASSERT_EQ(AsmFcvtas(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
1204 }
1205 
TEST(Arm64InsnTest,AsmConvertF64U64x4TieAway)1206 TEST(Arm64InsnTest, AsmConvertF64U64x4TieAway) {
1207   constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.2d, %1.2d");
1208   __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
1209   ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
1210   __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
1211   ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
1212   __uint128_t arg3 = MakeF64x2(6.25, 6.50);
1213   ASSERT_EQ(AsmFcvtau(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
1214   __uint128_t arg4 = MakeF64x2(6.75, 7.50);
1215   ASSERT_EQ(AsmFcvtau(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
1216 }
1217 
TEST(Arm64InsnTest,AsmConvertF64I64x4NegInf)1218 TEST(Arm64InsnTest, AsmConvertF64I64x4NegInf) {
1219   constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.2d, %1.2d");
1220   __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
1221   ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
1222   __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
1223   ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffff9ULL));
1224   __uint128_t arg3 = MakeF64x2(6.25, 6.50);
1225   ASSERT_EQ(AsmFcvtms(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
1226   __uint128_t arg4 = MakeF64x2(6.75, 7.50);
1227   ASSERT_EQ(AsmFcvtms(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
1228 }
1229 
TEST(Arm64InsnTest,AsmConvertF64U64x4NegInf)1230 TEST(Arm64InsnTest, AsmConvertF64U64x4NegInf) {
1231   constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.2d, %1.2d");
1232   __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
1233   ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
1234   __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
1235   ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
1236   __uint128_t arg3 = MakeF64x2(6.25, 6.50);
1237   ASSERT_EQ(AsmFcvtmu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
1238   __uint128_t arg4 = MakeF64x2(6.75, 7.50);
1239   ASSERT_EQ(AsmFcvtmu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
1240 }
1241 
TEST(Arm64InsnTest,AsmConvertF64I64x4TieEven)1242 TEST(Arm64InsnTest, AsmConvertF64I64x4TieEven) {
1243   constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.2d, %1.2d");
1244   __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
1245   ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
1246   __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
1247   ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
1248   __uint128_t arg3 = MakeF64x2(6.25, 6.50);
1249   ASSERT_EQ(AsmFcvtns(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
1250   __uint128_t arg4 = MakeF64x2(6.75, 7.50);
1251   ASSERT_EQ(AsmFcvtns(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
1252 }
1253 
TEST(Arm64InsnTest,AsmConvertF64U64x4TieEven)1254 TEST(Arm64InsnTest, AsmConvertF64U64x4TieEven) {
1255   constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.2d, %1.2d");
1256   __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
1257   ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
1258   __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
1259   ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
1260   __uint128_t arg3 = MakeF64x2(6.25, 6.50);
1261   ASSERT_EQ(AsmFcvtnu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
1262   __uint128_t arg4 = MakeF64x2(6.75, 7.50);
1263   ASSERT_EQ(AsmFcvtnu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
1264 }
1265 
TEST(Arm64InsnTest,AsmConvertF64I64x4PosInf)1266 TEST(Arm64InsnTest, AsmConvertF64I64x4PosInf) {
1267   constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.2d, %1.2d");
1268   __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
1269   ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
1270   __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
1271   ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
1272   __uint128_t arg3 = MakeF64x2(6.25, 6.50);
1273   ASSERT_EQ(AsmFcvtps(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
1274   __uint128_t arg4 = MakeF64x2(6.75, 7.50);
1275   ASSERT_EQ(AsmFcvtps(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
1276 }
1277 
TEST(Arm64InsnTest, AsmConvertF64U64x4PosInf) {
  // FCVTPU (vector, 2D): float64x2 -> uint64x2, rounding toward +infinity;
  // negative inputs produce 0 (unsigned result).
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtpu(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtpu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}
1289 
TEST(Arm64InsnTest, AsmConvertF64I64x4Truncate) {
  // FCVTZS (vector, 2D): float64x2 -> int64x2, rounding toward zero
  // (truncation): -7.50 -> -7, 7.50 -> 7.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzs(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzs(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1301 
TEST(Arm64InsnTest, AsmConvertF64U64x4Truncate) {
  // FCVTZU (vector, 2D): float64x2 -> uint64x2, rounding toward zero;
  // negative inputs produce 0 (unsigned result).
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1313 
TEST(Arm64InsnTest, AsmConvertX32F32Scalar) {
  // SCVTF (scalar, fixed-point): signed 32-bit with 7 fractional bits -> float32.
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1, #7");

  // 0x610 / 2^7 = 12.125f (0x41420000).
  ASSERT_EQ(AsmConvertX32F32(0x610), MakeUInt128(0x41420000ULL, 0U));

  // 0x80000000 reinterpreted as INT32_MIN: -2^31 / 2^7 = -2^24 (0xcb800000).
  ASSERT_EQ(AsmConvertX32F32(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1321 
TEST(Arm64InsnTest, AsmConvertX32F64Scalar) {
  // SCVTF (scalar, fixed-point): signed 32-bit with 8 fractional bits -> float64.
  constexpr auto AsmConvertX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1, #8");

  // 0x487 / 2^8 = 4.52734375 (0x40121c0000000000).
  ASSERT_EQ(AsmConvertX32F64(0x487), MakeUInt128(0x40121c0000000000ULL, 0U));

  // 0x80000000 reinterpreted as INT32_MIN: -2^31 / 2^8 = -2^23
  // (0xc160000000000000).  Use 1U << 31 — the original `1 << 31` shifts into
  // the sign bit of a signed int, which is not portable C++; the sibling tests
  // (AsmConvertX32F32Scalar, AsmConvertX32F32) already use the unsigned form.
  ASSERT_EQ(AsmConvertX32F64(1U << 31), MakeUInt128(0xc160000000000000ULL, 0U));
}
1329 
TEST(Arm64InsnTest, AsmConvertX32F32) {
  // SCVTF (vector scalar form, fixed-point): signed 32-bit element with 7
  // fractional bits -> float32, operating on SIMD register %s1.
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1, #7");

  // 0x123 / 2^7 = 2.2734375f (0x40118000).
  ASSERT_EQ(AsmConvertX32F32(0x123), MakeUInt128(0x40118000ULL, 0U));

  // 0x80000000 reinterpreted as INT32_MIN: -2^31 / 2^7 = -2^24 (0xcb800000).
  ASSERT_EQ(AsmConvertX32F32(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1337 
TEST(Arm64InsnTest, AsmConvertX32x4F32x4) {
  // SCVTF (vector, 4S, fixed-point): four signed 32-bit lanes with 11
  // fractional bits -> four float32 lanes; covers negative and INT32_MIN lanes.
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s, #11");
  __uint128_t arg = MakeUInt128(0x80000000ffff9852ULL, 0x0000110200001254ULL);
  ASSERT_EQ(AsmConvertX32F32(arg), MakeUInt128(0xc9800000c14f5c00ULL, 0x400810004012a000ULL));
}
1343 
TEST(Arm64InsnTest, AsmConvertUX32F32Scalar) {
  // UCVTF (scalar, fixed-point): unsigned 32-bit with 7 fractional bits -> float32.
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1, #7");

  // 0x857 / 2^7 = 16.6796875f (0x41857000).
  ASSERT_EQ(AsmConvertUX32F32(0x857), MakeUInt128(0x41857000ULL, 0U));

  // 2^31 / 2^7 = 2^24 (0x4b800000) — stays positive (unsigned source).
  ASSERT_EQ(AsmConvertUX32F32(1U << 31), MakeUInt128(0x4b800000ULL, 0U));

  // Test the default rounding behavior (FPRounding_TIEEVEN).
  // Inputs near 2^31 exceed float precision, so the result is rounded:
  // halfway cases go to the even significand.
  ASSERT_EQ(AsmConvertUX32F32(0x80000080), MakeUInt128(0x4b800000ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x800000c0), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x80000140), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x80000180), MakeUInt128(0x4b800002ULL, 0U));
}
1357 
TEST(Arm64InsnTest, AsmConvertUX32F64Scalar) {
  // UCVTF (scalar, fixed-point): unsigned 32-bit with 8 fractional bits -> float64.
  constexpr auto AsmConvertUX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1, #8");

  // 0x361 / 2^8 = 3.37890625 (0x400b080000000000).
  ASSERT_EQ(AsmConvertUX32F64(0x361), MakeUInt128(0x400b080000000000ULL, 0U));

  // 2^31 / 2^8 = 2^23 (0x4160000000000000) — stays positive (unsigned source).
  ASSERT_EQ(AsmConvertUX32F64(1U << 31), MakeUInt128(0x4160000000000000ULL, 0U));
}
1365 
TEST(Arm64InsnTest, AsmConvertUX32F32) {
  // UCVTF (vector scalar form, fixed-point): unsigned 32-bit element with 7
  // fractional bits -> float32, operating on SIMD register %s1.
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1, #7");

  // 0x456 / 2^7 = 8.671875f (0x410ac000).
  ASSERT_EQ(AsmConvertUX32F32(0x456), MakeUInt128(0x410ac000ULL, 0U));

  // 2^31 / 2^7 = 2^24 (0x4b800000) — stays positive (unsigned source).
  ASSERT_EQ(AsmConvertUX32F32(1U << 31), MakeUInt128(0x4b800000ULL, 0U));
}
1373 
TEST(Arm64InsnTest, AsmConvertUX32x4F32x4) {
  // UCVTF (vector, 4S, fixed-point): four unsigned 32-bit lanes with 11
  // fractional bits -> four float32 lanes; includes a lane with the top bit set.
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s, #11");
  __uint128_t arg = MakeUInt128(0x8000000000008023ULL, 0x0000201800001956ULL);
  ASSERT_EQ(AsmConvertUX32F32(arg), MakeUInt128(0x4980000041802300ULL, 0x40806000404ab000ULL));
}
1379 
TEST(Arm64InsnTest, AsmConvertX64F32Scalar) {
  // SCVTF (scalar, fixed-point): signed 64-bit with 10 fractional bits -> float32.
  constexpr auto AsmConvertX64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1, #10");

  // 0x2234 / 2^10 = 8.55078125f (0x4108d000).
  ASSERT_EQ(AsmConvertX64F32(0x2234), MakeUInt128(0x4108d000ULL, 0U));
}
1385 
TEST(Arm64InsnTest, AsmConvertX64F64Scalar) {
  // SCVTF (scalar, fixed-point): signed 64-bit with 10 fractional bits -> float64.
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1, #10");

  // 0x1324 / 2^10 = 4.78515625 (0x4013240000000000).
  ASSERT_EQ(AsmConvertX64F64(0x1324), MakeUInt128(0x4013240000000000ULL, 0U));
}
1391 
TEST(Arm64InsnTest, AsmConvertUX64F32Scalar) {
  // UCVTF (scalar, fixed-point): unsigned 64-bit with 10 fractional bits -> float32.
  constexpr auto AsmConvertUX64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1, #10");

  // 0x5763 / 2^10 = 21.8466796875f, rounded to float (0x41aec600).
  ASSERT_EQ(AsmConvertUX64F32(0x5763), MakeUInt128(0x41aec600ULL, 0U));
}
1397 
TEST(Arm64InsnTest, AsmConvertUX64F64Scalar) {
  // UCVTF (scalar, fixed-point): unsigned 64-bit with 10 fractional bits -> float64.
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1, #10");

  // 0x2217 / 2^10 = 8.5224609375 (0x40210b8000000000).
  ASSERT_EQ(AsmConvertUX64F64(0x2217), MakeUInt128(0x40210b8000000000ULL, 0U));
}
1403 
TEST(Arm64InsnTest, AsmConvertX64F64) {
  // SCVTF (vector scalar form, fixed-point): signed 64-bit element with 12
  // fractional bits -> float64, operating on SIMD register %d1.
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1, #12");

  // 0x723 / 2^12 = 0.446044921875 (0x3fdc8c0000000000).
  ASSERT_EQ(AsmConvertX64F64(0x723), MakeUInt128(0x3fdc8c0000000000ULL, 0U));

  // INT64_MIN / 2^12 = -2^51 (0xc320000000000000).
  ASSERT_EQ(AsmConvertX64F64(1ULL << 63), MakeUInt128(0xc320000000000000ULL, 0U));
}
1411 
TEST(Arm64InsnTest, AsmConvertUX64F64) {
  // UCVTF (vector scalar form, fixed-point): unsigned 64-bit element with 12
  // fractional bits -> float64, operating on SIMD register %d1.
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #12");

  // 0x416 / 2^12 = 0.25537109375 (0x3fd0580000000000).
  ASSERT_EQ(AsmConvertUX64F64(0x416), MakeUInt128(0x3fd0580000000000ULL, 0U));

  // 2^63 / 2^12 = 2^51 (0x4320000000000000) — stays positive (unsigned source).
  ASSERT_EQ(AsmConvertUX64F64(1ULL << 63), MakeUInt128(0x4320000000000000ULL, 0U));
}
1419 
TEST(Arm64InsnTest, AsmConvertUX64F64With64BitFraction) {
  // UCVTF with the maximum #64 fractional bits: the whole 64-bit input is
  // fraction, so 2^63 converts to 0.5 (0x3fe0000000000000).
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #64");

  ASSERT_EQ(AsmConvertUX64F64(1ULL << 63), MakeUInt128(0x3fe0'0000'0000'0000ULL, 0U));
}
1425 
TEST(Arm64InsnTest, AsmConvertX64x2F64x2) {
  // SCVTF (vector, 2D, fixed-point): two signed 64-bit lanes with 12
  // fractional bits -> two float64 lanes; low lane is INT64_MIN.
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d, #12");
  __uint128_t arg = MakeUInt128(1ULL << 63, 0x8086U);
  ASSERT_EQ(AsmConvertX64F64(arg), MakeUInt128(0xc320000000000000ULL, 0x402010c000000000ULL));
}
1431 
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2) {
  // UCVTF (vector, 2D, fixed-point): two unsigned 64-bit lanes with 12
  // fractional bits -> two float64 lanes; 2^63 stays positive (unsigned).
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #12");
  __uint128_t arg = MakeUInt128(1ULL << 63, 0x6809U);
  ASSERT_EQ(AsmConvertUX64F64(arg), MakeUInt128(0x4320000000000000ULL, 0x401a024000000000ULL));
}
1437 
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2With64BitFraction) {
  // UCVTF (vector, 2D) with the maximum #64 fractional bits: each lane maps
  // into [0, 1); the 64-bit inputs get rounded into 53-bit double significands.
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #64");
  __uint128_t arg = MakeUInt128(0x7874'211c'b7aa'f597ULL, 0x2c0f'5504'd25e'f673ULL);
  ASSERT_EQ(AsmConvertUX64F64(arg),
            MakeUInt128(0x3fde'1d08'472d'eabdULL, 0x3fc6'07aa'8269'2f7bULL));
}
1444 
TEST(Arm64InsnTest, AsmConvertF32X32Scalar) {
  // FCVTZS (scalar, fixed-point): float32 -> signed 32-bit with 16 fractional
  // bits, rounding toward zero; the default NaN converts to 0.
  constexpr auto AsmConvertF32X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #16");
  uint32_t arg1 = 0x4091eb85U;  // 4.56 in float
  ASSERT_EQ(AsmConvertF32X32(arg1), MakeUInt128(0x00048f5cU, 0U));

  uint32_t arg2 = 0xc0d80000U;  // -6.75 in float
  // -6.75 * 2^16 = -442368 = 0xfff94000 as a 32-bit two's complement value.
  ASSERT_EQ(AsmConvertF32X32(arg2), MakeUInt128(0xfff94000U, 0U));

  ASSERT_EQ(AsmConvertF32X32(kDefaultNaN32AsInteger), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1455 
TEST(Arm64InsnTest, AsmConvertF32UX32Scalar) {
  // FCVTZU (scalar, fixed-point): float32 -> unsigned 32-bit with 16
  // fractional bits, rounding toward zero; the default NaN converts to 0.
  // The original used "fcvtzs" (signed), which duplicated
  // AsmConvertF32X32Scalar above and contradicted this test's name; it now
  // exercises the unsigned conversion like the other *UX* tests do.
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1, #16");
  uint32_t arg1 = 0x41223d71U;  // 10.14 in float
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x000a23d7U, 0U));

  uint32_t arg2 = 0xc1540000U;  // -13.25 in float
  // Negative inputs saturate to 0 for the unsigned conversion.
  ASSERT_EQ(AsmConvertF32UX32(arg2), MakeUInt128(0U, 0U));

  ASSERT_EQ(AsmConvertF32UX32(kDefaultNaN32AsInteger), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1466 
TEST(Arm64InsnTest, AsmConvertF32UX32With31FractionalBits) {
  // FCVTZU (scalar, fixed-point): float32 -> unsigned 32-bit with 31
  // fractional bits.  The original used "fcvtzs", contradicting the UX32 test
  // name; for this positive input the expected value is identical either way
  // (0.25 * 2^31 = 0x20000000).
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1, #31");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x20000000U, 0U));
}
1472 
TEST(Arm64InsnTest, AsmConvertF64X32Scalar) {
  // FCVTZS (scalar, fixed-point): float64 -> signed 32-bit with 16 fractional
  // bits, rounding toward zero.
  constexpr auto AsmConvertF64X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1, #16");
  uint64_t arg1 = 0x401e8f5c28f5c28fULL;  // 7.46 in double
  ASSERT_EQ(AsmConvertF64X32(arg1), MakeUInt128(0x0007a3d7U, 0U));

  uint64_t arg2 = 0xc040200000000000ULL;  // -32.44 in double (actually -32.25)
  // -32.25 * 2^16 = -2113536 = 0xffdfc000 as a 32-bit two's complement value.
  ASSERT_EQ(AsmConvertF64X32(arg2), MakeUInt128(0xffdfc000U, 0U));
}
1481 
TEST(Arm64InsnTest, AsmConvertF32X64Scalar) {
  // FCVTZS (scalar, fixed-point): float32 -> signed 64-bit with 16 fractional
  // bits, rounding toward zero.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #16");
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  // 7.50 * 2^16 = 0x78000.
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  // -6.50 * 2^16 = -425984, sign-extended to 64 bits.
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1490 
TEST(Arm64InsnTest, AsmConvertF32UX64With63FractionalBits) {
  // FCVTZU (scalar, fixed-point): float32 -> unsigned 64-bit with 63
  // fractional bits.  The original used "fcvtzs", contradicting the UX64 test
  // name; for this positive input the expected value is identical either way
  // (0.25 * 2^63 = 0x2000000000000000).
  constexpr auto AsmConvertF32UX64 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1, #63");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX64(arg1), MakeUInt128(0x20000000'00000000ULL, 0U));
}
1496 
TEST(Arm64InsnTest, AsmConvertF64X64Scalar) {
  // FCVTZS (scalar, fixed-point): float64 -> signed 64-bit with 16 fractional
  // bits, rounding toward zero.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  // 7.50 * 2^16 = 0x78000.
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  // -6.50 * 2^16 = -425984, as a 64-bit two's complement value.
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1505 
TEST(Arm64InsnTest, AsmConvertF32X32x4) {
  // FCVTZS (vector, 4S, fixed-point): four float32 lanes -> four signed 32-bit
  // lanes with 2 fractional bits, rounding toward zero: -5.5*4 = -22 (0xffffffea),
  // 6.5*4 = 26 (0x1a), and both zeros map to 0.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s, #2");
  __uint128_t res = AsmFcvtzs(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f));
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffeaULL, 0x0000001a00000000ULL));
}
1511 
TEST(Arm64InsnTest, AsmConvertF64UX32Scalar) {
  // FCVTZU (scalar, fixed-point): float64 -> unsigned 32-bit with 16
  // fractional bits, rounding toward zero.
  constexpr auto AsmConvertF64UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1, #16");
  uint64_t arg1 = 0x4020947ae147ae14ULL;  // 8.29 in double
  ASSERT_EQ(AsmConvertF64UX32(arg1), MakeUInt128(0x00084a3dU, 0U));

  uint64_t arg2 = 0xc023666666666666ULL;  // -9.70 in double
  // Negative inputs saturate to 0 for the unsigned conversion.
  ASSERT_EQ(AsmConvertF64UX32(arg2), MakeUInt128(0U, 0U));
}
1520 
TEST(Arm64InsnTest, AsmConvertF32UX64Scalar) {
  // FCVTZU (scalar, fixed-point): float32 -> unsigned 64-bit with 16
  // fractional bits, rounding toward zero; negatives saturate to 0.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1, #16");
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));
  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), 0ULL);
}
1528 
TEST(Arm64InsnTest, AsmConvertF64UX64Scalar) {
  // FCVTZU (scalar, fixed-point): float64 -> unsigned 64-bit with 16
  // fractional bits, rounding toward zero; negatives saturate to 0.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0ULL, 0ULL));
}
1537 
TEST(Arm64InsnTest, AsmConvertF64UX64ScalarWith64BitFraction) {
  // FCVTZU with the maximum #64 fractional bits: 0.625 * 2^64 =
  // 0xa000000000000000.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #64");
  uint64_t arg = bit_cast<uint64_t>(0.625);
  ASSERT_EQ(AsmFcvtzu(arg), MakeUInt128(0xa000'0000'0000'0000ULL, 0ULL));
}
1543 
TEST(Arm64InsnTest, AsmConvertF32UX32x4) {
  // FCVTZU (vector, 4S, fixed-point): four float32 lanes -> four unsigned
  // 32-bit lanes with 2 fractional bits, rounding toward zero; negative lanes
  // saturate to 0, and 6.5*4 = 26 (0x1a).
  // The local wrapper is named AsmFcvtzu to match the instruction it wraps
  // (it was previously misnamed AsmFcvtzs).
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s, #2");
  __uint128_t res = AsmFcvtzu(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f));
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000001a00000000ULL));
}
1549 
TEST(Arm64InsnTest, Fp32ConditionalSelect) {
  // FCSEL (single precision): selects the first FP source when the condition
  // holds, otherwise the second.  CMP sets the flags that fcsel reads.
  uint64_t int_arg1 = 3;
  uint64_t int_arg2 = 7;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  // 3 != 7, so "eq" is false and the second operand is selected
  // (low 32 bits of fp_arg2).
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x89abcdefULL, 0U));

  // 3 != 7, so "ne" is true and the first operand is selected
  // (low 32 bits of fp_arg1).
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x76543210ULL, 0U));
}
1569 
TEST(Arm64InsnTest, Fp64ConditionalSelect) {
  // FCSEL (double precision): selects the first FP source when the condition
  // holds, otherwise the second.  CMP sets the flags that fcsel reads.
  uint64_t int_arg1 = 8;
  uint64_t int_arg2 = 3;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  // 8 != 3, so "eq" is false and the second operand is selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x0123456789abcdefULL, 0U));

  // 8 != 3, so "ne" is true and the first operand is selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0xfedcba9876543210ULL, 0U));
}
1589 
TEST(Arm64InsnTest, RoundUpFp32) {
  // FRINTP (single precision): round to integral, toward +infinity.
  // The lower 32-bit represents 2.7182817 in float; the upper garbage bits
  // (0xdeadbeef) must be ignored by the scalar form.
  uint64_t fp_arg = 0xdeadbeef402df854ULL;
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %s0, %s1")(fp_arg);
  ASSERT_EQ(res, MakeUInt128(0x40400000ULL, 0U));  // 3.0 in float
}
1596 
TEST(Arm64InsnTest, RoundUpFp64) {
  // FRINTP (double precision): round to integral, toward +infinity.
  // 2.7182817 in double.
  uint64_t fp_arg = 0x4005BF0A8B145769ULL;
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %d0, %d1")(fp_arg);
  ASSERT_EQ(res, MakeUInt128(0x4008000000000000ULL, 0U));  // 3.0 in double
}
1603 
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp64) {
  // FRINTA (double precision): round to nearest integral, ties away from zero.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %d0, %d1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01E000000000000ULL), MakeUInt128(0xc020000000000000ULL, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc01B000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01A000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc019000000000000ULL), MakeUInt128(0xc018000000000000ULL, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x4019000000000000ULL), MakeUInt128(0x4018000000000000ULL, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401A000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x401B000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401E000000000000ULL), MakeUInt128(0x4020000000000000ULL, 0U));

  // -0.49999999999999994 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xBFDFFFFFFFFFFFFF), MakeUInt128(0x8000000000000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(0.5 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(0.5 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-0.5 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-0.5 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(0.75 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(0.75 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-0.75 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-0.75 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(1.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-1.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(2.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(2.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-2.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-2.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0e100)), MakeUInt128(bit_cast<uint64_t>(1.0e100), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0e100)), MakeUInt128(bit_cast<uint64_t>(-1.0e100), 0U));
}
1654 
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp32) {
  // FRINTA (single precision): round to nearest integral, ties away from zero.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %s0, %s1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0f00000U), MakeUInt128(0xc1000000U, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc0d80000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0d00000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc0c80000U), MakeUInt128(0xc0c00000U, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x40c80000U), MakeUInt128(0x40c00000U, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40d00000U), MakeUInt128(0x40e00000U, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x40d80000U), MakeUInt128(0x40e00000U, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40f00000U), MakeUInt128(0x41000000U, 0U));

  // -0.49999997019767761 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xbeffffff), MakeUInt128(0x80000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{0.5 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{0.5 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-0.5 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-0.5 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{0.75 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{0.75 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-0.75 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-0.75 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{1.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{1.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-1.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-1.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{2.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{2.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-2.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-2.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint32_t>(1.0e38f)), MakeUInt128(bit_cast<uint32_t>(1.0e38f), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint32_t>(-1.0e38f)), MakeUInt128(bit_cast<uint32_t>(-1.0e38f), 0U));
}
1713 
TEST(Arm64InsnTest, RoundToIntDownwardFp64) {
  // FRINTM (double precision): round to integral, toward -infinity (floor).
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01c666666666666ULL), MakeUInt128(0xc020000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01f99999999999aULL), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintm(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1735 
TEST(Arm64InsnTest, RoundToIntDownwardFp32) {
  // FRINTM (single precision): round to integral, toward -infinity (floor).
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40e33333), 0x40e00000);

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0e33333), 0xc1000000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x00000000), 0x00000000);

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintm(0x80000000), 0x80000000);
}
1757 
TEST(Arm64InsnTest, RoundToIntNearestFp64) {
  // FRINTN (double precision): round to nearest integral, ties to even.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %d0, %d1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x401e000000000000ULL), MakeUInt128(0x4020000000000000, 0U));

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x4021000000000000), MakeUInt128(0x4020000000000000, 0U));

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x401c666666666666), MakeUInt128(0x401c000000000000, 0U));

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x401f99999999999a), MakeUInt128(0x4020000000000000, 0U));

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc01e000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc021000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc01c666666666666), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc01f99999999999a), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintn(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1791 
TEST(Arm64InsnTest, RoundToIntToNearestFp32) {
  // FRINTN (single precision): round to nearest integral, ties to even.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %s0, %s1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x40f00000), 0x41000000);

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x41080000), 0x41000000);

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x40e33333), 0x40e00000);

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x40fccccd), 0x41000000);

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc0f00000), 0xc1000000);

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc1080000), 0xc1000000);

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc0e33333), 0xc0e00000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x00000000), 0x00000000);

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintn(0x80000000), 0x80000000);
}
1825 
TEST(Arm64InsnTest, RoundToIntTowardZeroFp64) {
  // FRINTZ (double precision): round to integral, toward zero (truncation).
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc01c666666666666ULL), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc01f99999999999aULL), MakeUInt128(0xc01c000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintz(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1847 
TEST(Arm64InsnTest, RoundToIntTowardZeroFp32) {
  // FRINTZ (single precision): round to integral, toward zero (truncation).
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40e33333), 0x40e00000);

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc0e33333), 0xc0e00000);

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc0fccccd), 0xc0e00000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x00000000), 0x00000000);

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintz(0x80000000), 0x80000000);
}
1869 
TEST(Arm64InsnTest, AsmConvertF32x4TieAway) {
  // FRINTA (vector, 4S): round each float32 lane to nearest integral,
  // ties away from zero (-6.50 -> -7, 6.50 -> 7).
  constexpr auto AsmFcvta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvta(arg1), MakeF32x4(-8.00f, -7.00f, -7.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvta(arg2), MakeF32x4(6.00f, 7.00f, 7.00f, 8.00f));
}
1877 
TEST(Arm64InsnTest, AsmConvertF32x4NegInf) {
  // FRINTM (vector, 4S): round each float32 lane to integral,
  // toward -infinity (floor).
  constexpr auto AsmFcvtm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtm(arg1), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtm(arg2), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1885 
TEST(Arm64InsnTest, AsmConvertF32x4TieEven) {
  // FRINTN (vector, 4S): round each float32 lane to nearest integral,
  // ties to even (-6.50 -> -6, 6.50 -> 6).
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtn(arg1), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtn(arg2), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
}
1893 
TEST(Arm64InsnTest, AsmConvertF32x4PosInf) {
  // FRINTP (vector, 4S): round each float32 lane to integral,
  // toward +infinity (ceiling).
  constexpr auto AsmFcvtp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtp(arg1), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtp(arg2), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
}
1901 
TEST(Arm64InsnTest, AsmConvertF32x4Truncate) {
  // FRINTZ (vector, 4S): round each float32 lane to integral,
  // toward zero (truncation).
  constexpr auto AsmFcvtz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtz(arg1), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtz(arg2), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1909 
TEST(Arm64InsnTest, AsmConvertF64x4TieAway) {
  // FRINTA (vector, 2D): round each float64 lane to nearest integral,
  // ties away from zero (-6.50 -> -7, 6.50 -> 7).
  constexpr auto AsmFcvta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvta(arg1), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvta(arg2), MakeF64x2(-7.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvta(arg3), MakeF64x2(6.00, 7.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvta(arg4), MakeF64x2(7.00, 8.00));
}
1921 
// Verifies "frintm" (round to integral, toward -infinity) on a 2d vector.
TEST(Arm64InsnTest, AsmConvertF64x4NegInf) {
  // Renamed from AsmFcvtm: the instruction under test is frintm, not fcvtm.
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintm(arg1), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintm(arg2), MakeF64x2(-7.00, -7.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintm(arg3), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintm(arg4), MakeF64x2(6.00, 7.00));
}
1933 
// Verifies "frintn" (round to integral, ties to even) on a 2d vector.
TEST(Arm64InsnTest, AsmConvertF64x4TieEven) {
  // Renamed from AsmFcvtn: the instruction under test is frintn, not fcvtn.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintn(arg1), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintn(arg2), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintn(arg3), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintn(arg4), MakeF64x2(7.00, 8.00));
}
1945 
// Verifies "frintp" (round to integral, toward +infinity) on a 2d vector.
TEST(Arm64InsnTest, AsmConvertF64x4PosInf) {
  // Renamed from AsmFcvtp: the instruction under test is frintp, not fcvtp.
  constexpr auto AsmFrintp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintp(arg1), MakeF64x2(-7.00, -6.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintp(arg2), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintp(arg3), MakeF64x2(7.00, 7.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintp(arg4), MakeF64x2(7.00, 8.00));
}
1957 
// Verifies "frintz" (round to integral, toward zero) on a 2d vector.
TEST(Arm64InsnTest, AsmConvertF64x4Truncate) {
  // Renamed from AsmFcvtz: the instruction under test is frintz, not fcvtz.
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintz(arg1), MakeF64x2(-7.00, -6.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintz(arg2), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintz(arg3), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintz(arg4), MakeF64x2(6.00, 7.00));
}
1969 
// Verifies scalar "frinti" (round fp32 to integral using the rounding mode
// from FPCR) under all four FPCR rounding modes. The wrapper's second
// argument is the FPCR value installed before the instruction executes.
TEST(Arm64InsnTest, AsmRoundCurrentModeF32) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %s0, %s1");
  // Round to nearest, ties to even: half-way cases go to the even integer.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(8.00f));
  // Round toward -infinity: every fractional value moves down.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(7.00f));
  // Round toward +infinity: every fractional value moves up.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(8.00f));
  // Round toward zero: truncation of the fractional part.
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-7.50f), kFpcrRModeZero), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.75f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.50f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(-6.25f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.25f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.50f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(6.75f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(7.50f), kFpcrRModeZero), bit_cast<uint32_t>(7.00f));
}
2005 
// Verifies scalar "frinti" (round fp64 to integral using the rounding mode
// from FPCR) under all four FPCR rounding modes.
TEST(Arm64InsnTest, AsmRoundCurrentModeF64) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %d0, %d1");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(8.00));
  // Round toward -infinity.
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(7.00));
  // Round toward +infinity.
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModePosInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModePosInf), bit_cast<uint64_t>(8.00));
  // Round toward zero.
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-7.50), kFpcrRModeZero), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.75), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.50), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(-6.25), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.25), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.50), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(6.75), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(7.50), kFpcrRModeZero), bit_cast<uint64_t>(7.00));
}
2041 
// Verifies vector "frinti" on a 4s arrangement under all four FPCR rounding
// modes (ties-to-even, toward -inf, toward +inf, toward zero).
TEST(Arm64InsnTest, AsmRoundCurrentModeF32x4) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrinti(arg1, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(arg2, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  __uint128_t arg3 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrinti(arg3, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  __uint128_t arg4 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(arg4, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  __uint128_t arg5 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrinti(arg5, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg6 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(arg6, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  __uint128_t arg7 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrinti(arg7, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg8 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(arg8, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2061 
// Verifies vector "frinti" on a 2d arrangement under all four FPCR rounding
// modes (ties-to-even, toward -inf, toward +inf, toward zero).
TEST(Arm64InsnTest, AsmRoundCurrentModeF64x2) {
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrinti(arg1, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrinti(arg2, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrinti(arg3, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(arg4, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  __uint128_t arg5 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrinti(arg5, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  __uint128_t arg6 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrinti(arg6, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  __uint128_t arg7 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrinti(arg7, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  __uint128_t arg8 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(arg8, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  __uint128_t arg9 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrinti(arg9, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  __uint128_t arg10 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrinti(arg10, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  __uint128_t arg11 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrinti(arg11, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  __uint128_t arg12 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(arg12, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  __uint128_t arg13 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrinti(arg13, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  __uint128_t arg14 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrinti(arg14, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  __uint128_t arg15 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrinti(arg15, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  __uint128_t arg16 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(arg16, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2097 
// Verifies scalar "frintx" (round fp32 to integral exact, using the FPCR
// rounding mode). The rounded values match frinti for the same FPCR mode;
// only the inexact-exception behavior differs, which this test does not check.
TEST(Arm64InsnTest, AsmRoundExactF32) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %s0, %s1");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeTieEven), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeTieEven), bit_cast<uint32_t>(8.00f));
  // Round toward -infinity.
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-8.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeNegInf), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeNegInf), bit_cast<uint32_t>(7.00f));
  // Round toward +infinity.
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModePosInf), bit_cast<uint32_t>(7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModePosInf), bit_cast<uint32_t>(8.00f));
  // Round toward zero.
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-7.50f), kFpcrRModeZero), bit_cast<uint32_t>(-7.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.75f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.50f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(-6.25f), kFpcrRModeZero), bit_cast<uint32_t>(-6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.25f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.50f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(6.75f), kFpcrRModeZero), bit_cast<uint32_t>(6.00f));
  ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(7.50f), kFpcrRModeZero), bit_cast<uint32_t>(7.00f));
}
2133 
// Verifies scalar "frintx" (round fp64 to integral exact, using the FPCR
// rounding mode) under all four FPCR rounding modes.
TEST(Arm64InsnTest, AsmRoundExactF64) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %d0, %d1");
  // Round to nearest, ties to even.
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeTieEven), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeTieEven), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeTieEven), bit_cast<uint64_t>(8.00));
  // Round toward -infinity.
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-8.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeNegInf), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeNegInf), bit_cast<uint64_t>(7.00));
  // Round toward +infinity.
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModePosInf), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModePosInf), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModePosInf), bit_cast<uint64_t>(7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModePosInf), bit_cast<uint64_t>(8.00));
  // Round toward zero.
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-7.50), kFpcrRModeZero), bit_cast<uint64_t>(-7.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.75), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.50), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(-6.25), kFpcrRModeZero), bit_cast<uint64_t>(-6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.25), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.50), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(6.75), kFpcrRModeZero), bit_cast<uint64_t>(6.00));
  ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(7.50), kFpcrRModeZero), bit_cast<uint64_t>(7.00));
}
2169 
// Verifies vector "frintx" on a 4s arrangement under all four FPCR rounding
// modes.
TEST(Arm64InsnTest, AsmRoundExactF32x4) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintx(arg1, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(arg2, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  __uint128_t arg3 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintx(arg3, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  __uint128_t arg4 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(arg4, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  __uint128_t arg5 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintx(arg5, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg6 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(arg6, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  __uint128_t arg7 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFrintx(arg7, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg8 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(arg8, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2189 
// Verifies vector "frintx" on a 2d arrangement under all four FPCR rounding
// modes.
TEST(Arm64InsnTest, AsmRoundExactF64x2) {
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintx(arg1, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintx(arg2, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintx(arg3, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(arg4, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  __uint128_t arg5 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintx(arg5, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  __uint128_t arg6 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintx(arg6, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  __uint128_t arg7 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintx(arg7, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  __uint128_t arg8 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(arg8, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  __uint128_t arg9 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintx(arg9, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  __uint128_t arg10 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintx(arg10, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  __uint128_t arg11 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintx(arg11, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  __uint128_t arg12 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(arg12, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  __uint128_t arg13 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFrintx(arg13, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  __uint128_t arg14 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFrintx(arg14, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  __uint128_t arg15 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFrintx(arg15, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  __uint128_t arg16 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(arg16, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2225 
Fp32Compare(uint64_t arg1,uint64_t arg2)2226 uint64_t Fp32Compare(uint64_t arg1, uint64_t arg2) {
2227   uint64_t res;
2228   asm("fcmp %s1, %s2\n\t"
2229       "mrs %x0, nzcv"
2230       : "=r"(res)
2231       : "w"(arg1), "w"(arg2));
2232   return res;
2233 }
2234 
Fp64Compare(uint64_t arg1,uint64_t arg2)2235 uint64_t Fp64Compare(uint64_t arg1, uint64_t arg2) {
2236   uint64_t res;
2237   asm("fcmp %d1, %d2\n\t"
2238       "mrs %x0, nzcv"
2239       : "=r"(res)
2240       : "w"(arg1), "w"(arg2));
2241   return res;
2242 }
2243 
// Builds a value with the given 4-bit condition-flag pattern placed where the
// AArch64 NZCV register keeps it: N in bit 31, Z in 30, C in 29, V in 28.
constexpr uint64_t MakeNZCV(uint64_t nzcv) {
  constexpr uint64_t kNzcvShift = 28;  // Flags occupy bits [31:28].
  return nzcv << kNzcvShift;
}
2247 
// Verifies the NZCV flags produced by scalar fp32 "fcmp" for each of the four
// possible outcomes: unordered (NaN operand), equal, less-than, greater-than.
// Operands are fp32 bit patterns; results are checked against MakeNZCV.
TEST(Arm64InsnTest, Fp32Compare) {
  // NaN and 1.83: unordered comparison sets C and V.
  ASSERT_EQ(Fp32Compare(0x7fc00000ULL, 0x3fea3d71ULL), MakeNZCV(0b0011));

  // 6.31 == 6.31: equality sets Z and C.
  ASSERT_EQ(Fp32Compare(0x40c9eb85ULL, 0x40c9eb85ULL), MakeNZCV(0b0110));

  // 1.23 < 2.34: less-than sets N.
  ASSERT_EQ(Fp32Compare(0x3f9d70a4ULL, 0x4015c28fULL), MakeNZCV(0b1000));

  // 5.25 > 2.94: greater-than sets C only.
  ASSERT_EQ(Fp32Compare(0x40a80000ULL, 0x403c28f6ULL), MakeNZCV(0b0010));
}
2261 
// Verifies the NZCV flags produced by the fp32 "fcmp <reg>, #0.0" form for
// unordered, equal, less-than, and greater-than outcomes.
TEST(Arm64InsnTest, Fp32CompareZero) {
  constexpr auto Fp32CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %s1, #0.0\n\t"
      "mrs %x0, nzcv");

  // NaN and 0.00: unordered comparison sets C and V.
  ASSERT_EQ(Fp32CompareZero(0x7fa00000ULL), MakeNZCV(0b0011));

  // 0.00 == 0.00: equality sets Z and C.
  ASSERT_EQ(Fp32CompareZero(0x00000000ULL), MakeNZCV(0b0110));

  // -2.67 < 0.00: less-than sets N.
  ASSERT_EQ(Fp32CompareZero(0xc02ae148ULL), MakeNZCV(0b1000));

  // 1.56 > 0.00: greater-than sets C only.
  ASSERT_EQ(Fp32CompareZero(0x3fc7ae14ULL), MakeNZCV(0b0010));
}
2279 
// Verifies the NZCV flags produced by scalar fp64 "fcmp" for unordered,
// equal, less-than, and greater-than outcomes.
TEST(Arm64InsnTest, Fp64Compare) {
  // NaN and 1.19: unordered comparison sets C and V.
  ASSERT_EQ(Fp64Compare(0x7ff8000000000000ULL, 0x3ff30a3d70a3d70aULL), MakeNZCV(0b0011));

  // 8.42 == 8.42: equality sets Z and C.
  ASSERT_EQ(Fp64Compare(0x4020d70a3d70a3d7ULL, 0x4020d70a3d70a3d7ULL), MakeNZCV(0b0110));

  // 0.50 < 1.00: less-than sets N.
  ASSERT_EQ(Fp64Compare(0x3fe0000000000000ULL, 0x3ff0000000000000ULL), MakeNZCV(0b1000));

  // 7.38 > 1.54: greater-than sets C only.
  ASSERT_EQ(Fp64Compare(0x401d851eb851eb85ULL, 0x3ff8a3d70a3d70a4ULL), MakeNZCV(0b0010));
}
2293 
// Verifies the NZCV flags produced by the fp64 "fcmp <reg>, #0.0" form for
// unordered, equal, less-than, and greater-than outcomes.
TEST(Arm64InsnTest, Fp64CompareZero) {
  constexpr auto Fp64CompareZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %d1, #0.0\n\t"
      "mrs %x0, nzcv");

  // NaN and 0.00: unordered comparison sets C and V.
  ASSERT_EQ(Fp64CompareZero(0x7ff4000000000000ULL), MakeNZCV(0b0011));

  // 0.00 == 0.00: equality sets Z and C.
  ASSERT_EQ(Fp64CompareZero(0x0000000000000000ULL), MakeNZCV(0b0110));

  // -7.23 < 0.00: less-than sets N.
  ASSERT_EQ(Fp64CompareZero(0xc01ceb851eb851ecULL), MakeNZCV(0b1000));

  // 5.39 > 0.00: greater-than sets C only.
  ASSERT_EQ(Fp64CompareZero(0x40158f5c28f5c28fULL), MakeNZCV(0b0010));
}
2311 
Fp32CompareIfEqualOrSetAllFlags(float arg1,float arg2,uint64_t nzcv)2312 uint64_t Fp32CompareIfEqualOrSetAllFlags(float arg1, float arg2, uint64_t nzcv) {
2313   asm("msr nzcv, %x0\n\t"
2314       "fccmp %s2, %s3, #15, eq\n\t"
2315       "mrs %x0, nzcv\n\t"
2316       : "=r"(nzcv)
2317       : "0"(nzcv), "w"(arg1), "w"(arg2));
2318   return nzcv;
2319 }
2320 
// Verifies fp32 "fccmp": when the incoming flags satisfy the "eq" condition
// the real comparison runs; otherwise the alternate immediate (#15 = 0b1111)
// is written to NZCV unchanged.
TEST(Arm64InsnTest, Fp32ConditionalCompare) {
  // Comparison is performed.
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr float kNan = std::numeric_limits<float>::quiet_NaN();
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kEqual), MakeNZCV(0b0011));
  // Comparison is not performed; alt-nzcv is returned.
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kNotEqual), MakeNZCV(0b1111));
}
2336 
Fp64CompareIfEqualOrSetAllFlags(double arg1,double arg2,uint64_t nzcv)2337 uint64_t Fp64CompareIfEqualOrSetAllFlags(double arg1, double arg2, uint64_t nzcv) {
2338   asm("msr nzcv, %x0\n\t"
2339       "fccmp %d2, %d3, #15, eq\n\t"
2340       "mrs %x0, nzcv\n\t"
2341       : "=r"(nzcv)
2342       : "0"(nzcv), "w"(arg1), "w"(arg2));
2343   return nzcv;
2344 }
2345 
// Verifies fp64 "fccmp": when the incoming flags satisfy the "eq" condition
// the real comparison runs; otherwise the alternate immediate (#15 = 0b1111)
// is written to NZCV unchanged.
TEST(Arm64InsnTest, Fp64ConditionalCompare) {
  // Comparison is performed.
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr double kNan = std::numeric_limits<double>::quiet_NaN();
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kEqual), MakeNZCV(0b0011));
  // Comparison is not performed; alt-nzcv is returned.
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  // Fixed: the last call passed the float literal 1.0f to a double parameter;
  // now uses 1.0 for consistency with every other call in this test.
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kNotEqual), MakeNZCV(0b1111));
}
2361 
// Verifies the widening conversion "fcvt %d0, %s1" (fp32 -> fp64).
TEST(Arm64InsnTest, ConvertFp32ToFp64) {
  uint64_t arg = 0x40cd70a4ULL;  // 6.42 in float
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %s1")(arg);
  // Expected: the exact fp64 widening of the fp32 input, upper half zero.
  ASSERT_EQ(res, MakeUInt128(0x4019ae1480000000ULL, 0U));
}
2367 
// Verifies the narrowing conversion "fcvt %s0, %d1" (fp64 -> fp32).
TEST(Arm64InsnTest, ConvertFp64ToFp32) {
  uint64_t arg = 0x401a0a3d70a3d70aULL;  // 6.51 in double
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %d1")(arg);
  // Expected: the fp32 rounding of the fp64 input in the low 32 bits.
  ASSERT_EQ(res, MakeUInt128(0x40d051ecULL, 0U));
}
2373 
// Verifies the narrowing conversion "fcvt %h0, %s1" (fp32 -> fp16) on values
// exactly representable in half precision; expected values are fp16 bit
// patterns.
TEST(Arm64InsnTest, ConvertFp32ToFp16) {
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %s1");
  EXPECT_EQ(AsmFcvt(bit_cast<uint32_t>(2.5f)), MakeUInt128(0x4100U, 0U));
  EXPECT_EQ(AsmFcvt(bit_cast<uint32_t>(4.5f)), MakeUInt128(0x4480U, 0U));
  EXPECT_EQ(AsmFcvt(bit_cast<uint32_t>(8.5f)), MakeUInt128(0x4840U, 0U));
  EXPECT_EQ(AsmFcvt(bit_cast<uint32_t>(16.5f)), MakeUInt128(0x4c20U, 0U));
}
2381 
// Verifies the widening conversion "fcvt %s0, %h1" (fp16 -> fp32).
TEST(Arm64InsnTest, ConvertFp16ToFp32) {
  uint64_t arg = 0x4100U;  // 2.5 as an fp16 bit pattern.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %h1")(arg);
  ASSERT_EQ(res, bit_cast<uint32_t>(2.5f));
}
2387 
// Verifies the narrowing conversion "fcvt %h0, %d1" (fp64 -> fp16).
TEST(Arm64InsnTest, ConvertFp64ToFp16) {
  uint64_t arg = bit_cast<uint64_t>(2.5);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %d1")(arg);
  // 0x4100 is 2.5 as an fp16 bit pattern.
  ASSERT_EQ(res, MakeUInt128(0x4100U, 0U));
}
2393 
// Verifies the widening conversion "fcvt %d0, %h1" (fp16 -> fp64).
TEST(Arm64InsnTest, ConvertFp16ToFp64) {
  uint64_t arg = 0x4100U;  // 2.5 as an fp16 bit pattern.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %h1")(arg);
  ASSERT_EQ(res, bit_cast<uint64_t>(2.5));
}
2399 
// Verifies "fcvtn" (convert two fp64 lanes to fp32 and write the lower half
// of the destination, zeroing the upper half), including overflow/infinity
// handling.
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.2s, %1.2d");
  ASSERT_EQ(AsmFcvtn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
  // Overflow or inf arguments result in inf.
  __uint128_t res = AsmFcvtn(
      MakeF64x2(std::numeric_limits<double>::max(), std::numeric_limits<double>::infinity()));
  ASSERT_EQ(res,
            MakeF32x4(std::numeric_limits<float>::infinity(),
                      std::numeric_limits<float>::infinity(),
                      0.0f,
                      0.0f));
  // Negative overflow or -inf arguments result in -inf.
  res = AsmFcvtn(
      MakeF64x2(std::numeric_limits<double>::lowest(), -std::numeric_limits<double>::infinity()));
  ASSERT_EQ(res,
            MakeF32x4(-std::numeric_limits<float>::infinity(),
                      -std::numeric_limits<float>::infinity(),
                      0.0f,
                      0.0f));
}
2419 
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2Upper) {
  // FCVTN2 writes the narrowed pair into the upper half of the destination
  // and leaves the lower half untouched.
  constexpr auto AsmFcvtn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.4s, %1.2d");
  __uint128_t doubles = MakeF64x2(2.0, 3.0);
  __uint128_t dest = MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f);
  ASSERT_EQ(AsmFcvtn2(doubles, dest), MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2426 
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %s0, %d1");
  const uint64_t kDblMax = bit_cast<uint64_t>(std::numeric_limits<double>::max());
  const uint64_t kDblLowest = bit_cast<uint64_t>(std::numeric_limits<double>::lowest());
  const uint64_t kDblInf = bit_cast<uint64_t>(std::numeric_limits<double>::infinity());
  const uint64_t kDblNegInf = bit_cast<uint64_t>(-std::numeric_limits<double>::infinity());

  // Exactly representable values convert exactly.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(2.0)), bit_cast<uint32_t>(2.0f));
  // Round-to-odd keeps finite out-of-range inputs finite: they saturate to
  // the largest-magnitude float instead of rounding to infinity.
  ASSERT_EQ(AsmFcvtxn(kDblMax), bit_cast<uint32_t>(std::numeric_limits<float>::max()));
  ASSERT_EQ(AsmFcvtxn(kDblLowest), bit_cast<uint32_t>(std::numeric_limits<float>::lowest()));
  // Infinities still map to infinities of the same sign.
  ASSERT_EQ(AsmFcvtxn(kDblInf), bit_cast<uint32_t>(std::numeric_limits<float>::infinity()));
  ASSERT_EQ(AsmFcvtxn(kDblNegInf), bit_cast<uint32_t>(-std::numeric_limits<float>::infinity()));
}
2442 
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2) {
  // Vector FCVTXN narrows both double lanes; the upper half of the result is
  // zeroed by the 64-bit destination form.
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %0.2s, %1.2d");
  ASSERT_EQ(AsmFcvtxn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
}
2448 
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2Upper) {
  // FCVTXN2 places the narrowed pair in the upper half and preserves the
  // lower half of the destination.
  constexpr auto AsmFcvtxn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtxn2 %0.4s, %1.2d");
  __uint128_t doubles = MakeF64x2(2.0, 3.0);
  __uint128_t dest = MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f);
  ASSERT_EQ(AsmFcvtxn2(doubles, dest), MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2455 
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Lower) {
  // FCVTL widens the two floats in the lower half; the upper two are ignored.
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.2d, %1.2s");
  ASSERT_EQ(AsmFcvtl(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(2.0, 3.0));
}
2461 
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Upper) {
  // FCVTL2 widens the two floats in the upper half; the lower two are ignored.
  constexpr auto AsmFcvtl2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.2d, %1.4s");
  ASSERT_EQ(AsmFcvtl2(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(4.0, 5.0));
}
2467 
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Lower) {
  // FP16 encodings of 16.5, 8.5, 4.5, 2.5 packed high-to-low in the lower
  // 64 bits; FCVTL widens them to four floats.
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.4s, %1.4h");
  ASSERT_EQ(AsmFcvtl(MakeUInt128(0x4c20'4840'4480'4100ULL, 0)),
            MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2474 
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Upper) {
  // Same FP16 values as the Lower test, but packed in the upper 64 bits and
  // widened via FCVTL2.
  constexpr auto AsmFcvtl2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.4s, %1.8h");
  ASSERT_EQ(AsmFcvtl2(MakeUInt128(0, 0x4c20'4840'4480'4100ULL)),
            MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2481 
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Lower) {
  // FCVTN narrows four floats to four FP16 values packed into the lower
  // 64 bits; the upper half is zeroed.
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.4h, %1.4s");
  ASSERT_EQ(AsmFcvtn(MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f)),
            MakeUInt128(0x4c20'4840'4480'4100ULL, 0));
}
2488 
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Upper) {
  // FCVTN2 narrows four floats into the upper 64 bits and keeps the lower
  // 64 bits of the destination intact.
  constexpr auto AsmFcvtn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.8h, %1.4s");
  __uint128_t floats = MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f);
  __uint128_t dest = MakeF32x4(3.0f, 5.0f, 7.0f, 11.0f);
  ASSERT_EQ(AsmFcvtn2(floats, dest), MakeUInt128(uint64_t(dest), 0x4c20'4840'4480'4100ULL));
}
2496 
TEST(Arm64InsnTest, AbsF32) {
  // fabs clears the sign bit: 0xc1273333 (-10.45f) -> 0x41273333 (10.45f).
  uint32_t negative = 0xc1273333U;
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %s0, %s1")(negative);
  ASSERT_EQ(actual, MakeUInt128(0x41273333ULL, 0U));
}
2502 
TEST(Arm64InsnTest, AbsF64) {
  // fabs clears the sign bit: -29.91 -> 29.91 (double bit patterns).
  uint64_t negative = 0xc03de8f5c28f5c29ULL;
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %d0, %d1")(negative);
  ASSERT_EQ(actual, MakeUInt128(0x403de8f5c28f5c29ULL, 0U));
}
2508 
TEST(Arm64InsnTest, AbsF32x4) {
  // Lane-wise absolute value; -0.0f becomes +0.0f.
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.4s, %1.4s");
  ASSERT_EQ(AsmFabs(MakeF32x4(-0.0f, 0.0f, 3.0f, -7.0f)), MakeF32x4(0.0f, 0.0f, 3.0f, 7.0f));
}
2514 
TEST(Arm64InsnTest, AbsF64x2) {
  // Lane-wise absolute value on the two double lanes.
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.2d, %1.2d");
  ASSERT_EQ(AsmFabs(MakeF64x2(-0.0, 3.0)), MakeF64x2(0.0, 3.0));
}
2520 
TEST(Arm64InsnTest, AbdF32) {
  // |16.19f - 10.06f| == 6.12f (operands and result as raw bit patterns).
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %s0, %s1, %s2");
  uint32_t lhs = 0x4181851fU;
  uint32_t rhs = 0x41211eb8U;
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeUInt128(0x40c3d70cULL, 0U));
}
2527 
TEST(Arm64InsnTest, AbdF64) {
  // |24.16 - 11.92| == 12.24 (operands and result as raw bit patterns).
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %d0, %d1, %d2");
  uint64_t lhs = 0x403828f5c28f5c29U;
  uint64_t rhs = 0x4027d70a3d70a3d7U;
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeUInt128(0x40287ae147ae147bULL, 0U));
}
2535 
TEST(Arm64InsnTest, AbdF32x4) {
  // Lane-wise |a - b|.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFabd(MakeF32x4(1.0f, 5.0f, -3.0f, -2.0f), MakeF32x4(-1.0f, 2.0f, -5.0f, 3.0f)),
            MakeF32x4(2.0f, 3.0f, 2.0f, 5.0f));
}
2543 
TEST(Arm64InsnTest, AbdF64x2) {
  // Lane-wise |a - b| on the two double lanes.
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFabd(MakeF64x2(5.0, -2.0), MakeF64x2(4.0, 3.0)), MakeF64x2(1.0, 5.0));
}
2551 
TEST(Arm64InsnTest, NegF32) {
  // fneg flips the sign bit: 0x40eeb852 (7.46f) -> 0xc0eeb852 (-7.46f).
  uint32_t positive = 0x40eeb852U;
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %s0, %s1")(positive);
  ASSERT_EQ(actual, MakeUInt128(0xc0eeb852ULL, 0U));
}
2557 
TEST(Arm64InsnTest, NegF64) {
  // fneg flips the sign bit: 82.79 -> -82.79 (double bit patterns).
  uint64_t positive = 0x4054b28f5c28f5c3ULL;
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %d0, %d1")(positive);
  ASSERT_EQ(actual, MakeUInt128(0xc054b28f5c28f5c3ULL, 0U));
}
2563 
TEST(Arm64InsnTest, NegF32x4) {
  // Lane-wise sign flip, including the signed zeros.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.4s, %1.4s");
  ASSERT_EQ(AsmFneg(MakeF32x4(-0.0f, 0.0f, 1.0f, -3.0f)), MakeF32x4(0.0f, -0.0f, -1.0f, 3.0f));
}
2569 
TEST(Arm64InsnTest, NegF64x2) {
  // Lane-wise sign flip on the two double lanes; +0.0 becomes -0.0.
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.2d, %1.2d");
  ASSERT_EQ(AsmFneg(MakeF64x2(0.0, 3.0)), MakeF64x2(-0.0, -3.0));
}
2575 
TEST(Arm64InsnTest, SqrtF32) {
  // sqrt(30.474f) == 5.5203261f (operand and result as raw bit patterns).
  uint32_t operand = 0x41f3cac1U;
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %s0, %s1")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x40b0a683ULL, 0U));
}
2581 
TEST(Arm64InsnTest, SqrtF64) {
  // sqrt(29.275) == 5.41... (operand and result as raw double bit patterns).
  uint64_t operand = 0x403d466666666666ULL;
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %d0, %d1")(operand);
  ASSERT_EQ(actual, MakeUInt128(0x4015a47e3392efb8ULL, 0U));
}
2587 
TEST(Arm64InsnTest, SqrtF32x4) {
  // Perfect squares give exact lane-wise square roots.
  constexpr auto AsmFsqrt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %0.4s, %1.4s");
  ASSERT_EQ(AsmFsqrt(MakeF32x4(0.0f, 1.0f, 4.0f, 9.0f)), MakeF32x4(0.0f, 1.0f, 2.0f, 3.0f));
}
2593 
TEST(Arm64InsnTest, RecipEstimateF32) {
  // FRECPE yields a low-precision estimate of 1/x: the expected values below
  // are close to, but intentionally not exactly, the true reciprocals.
  constexpr auto Frecpe32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %s0, %s1");
  ASSERT_EQ(Frecpe32(bit_cast<uint32_t>(0.25f)), bit_cast<uint32_t>(3.9921875f));
  ASSERT_EQ(Frecpe32(bit_cast<uint32_t>(0.50f)), bit_cast<uint32_t>(1.99609375f));
  ASSERT_EQ(Frecpe32(bit_cast<uint32_t>(2.00f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(Frecpe32(bit_cast<uint32_t>(4.00f)), bit_cast<uint32_t>(0.24951171875f));
}
2601 
TEST(Arm64InsnTest, RecipEstimateF32x4) {
  // Vector form of FRECPE: same per-lane estimates as the scalar test.
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %0.4s, %1.4s");
  ASSERT_EQ(AsmFrecpe(MakeF32x4(0.25f, 0.50f, 2.00f, 4.00f)),
            MakeF32x4(3.9921875f, 1.99609375f, 0.4990234375f, 0.24951171875f));
}
2607 
TEST(Arm64InsnTest, RecipStepF32) {
  // FRECPS(a, b) computes 2 - a*b, the Newton-Raphson step used to refine a
  // reciprocal estimate; all expected values follow from that formula.
  constexpr auto Step = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %s0, %s1, %s2");
  ASSERT_EQ(Step(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.25f));
  ASSERT_EQ(Step(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.00f));
  ASSERT_EQ(Step(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f)),
            bit_cast<uint32_t>(1.25f));
  ASSERT_EQ(Step(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(0.50f));
}
2619 
TEST(Arm64InsnTest, RecipStepF64) {
  // Double-precision FRECPS(a, b) == 2 - a*b; same cases as the F32 test.
  constexpr auto Step = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %d0, %d1, %d2");
  ASSERT_EQ(Step(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.25));
  ASSERT_EQ(Step(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.00));
  ASSERT_EQ(Step(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25)),
            bit_cast<uint64_t>(1.25));
  ASSERT_EQ(Step(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(0.50));
}
2631 
TEST(Arm64InsnTest, RecipStepF32x4) {
  // Vector FRECPS: each result lane is 2 - a*b for the corresponding lanes.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFrecps(MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f),
                      MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f)),
            MakeF32x4(1.25f, 1.00f, 1.25f, 0.50f));
}
2639 
TEST(Arm64InsnTest, RecipStepF64x2) {
  // Vector FRECPS on double lanes: each result lane is 2 - a*b.
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFrecps(MakeF64x2(1.50, 2.00), MakeF64x2(0.50, 0.50)), MakeF64x2(1.25, 1.00));
  ASSERT_EQ(AsmFrecps(MakeF64x2(3.00, 3.00), MakeF64x2(0.25, 0.50)), MakeF64x2(1.25, 0.50));
}
2649 
TEST(Arm64InsnTest, RecipSqrtEstimateF32) {
  // FRSQRTE yields a low-precision estimate of 1/sqrt(x); expected values are
  // close to, but intentionally not exactly, the true results.
  constexpr auto Frsqrte32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %s0, %s1");
  ASSERT_EQ(Frsqrte32(bit_cast<uint32_t>(2.0f)), bit_cast<uint32_t>(0.705078125f));
  ASSERT_EQ(Frsqrte32(bit_cast<uint32_t>(3.0f)), bit_cast<uint32_t>(0.576171875f));
  ASSERT_EQ(Frsqrte32(bit_cast<uint32_t>(4.0f)), bit_cast<uint32_t>(0.4990234375f));
  ASSERT_EQ(Frsqrte32(bit_cast<uint32_t>(5.0f)), bit_cast<uint32_t>(0.4462890625f));
}
2657 
TEST(Arm64InsnTest, RecipSqrtEstimateF32x2) {
  // 64-bit vector form: only the two lower lanes are computed; the upper half
  // of the result is zero.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.2s, %1.2s");
  ASSERT_EQ(AsmFrsqrte(MakeF32x4(2.0f, 3.0f, 0, 0)),
            MakeF32x4(0.705078125f, 0.576171875f, 0, 0));
}
2664 
TEST(Arm64InsnTest, RecipSqrtEstimateF32x4) {
  // 128-bit vector form: all four lanes get the 1/sqrt(x) estimate.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.4s, %1.4s");
  ASSERT_EQ(AsmFrsqrte(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)),
            MakeF32x4(0.705078125f, 0.576171875f, 0.4990234375f, 0.4462890625f));
}
2671 
TEST(Arm64InsnTest, RecipSqrtEstimateF64) {
  // Double-precision FRSQRTE; the estimates match the F32 test values.
  constexpr auto Frsqrte64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %d0, %d1");
  ASSERT_EQ(Frsqrte64(bit_cast<uint64_t>(2.0)), bit_cast<uint64_t>(0.705078125));
  ASSERT_EQ(Frsqrte64(bit_cast<uint64_t>(3.0)), bit_cast<uint64_t>(0.576171875));
  ASSERT_EQ(Frsqrte64(bit_cast<uint64_t>(4.0)), bit_cast<uint64_t>(0.4990234375));
  ASSERT_EQ(Frsqrte64(bit_cast<uint64_t>(5.0)), bit_cast<uint64_t>(0.4462890625));
}
2679 
TEST(Arm64InsnTest, RecipSqrtEstimateF64x2) {
  // Vector form on two double lanes.
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.2d, %1.2d");
  ASSERT_EQ(AsmFrsqrte(MakeF64x2(2.0, 3.0)),
            MakeUInt128(bit_cast<uint64_t>(0.705078125), bit_cast<uint64_t>(0.576171875)));
}
2686 
TEST(Arm64InsnTest, RecipSqrtStepF32) {
  // FRSQRTS(a, b) computes (3 - a*b) / 2, the Newton-Raphson step used to
  // refine a reciprocal-square-root estimate.
  constexpr auto Step = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %s0, %s1, %s2");
  ASSERT_EQ(Step(bit_cast<uint32_t>(1.50f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.125f));
  ASSERT_EQ(Step(bit_cast<uint32_t>(2.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(1.000f));
  ASSERT_EQ(Step(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.25f)),
            bit_cast<uint32_t>(1.125f));
  ASSERT_EQ(Step(bit_cast<uint32_t>(3.00f), bit_cast<uint32_t>(0.50f)),
            bit_cast<uint32_t>(0.750f));
}
2698 
TEST(Arm64InsnTest, RecipSqrtStepF64) {
  // Double-precision FRSQRTS(a, b) == (3 - a*b) / 2; same cases as F32.
  constexpr auto Step = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %d0, %d1, %d2");
  ASSERT_EQ(Step(bit_cast<uint64_t>(1.50), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.125));
  ASSERT_EQ(Step(bit_cast<uint64_t>(2.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(1.000));
  ASSERT_EQ(Step(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.25)),
            bit_cast<uint64_t>(1.125));
  ASSERT_EQ(Step(bit_cast<uint64_t>(3.00), bit_cast<uint64_t>(0.50)),
            bit_cast<uint64_t>(0.750));
}
2710 
TEST(Arm64InsnTest, RecipSqrtStepF32x4) {
  // Vector FRSQRTS: each result lane is (3 - a*b) / 2.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFrsqrts(MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f),
                       MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f)),
            MakeF32x4(1.125f, 1.000f, 1.125f, 0.750f));
}
2718 
TEST(Arm64InsnTest, RecipSqrtStepF64x2) {
  // Vector FRSQRTS on double lanes: each result lane is (3 - a*b) / 2.
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFrsqrts(MakeF64x2(1.50, 2.00), MakeF64x2(0.50, 0.50)), MakeF64x2(1.125, 1.000));
  ASSERT_EQ(AsmFrsqrts(MakeF64x2(3.00, 3.00), MakeF64x2(0.25, 0.50)), MakeF64x2(1.125, 0.750));
}
2728 
TEST(Arm64InsnTest, AddFp32) {
  // 6.68f + 4.98f == 11.66f (operands and result as raw bit patterns).
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %s0, %s1, %s2");
  uint64_t lhs = 0x40d5c28fULL;
  uint64_t rhs = 0x409f5c29ULL;
  ASSERT_EQ(AsmFadd(lhs, rhs), MakeUInt128(0x413a8f5cULL, 0U));
}
2735 
TEST(Arm64InsnTest, AddFp64) {
  // 8.30 + 4.17 == 12.47 (operands and result as raw double bit patterns).
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %d0, %d1, %d2");
  uint64_t lhs = 0x402099999999999aULL;
  uint64_t rhs = 0x4010ae147ae147aeULL;
  ASSERT_EQ(AsmFadd(lhs, rhs), MakeUInt128(0x4028f0a3d70a3d71ULL, 0U));
}
2742 
TEST(Arm64InsnTest, AddF32x4) {
  // Lane-wise addition of four float lanes.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFadd(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f)),
            MakeF32x4(3.0f, 3.0f, -1.0f, 5.0f));
}
2749 
TEST(Arm64InsnTest, AddF64x2) {
  // Lane-wise addition of two double lanes.
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFadd(MakeF64x2(3.0, 5.0), MakeF64x2(-4.0, 2.0)), MakeF64x2(-1.0, 7.0));
}
2756 
TEST(Arm64InsnTest, AddPairwiseF32x2) {
  // Scalar FADDP sums the two lowest lanes (1 + 2 == 3); upper lanes ignored.
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("faddp %s0, %1.2s");
  ASSERT_EQ(AsmFaddp(MakeF32x4(1.0f, 2.0f, 4.0f, 8.0f)), bit_cast<uint32_t>(3.0f));
}
2762 
TEST(Arm64InsnTest, AddPairwiseF32x4) {
  // Vector FADDP: result lanes are pairwise sums taken from arg1, then arg2.
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("faddp %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFaddp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f)),
            MakeF32x4(-1.0f, 7.0f, 7.0f, -3.0f));
}
2769 
TEST(Arm64InsnTest, SubFp32) {
  // 9.96f - 3.13f == 6.83f (operands and result as raw bit patterns).
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %s0, %s1, %s2");
  uint64_t lhs = 0x411f5c29ULL;
  uint64_t rhs = 0x404851ecULL;
  ASSERT_EQ(AsmFsub(lhs, rhs), MakeUInt128(0x40da8f5cULL, 0U));
}
2776 
TEST(Arm64InsnTest, SubFp64) {
  // 7.72 - 5.35 == 2.37 (operands and result as raw double bit patterns).
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %d0, %d1, %d2");
  uint64_t lhs = 0x401ee147ae147ae1ULL;
  uint64_t rhs = 0x4015666666666666ULL;
  ASSERT_EQ(AsmFsub(lhs, rhs), MakeUInt128(0x4002f5c28f5c28f6ULL, 0U));
}
2783 
TEST(Arm64InsnTest, SubF32x4) {
  // Lane-wise subtraction of four float lanes.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.4s, %1.4s, %2.4s");
  ASSERT_EQ(AsmFsub(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f), MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f)),
            MakeF32x4(-9.0f, 1.0f, 15.0f, -5.0f));
}
2790 
TEST(Arm64InsnTest, SubF64x2) {
  // Lane-wise subtraction of two double lanes.
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFsub(MakeF64x2(3.0, 5.0), MakeF64x2(-4.0, 2.0)), MakeF64x2(7.0, 3.0));
}
2797 
TEST(Arm64InsnTest, MaxFp32) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %s0, %s1, %s2");
  const uint32_t two = bit_cast<uint32_t>(2.0f);
  const uint32_t three = bit_cast<uint32_t>(3.0f);

  // max(2, 3) == 3.
  ASSERT_EQ(AsmFmax(two, three), MakeU32x4(three, 0, 0, 0));
  // FMAX (unlike FMAXNM) propagates a NaN from either operand.
  ASSERT_EQ(AsmFmax(kDefaultNaN32AsInteger, three), kDefaultNaN32AsInteger);
  ASSERT_EQ(AsmFmax(three, kDefaultNaN32AsInteger), kDefaultNaN32AsInteger);
}
2807 
TEST(Arm64InsnTest, MaxFp64) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t three = bit_cast<uint64_t>(3.0);

  // max(2, 3) == 3.
  ASSERT_EQ(AsmFmax(two, three), MakeUInt128(three, 0U));
  // FMAX (unlike FMAXNM) propagates a NaN from either operand.
  ASSERT_EQ(AsmFmax(kDefaultNaN64AsInteger, three), kDefaultNaN64AsInteger);
  ASSERT_EQ(AsmFmax(three, kDefaultNaN64AsInteger), kDefaultNaN64AsInteger);
}
2817 
TEST(Arm64InsnTest, MaxF32x4) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.4s, %1.4s, %2.4s");
  // Lane-wise maximum; +0.0 wins over -0.0.
  ASSERT_EQ(AsmFmax(MakeF32x4(-0.0f, 2.0f, 3.0f, -4.0f), MakeF32x4(0.0f, 1.0f, -3.0f, -3.0f)),
            MakeF32x4(0.0f, 2.0f, 3.0f, -3.0f));

  // A NaN in either operand makes that lane the default NaN.
  const float kNaN = bit_cast<float>(kDefaultNaN32AsInteger);
  ASSERT_EQ(AsmFmax(MakeF32x4(-0.0f, kNaN, 3.0f, -4.0f), MakeF32x4(0.0f, 1.0f, -3.0f, kNaN)),
            MakeF32x4(0.0f, kNaN, 3.0f, kNaN));
}
2832 
TEST(Arm64InsnTest, MaxF64x2) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.2d, %1.2d, %2.2d");
  // Lane-wise maximum; +0.0 wins over -0.0.
  ASSERT_EQ(AsmFmax(MakeF64x2(-0.0, 3.0), MakeF64x2(0.0, -3.0)), MakeF64x2(0.0, 3.0));

  // A NaN in either operand makes that lane the default NaN.
  const double kNaN = bit_cast<double>(kDefaultNaN64AsInteger);
  ASSERT_EQ(AsmFmax(MakeF64x2(kNaN, 3.0), MakeF64x2(1.0, kNaN)), MakeF64x2(kNaN, kNaN));
}
2845 
TEST(Arm64InsnTest, MaxNumberFp32) {
  // FMAXNM (single precision): a quiet NaN operand is treated as "missing",
  // so the numeric operand wins even when it is negative.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);
  // Fixed: this single-precision test previously used
  // bit_cast<uint64_t>(-2.0); the low 32 bits of that double encoding are
  // zero, so the %s-register instruction really operated on +0.0f and the
  // negative-operand cases were vacuous.  Mirrors MinNumberFp32.
  uint32_t fp_arg_minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_three, 0, 0, 0));

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, kQuietNaN32AsInteger), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(fp_arg_minus_two, kQuietNaN32AsInteger),
            MakeU32x4(fp_arg_minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32AsInteger, fp_arg_two), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32AsInteger, fp_arg_minus_two),
            MakeU32x4(fp_arg_minus_two, 0, 0, 0));
}
2861 
TEST(Arm64InsnTest, MaxNumberFp64) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t three = bit_cast<uint64_t>(3.0);
  const uint64_t minus_two = bit_cast<uint64_t>(-2.0);

  // Ordinary numeric maximum.
  ASSERT_EQ(AsmFmaxnm(two, three), MakeUInt128(three, 0U));

  // FMAXNM ignores a quiet NaN in favor of the numeric operand, even when
  // that operand is negative, regardless of operand order.
  ASSERT_EQ(AsmFmaxnm(two, kQuietNaN64AsInteger), MakeUInt128(two, 0U));
  ASSERT_EQ(AsmFmaxnm(minus_two, kQuietNaN64AsInteger), MakeUInt128(minus_two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64AsInteger, two), MakeUInt128(two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64AsInteger, minus_two), MakeUInt128(minus_two, 0));
}
2875 
TEST(Arm64InsnTest, MinNumberFp32) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %s0, %s1, %s2");
  const uint32_t two = bit_cast<uint32_t>(2.0f);
  const uint32_t three = bit_cast<uint32_t>(3.0f);
  const uint32_t minus_two = bit_cast<uint32_t>(-2.0f);

  // Ordinary numeric minimum.
  ASSERT_EQ(AsmFminnm(two, three), MakeU32x4(two, 0, 0, 0));

  // FMINNM ignores a quiet NaN in favor of the numeric operand, regardless of
  // operand order or sign.
  ASSERT_EQ(AsmFminnm(two, kQuietNaN32AsInteger), MakeU32x4(two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(minus_two, kQuietNaN32AsInteger), MakeU32x4(minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32AsInteger, two), MakeU32x4(two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32AsInteger, minus_two), MakeU32x4(minus_two, 0, 0, 0));
}
2891 
TEST(Arm64InsnTest, MinNumberFp64) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t three = bit_cast<uint64_t>(3.0);
  const uint64_t minus_two = bit_cast<uint64_t>(-2.0);

  // Ordinary numeric minimum.
  ASSERT_EQ(AsmFminnm(two, three), MakeUInt128(two, 0U));

  // FMINNM ignores a quiet NaN in favor of the numeric operand, regardless of
  // operand order or sign.
  ASSERT_EQ(AsmFminnm(two, kQuietNaN64AsInteger), MakeUInt128(two, 0U));
  ASSERT_EQ(AsmFminnm(minus_two, kQuietNaN64AsInteger), MakeUInt128(minus_two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64AsInteger, two), MakeUInt128(two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64AsInteger, minus_two), MakeUInt128(minus_two, 0));
}
2905 
TEST(Arm64InsnTest, MaxNumberF32x4) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.4s, %1.4s, %2.4s");
  // All-numeric lanes: plain lane-wise maximum.
  __uint128_t arg1 = MakeF32x4(-1.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(2.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmaxnm(arg1, arg2), MakeF32x4(2.0f, 2.0f, 3.0f, -3.0f));

  // Quiet NaN (positive or negative) vs a number: the number wins in every
  // lane, regardless of which operand holds the NaN.
  __uint128_t arg3 = MakeU32x4(bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f),
                               kNegativeQuietNaN32AsInteger,
                               kQuietNaN32AsInteger);
  __uint128_t arg4 = MakeU32x4(kNegativeQuietNaN32AsInteger,
                               kQuietNaN32AsInteger,
                               bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFmaxnm(arg3, arg4), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));

  // Signaling NaN vs a number: the lane becomes the default NaN (lanes 0 and
  // 2 below), while quiet-NaN-vs-number lanes still pick the number.
  __uint128_t arg5 = MakeU32x4(bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f),
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger);
  __uint128_t arg6 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFmaxnm(arg5, arg6),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      -1.0f,
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      -1.0f));

  // NaN vs NaN (every signaling/quiet combination): always the default NaN.
  __uint128_t arg7 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               kQuietNaN32AsInteger);
  __uint128_t arg8 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger);
  ASSERT_EQ(AsmFmaxnm(arg7, arg8),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger)));

  // Signed zeros: +0.0 is the maximum whenever the signs differ.
  __uint128_t arg9 = MakeF32x4(-0.0f, -0.0f, 0.0f, 0.0f);
  __uint128_t arg10 = MakeF32x4(-0.0f, 0.0f, -0.0f, 0.0f);
  ASSERT_EQ(AsmFmaxnm(arg9, arg10), MakeF32x4(-0.0f, 0.0f, 0.0f, 0.0f));
}
2954 
TEST(Arm64InsnTest, MaxNumberF64x2) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.2d, %1.2d, %2.2d");
  // Numeric lanes: plain lane-wise maximum.
  ASSERT_EQ(AsmFmaxnm(MakeF64x2(-1.0, -4.0), MakeF64x2(2.0, -3.0)), MakeF64x2(2.0, -3.0));

  // A quiet NaN lane loses to the numeric lane on the other side.
  __uint128_t lhs = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64AsInteger);
  __uint128_t rhs = MakeUInt128(kQuietNaN64AsInteger, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFmaxnm(lhs, rhs), MakeF64x2(1.0, -1.0));
}
2965 
TEST(Arm64InsnTest, MinNumberF32x4) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.4s, %1.4s, %2.4s");
  // All-numeric lanes: plain lane-wise minimum.
  __uint128_t arg1 = MakeF32x4(-1.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(2.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFminnm(arg1, arg2), MakeF32x4(-1.0f, 1.0f, -3.0f, -4.0f));

  // Quiet NaN (positive or negative) vs a number: the number wins in every
  // lane, regardless of which operand holds the NaN.
  __uint128_t arg3 = MakeU32x4(bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f),
                               kNegativeQuietNaN32AsInteger,
                               kQuietNaN32AsInteger);
  __uint128_t arg4 = MakeU32x4(kNegativeQuietNaN32AsInteger,
                               kQuietNaN32AsInteger,
                               bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFminnm(arg3, arg4), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));

  // Signaling NaN vs a number: the lane becomes the default NaN (lanes 0 and
  // 2 below), while quiet-NaN-vs-number lanes still pick the number.
  __uint128_t arg5 = MakeU32x4(bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f),
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger);
  __uint128_t arg6 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFminnm(arg5, arg6),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      -1.0f,
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      -1.0f));

  // NaN vs NaN (every signaling/quiet combination): always the default NaN.
  __uint128_t arg7 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               kQuietNaN32AsInteger);
  __uint128_t arg8 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger);
  ASSERT_EQ(AsmFminnm(arg7, arg8),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger)));

  // Signed zeros: -0.0 is the minimum whenever the signs differ.
  __uint128_t arg9 = MakeF32x4(-0.0f, -0.0f, 0.0f, 0.0f);
  __uint128_t arg10 = MakeF32x4(-0.0f, 0.0f, -0.0f, 0.0f);
  ASSERT_EQ(AsmFminnm(arg9, arg10), MakeF32x4(-0.0f, -0.0f, -0.0f, 0.0f));
}
3014 
TEST(Arm64InsnTest, MinNumberF64x2) {
  // FMINNM (vector, double): signed-zero ordering and quiet-NaN suppression.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.2d, %1.2d, %2.2d");

  __uint128_t lhs = MakeF64x2(0.0, 3.0);
  __uint128_t rhs = MakeF64x2(-0.0, -3.0);
  ASSERT_EQ(AsmFminnm(lhs, rhs), MakeF64x2(-0.0, -3.0));

  // A lone quiet NaN loses to the numeric operand.
  __uint128_t lhs_nan = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64AsInteger);
  __uint128_t rhs_nan = MakeUInt128(kQuietNaN64AsInteger, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFminnm(lhs_nan, rhs_nan), MakeF64x2(1.0, -1.0));
}
3026 
TEST(Arm64InsnTest, MinFp32) {
  // FMIN (scalar, single-precision): minimum of numbers; NaN propagates.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %s0, %s1, %s2");
  uint32_t two_bits = bit_cast<uint32_t>(2.0f);
  uint32_t three_bits = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmin(two_bits, three_bits), MakeU32x4(two_bits, 0, 0, 0));
  ASSERT_EQ(AsmFmin(kDefaultNaN32AsInteger, three_bits), kDefaultNaN32AsInteger);
  ASSERT_EQ(AsmFmin(three_bits, kDefaultNaN32AsInteger), kDefaultNaN32AsInteger);
}
3036 
TEST(Arm64InsnTest, MinFp64) {
  // FMIN (scalar, double-precision): minimum of numbers; NaN propagates.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %d0, %d1, %d2");
  uint64_t two_bits = bit_cast<uint64_t>(2.0);
  uint64_t three_bits = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmin(two_bits, three_bits), MakeUInt128(two_bits, 0U));
  ASSERT_EQ(AsmFmin(kDefaultNaN64AsInteger, three_bits), kDefaultNaN64AsInteger);
  ASSERT_EQ(AsmFmin(three_bits, kDefaultNaN64AsInteger), kDefaultNaN64AsInteger);
}
3046 
TEST(Arm64InsnTest, MinF32x4) {
  // FMIN (vector): elementwise minimum; a NaN in either lane propagates.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.4s, %1.4s, %2.4s");

  __uint128_t lhs = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t rhs = MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmin(lhs, rhs), MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  __uint128_t lhs_nan = MakeF32x4(-0.0f, bit_cast<float>(kDefaultNaN32AsInteger), 3.0f, -4.0f);
  __uint128_t rhs_nan = MakeF32x4(0.0f, 1.0f, -3.0f, bit_cast<float>(kDefaultNaN32AsInteger));
  ASSERT_EQ(AsmFmin(lhs_nan, rhs_nan),
            MakeF32x4(-0.0f,
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      -3.0f,
                      bit_cast<float>(kDefaultNaN32AsInteger)));
}
3061 
TEST(Arm64InsnTest, MinF64x2) {
  // FMIN (vector, double): elementwise minimum; NaN lanes propagate.
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.2d, %1.2d, %2.2d");

  __uint128_t lhs = MakeF64x2(0.0, 3.0);
  __uint128_t rhs = MakeF64x2(-0.0, -3.0);
  ASSERT_EQ(AsmFmin(lhs, rhs), MakeF64x2(-0.0, -3.0));

  __uint128_t lhs_nan = MakeF64x2(bit_cast<double>(kDefaultNaN64AsInteger), 3.0);
  __uint128_t rhs_nan = MakeF64x2(1.0, bit_cast<double>(kDefaultNaN64AsInteger));
  ASSERT_EQ(AsmFmin(lhs_nan, rhs_nan),
            MakeF64x2(bit_cast<double>(kDefaultNaN64AsInteger),
                      bit_cast<double>(kDefaultNaN64AsInteger)));
}
3074 
TEST(Arm64InsnTest, MaxPairwiseF32Scalar) {
  // FMAXP (scalar): maximum of the low pair; NaN propagates.
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxp %s0, %1.2s");

  __uint128_t src = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxp(src), bit_cast<uint32_t>(2.0f));

  __uint128_t src_nan = MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxp(src_nan), kDefaultNaN32AsInteger);
}
3083 
TEST(Arm64InsnTest, MaxPairwiseF32x4) {
  // FMAXP (vector): pairwise maxima of the concatenated operands.
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxp %0.4s, %1.4s, %2.4s");

  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(lhs, rhs), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  // A NaN in a pair propagates into that pair's result lane.
  __uint128_t lhs_nan = MakeF32x4(
      bit_cast<float>(kDefaultNaN32AsInteger), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32AsInteger));
  __uint128_t rhs_nan = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(lhs_nan, rhs_nan),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      6.0f,
                      5.0f));
}
3099 
TEST(Arm64InsnTest, MinPairwiseF32Scalar) {
  // FMINP (scalar): minimum of the low pair; NaN propagates.
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminp %s0, %1.2s");

  __uint128_t src = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminp(src), bit_cast<uint32_t>(-3.0f));

  __uint128_t src_nan = MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminp(src_nan), kDefaultNaN32AsInteger);
}
3108 
TEST(Arm64InsnTest, MinPairwiseF32x4) {
  // FMINP (vector): pairwise minima of the concatenated operands.
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminp %0.4s, %1.4s, %2.4s");

  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminp(lhs, rhs), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  // A NaN in a pair propagates into that pair's result lane.
  __uint128_t lhs_nan = MakeF32x4(
      bit_cast<float>(kDefaultNaN32AsInteger), 2.0f, 7.0f, bit_cast<float>(kDefaultNaN32AsInteger));
  __uint128_t rhs_nan = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminp(lhs_nan, rhs_nan),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      1.0f,
                      -8.0f));
}
3124 
TEST(Arm64InsnTest, MaxPairwiseNumberF32Scalar) {
  // FMAXNMP (scalar): pairwise max where a quiet NaN loses to a number.
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmp %s0, %1.2s");

  __uint128_t src = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxnmp(src), bit_cast<uint32_t>(2.0f));

  __uint128_t src_nan = MakeF32x4(bit_cast<float>(kQuietNaN32AsInteger), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxnmp(src_nan), bit_cast<uint32_t>(2.0f));
}
3133 
TEST(Arm64InsnTest, MaxPairwiseNumberF32x4) {
  // FMAXNMP (vector): pairwise max where a quiet NaN loses to a number.
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnmp %0.4s, %1.4s, %2.4s");

  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(lhs, rhs), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  __uint128_t lhs_nan = MakeF32x4(
      bit_cast<float>(kQuietNaN32AsInteger), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32AsInteger));
  __uint128_t rhs_nan = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(lhs_nan, rhs_nan), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));
}
3145 
TEST(Arm64InsnTest, MinPairwiseNumberF32Scalar) {
  // FMINNMP (scalar): pairwise min where a quiet NaN loses to a number.
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmp %s0, %1.2s");

  __uint128_t src = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminnmp(src), bit_cast<uint32_t>(-3.0f));

  __uint128_t src_nan = MakeF32x4(bit_cast<float>(kQuietNaN32AsInteger), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminnmp(src_nan), bit_cast<uint32_t>(2.0f));
}
3154 
TEST(Arm64InsnTest, MinPairwiseNumberF32x4) {
  // FMINNMP (vector): pairwise min where a quiet NaN loses to a number.
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnmp %0.4s, %1.4s, %2.4s");

  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(lhs, rhs), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  __uint128_t lhs_nan = MakeF32x4(
      bit_cast<float>(kQuietNaN32AsInteger), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32AsInteger));
  __uint128_t rhs_nan = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(lhs_nan, rhs_nan), MakeF32x4(2.0f, 7.0f, 1.0f, -8.0f));
}
3166 
TEST(Arm64InsnTest, MaxAcrossF32x4) {
  // FMAXV: maximum across all lanes; NaN propagates.
  constexpr auto AsmFmaxv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxv %s0, %1.4s");

  __uint128_t src = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxv(src), bit_cast<uint32_t>(3.0f));

  __uint128_t src_nan = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32AsInteger), -4.0f);
  ASSERT_EQ(AsmFmaxv(src_nan), kDefaultNaN32AsInteger);
}
3175 
TEST(Arm64InsnTest, MinAcrossF32x4) {
  // FMINV: minimum across all lanes; NaN propagates.
  constexpr auto AsmFminv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminv %s0, %1.4s");

  __uint128_t src = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFminv(src), bit_cast<uint32_t>(-4.0f));

  __uint128_t src_nan = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32AsInteger), -4.0f);
  ASSERT_EQ(AsmFminv(src_nan), kDefaultNaN32AsInteger);
}
3184 
TEST(Arm64InsnTest, MaxNumberAcrossF32x4) {
  // FMAXNMV: maximum across lanes; a quiet NaN lane is ignored.
  constexpr auto AsmFmaxnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmv %s0, %1.4s");

  __uint128_t src = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxnmv(src), bit_cast<uint32_t>(3.0f));

  __uint128_t src_nan = MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32AsInteger), 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxnmv(src_nan), bit_cast<uint32_t>(3.0f));
}
3193 
TEST(Arm64InsnTest, MinNumberAcrossF32x4) {
  // FMINNMV: minimum across lanes; a quiet NaN lane is ignored.
  constexpr auto AsmFminnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmv %s0, %1.4s");

  __uint128_t src = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFminnmv(src), bit_cast<uint32_t>(-4.0f));

  __uint128_t src_nan = MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32AsInteger), 3.0f, -4.0f);
  ASSERT_EQ(AsmFminnmv(src_nan), bit_cast<uint32_t>(-4.0f));
}
3202 
TEST(Arm64InsnTest, MulFp32) {
  // FMUL (scalar, single-precision).
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %s2");
  uint64_t lhs = 0x40a1999aULL;  // 5.05 in float
  uint64_t rhs = 0x40dae148ULL;  // 6.84 in float
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeUInt128(0x420a2b03ULL, 0U));  // 34.5420 in float
}
3209 
TEST(Arm64InsnTest, MulFp64) {
  // FMUL (scalar, double-precision).
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %d2");
  uint64_t lhs = 0x40226b851eb851ecULL;  // 9.21 in double
  uint64_t rhs = 0x4020c7ae147ae148ULL;  // 8.39 in double
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeUInt128(0x40535166cf41f214ULL, 0U));  // 77.2719 in double
}
3216 
TEST(Arm64InsnTest, MulF32x4) {
  // FMUL (vector): elementwise product.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.0f, -2.0f, 3.0f, -4.0f);
  __uint128_t rhs = MakeF32x4(-3.0f, -1.0f, 4.0f, 1.0f);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF32x4(-3.0f, 2.0f, 12.0f, -4.0f));
}
3223 
TEST(Arm64InsnTest, MulF64x2) {
  // FMUL (vector, double): elementwise product.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(-4.0, 2.0);
  __uint128_t rhs = MakeF64x2(2.0, 3.0);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF64x2(-8.0, 6.0));
}
3230 
TEST(Arm64InsnTest, MulF32x4ByScalar) {
  // FMUL (vector, by element): every lane of %1 times lane 3 of %2 (== 9.0).
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.s[3]");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 7.0f, 8.0f, 9.0f);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF32x4(18.0f, 27.0f, 36.0f, 45.0f));
}
3237 
TEST(Arm64InsnTest, MulF64x2ByScalar) {
  // FMUL (vector, by element): every lane of %1 times lane 1 of %2 (== 4.0).
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.d[1]");
  __uint128_t lhs = MakeF64x2(2.0, 3.0);
  __uint128_t rhs = MakeF64x2(5.0, 4.0);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF64x2(8.0, 12.0));
}
3244 
TEST(Arm64InsnTest, MulF32IndexedElem) {
  // FMUL (scalar, by element): lane 0 of %1 times lane 2 of %2 -> 2 * 17.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %2.s[2]");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmul(lhs, rhs), bit_cast<uint32_t>(34.0f));
}
3251 
TEST(Arm64InsnTest, MulF64IndexedElem) {
  // FMUL (scalar, by element): lane 0 of %1 times lane 1 of %2 -> 2 * 4.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %2.d[1]");
  __uint128_t lhs = MakeF64x2(2.0, 3.0);
  __uint128_t rhs = MakeF64x2(5.0, 4.0);
  ASSERT_EQ(AsmFmul(lhs, rhs), bit_cast<uint64_t>(8.0));
}
3258 
TEST(Arm64InsnTest, MulExtendedF32) {
  // FMULX (scalar): only the low lanes participate -> 2 * 11.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %s2");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), bit_cast<uint32_t>(22.0f));
}
3265 
TEST(Arm64InsnTest, MulExtendedF32x4) {
  // FMULX (vector): elementwise product.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), MakeF32x4(22.0f, 39.0f, 85.0f, 133.0f));
}
3272 
TEST(Arm64InsnTest, MulExtendedF32IndexedElem) {
  // FMULX (scalar, by element): lane 0 of %1 times lane 2 of %2 -> 2 * 17.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %2.s[2]");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), bit_cast<uint32_t>(34.0f));
}
3279 
TEST(Arm64InsnTest, MulExtendedF64IndexedElem) {
  // FMULX (scalar, by element): lane 0 of %1 times lane 1 of %2 -> 2 * 4.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %d0, %d1, %2.d[1]");
  __uint128_t lhs = MakeF64x2(2.0, 3.0);
  __uint128_t rhs = MakeF64x2(5.0, 4.0);
  ASSERT_EQ(AsmFmulx(lhs, rhs), bit_cast<uint64_t>(8.0));
}
3286 
TEST(Arm64InsnTest, MulExtendedF32x4IndexedElem) {
  // FMULX (vector, by element): every lane of %1 times lane 2 of %2 (== 17.0).
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.s[2]");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), MakeF32x4(34.0f, 51.0f, 85.0f, 119.0f));
}
3293 
TEST(Arm64InsnTest, MulNegFp32) {
  // FNMUL (scalar, single-precision): negated product -> -(2 * 3).
  // Note: the operands are 32-bit bit patterns, so hold them in uint32_t
  // (they were previously widened to uint64_t for no reason).
  uint32_t fp_arg1 = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg2 = bit_cast<uint32_t>(3.0f);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %s0, %s1, %s2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(bit_cast<uint32_t>(-6.0f), 0U));
}
3300 
TEST(Arm64InsnTest, MulNegFp64) {
  // FNMUL (scalar, double-precision): negated product -> -(2 * 3).
  constexpr auto AsmFnmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %d0, %d1, %d2");
  uint64_t lhs = bit_cast<uint64_t>(2.0);
  uint64_t rhs = bit_cast<uint64_t>(3.0);
  ASSERT_EQ(AsmFnmul(lhs, rhs), MakeUInt128(bit_cast<uint64_t>(-6.0), 0U));
}
3307 
TEST(Arm64InsnTest, DivFp32) {
  // FDIV (scalar, single-precision).
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %s0, %s1, %s2");

  uint32_t dividend = 0x40c23d71U;  // 6.07 in float
  uint32_t divisor = 0x401a3d71U;   // 2.41 in float
  ASSERT_EQ(AsmFdiv(dividend, divisor), MakeUInt128(0x402131edULL, 0U));  // 2.5186722 in float

  // Make sure that FDIV can produce a denormal result under the default FPCR,
  // where the FZ bit (flush-to-zero) is off.
  uint32_t tiny = 0xa876eff9U;  // exponent (without offset) = -47
  uint32_t huge = 0xe7d86b60U;  // exponent (without offset) = 80
  ASSERT_EQ(AsmFdiv(tiny, huge), MakeUInt128(0x0049065cULL, 0U));  // denormal
}
3321 
TEST(Arm64InsnTest, DivFp64) {
  // FDIV (scalar, double-precision).
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %d0, %d1, %d2");
  uint64_t dividend = 0x401e5c28f5c28f5cULL;  // 7.59 in double
  uint64_t divisor = 0x3ff28f5c28f5c28fULL;   // 1.16 in double
  // 6.5431034482758620995923593 in double
  ASSERT_EQ(AsmFdiv(dividend, divisor), MakeUInt128(0x401a2c234f72c235ULL, 0U));
}
3328 
TEST(Arm64InsnTest, DivFp32_FlagsWhenDivByZero) {
  // Scalar FDIV by zero must raise DZC (divide-by-zero cumulative) in FPSR
  // without also raising IOC (invalid-operation).
  uint64_t fpsr_after;
  volatile float dividend = 123.0f;
  volatile float divisor = 0.0f;
  float quotient;
  asm volatile(
      "msr fpsr, xzr\n\t"
      "fdiv %s1, %s2, %s3\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr_after), "=w"(quotient)
      : "w"(dividend), "w"(divisor));
  ASSERT_TRUE((fpsr_after & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr_after & kFpsrIocBit) == 0);
}
3345 
TEST(Arm64InsnTest, DivFp64_FlagsWhenDivByZero) {
  // Scalar FDIV by zero must raise DZC (divide-by-zero cumulative) in FPSR
  // without also raising IOC (invalid-operation).
  uint64_t fpsr_after;
  double quotient;
  asm volatile(
      "msr fpsr, xzr\n\t"
      "fdiv %d1, %d2, %d3\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr_after), "=w"(quotient)
      : "w"(123.0), "w"(0.0));
  ASSERT_TRUE((fpsr_after & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr_after & kFpsrIocBit) == 0);
}
3360 
TEST(Arm64InsnTest, DivFp32x4) {
  // FDIV (vector, single-precision): elementwise quotient.
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.4s, %1.4s, %2.4s");

  // 16.39, 80.286, 41.16, 98.01
  __uint128_t dividends = MakeUInt128(0x41831eb842a0926fULL, 0x4224a3d742c4051fULL);
  // 13.3, 45.45, 7.89, -2.63
  __uint128_t divisors = MakeUInt128(0x4154cccd4235cccdULL, 0x40fc7ae1c02851ecULL);
  // 1.2323308, 1.7664686, 5.21673, -37.26616
  ASSERT_EQ(AsmFdiv(dividends, divisors),
            MakeUInt128(0x3f9dbd043fe21ba5ULL, 0x40a6ef74c215108cULL));

  // Verify that fdiv produces a denormal result under the default FPCR.
  __uint128_t tiny = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.eddff2p-47f);
  __uint128_t huge = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.b0d6c0p80f);
  __uint128_t denormal_expected = MakeF32x4(1.0f, 1.0f, 1.0f, 0x0.920cb8p-126f);
  ASSERT_EQ(AsmFdiv(tiny, huge), denormal_expected);
}
3379 
TEST(Arm64InsnTest, DivFp64x2) {
  // FDIV (vector, double-precision): elementwise quotient.
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.2d, %1.2d, %2.2d");
  // 6.23, 65.02
  __uint128_t dividends = MakeUInt128(0x4018EB851EB851ECULL, 0x40504147AE147AE1ULL);
  // -7.54, 11.92
  __uint128_t divisors = MakeUInt128(0xC01E28F5C28F5C29ULL, 0x4027D70A3D70A3D7ULL);
  // -0.82625994695, 5.45469798658
  ASSERT_EQ(AsmFdiv(dividends, divisors),
            MakeUInt128(0xbfea70b8b3449564ULL, 0x4015d19c59579fc9ULL));
}
3389 
TEST(Arm64InsnTest, MulAddFp32) {
  // FMADD (scalar): fused multiply-add, %1 * %2 + %3 with a single rounding.
  constexpr auto AsmFmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %s0, %s1, %s2, %s3");

  // 2 * 3 + 5
  __uint128_t basic =
      AsmFmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f));
  ASSERT_EQ(basic, MakeF32x4(11.0f, 0, 0, 0));

  // 2.5 * 2 + (-5) == 0
  __uint128_t cancels =
      AsmFmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f));
  ASSERT_EQ(cancels, MakeF32x4(0, 0, 0, 0));

  // These tests verify that fmadd does not lose precision while doing the mult + add.
  __uint128_t precise1 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                  bit_cast<uint32_t>(0x1.000002p0f),
                                  bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(precise1, MakeF32x4(0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t precise2 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                  bit_cast<uint32_t>(0x1.000002p0f),
                                  bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(precise2, MakeF32x4(0x1.fffffep-1f, 0, 0, 0));

  __uint128_t precise3 = AsmFmadd(bit_cast<uint32_t>(0x1.p23f),
                                  bit_cast<uint32_t>(0x1.fffffep-1f),
                                  bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(precise3, MakeF32x4(-0x1.80p0f, 0, 0, 0));
}
3417 
TEST(Arm64InsnTest, MulAddFp64) {
  // FMADD computes %1 * %2 + %3; FNMADD negates that result.
  constexpr auto AsmFmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3");
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3");

  uint64_t factor1 = 0x40323d70a3d70a3dULL;  // 18.24
  uint64_t factor2 = 0x40504147ae147ae1ULL;  // 65.02
  uint64_t addend = 0x4027d70a3d70a3d7ULL;   // 11.92
  ASSERT_EQ(AsmFmadd(factor1, factor2, addend),
            MakeUInt128(0x4092b78a0902de00ULL, 0U));  // 1197.8848
  ASSERT_EQ(AsmFnmadd(factor1, factor2, addend),
            MakeUInt128(0xc092b78a0902de00ULL, 0U));  // -1197.8848
}
3428 
TEST(Arm64InsnTest, MulAddFp64Precision) {
  // FMADD must keep the full intermediate product (single rounding).
  constexpr auto AsmFmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3");
  __uint128_t fused = AsmFmadd(bit_cast<uint64_t>(0x1.0p1023),
                               bit_cast<uint64_t>(0x1.0p-1),
                               bit_cast<uint64_t>(0x1.fffffffffffffp1022));
  ASSERT_EQ(fused, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3436 
TEST(Arm64InsnTest, NegMulAddFp32) {
  // FNMADD (scalar): -(%1 * %2 + %3), fused with a single rounding.
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %s0, %s1, %s2, %s3");

  // -(2 * 3 + 5)
  __uint128_t basic =
      AsmFnmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f));
  ASSERT_EQ(basic, MakeF32x4(-11.0f, 0, 0, 0));

  // No -0 (proper negation)
  __uint128_t cancels =
      AsmFnmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f));
  ASSERT_EQ(cancels, MakeF32x4(0.0f, 0, 0, 0));

  // These tests verify that fnmadd does not lose precision while doing the mult + add.
  __uint128_t precise1 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                   bit_cast<uint32_t>(0x1.000002p0f),
                                   bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(precise1, MakeF32x4(-0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t precise2 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                   bit_cast<uint32_t>(0x1.000002p0f),
                                   bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(precise2, MakeF32x4(-0x1.fffffep-1f, 0, 0, 0));

  __uint128_t precise3 = AsmFnmadd(bit_cast<uint32_t>(0x1.p23f),
                                   bit_cast<uint32_t>(0x1.fffffep-1f),
                                   bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(precise3, MakeF32x4(0x1.80p0f, 0, 0, 0));
}
3465 
TEST(Arm64InsnTest, NegMulAddFp64) {
  // FNMADD (scalar, double): -(%1 * %2 + %3).
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3");

  // -(2 * 3 + 5)
  __uint128_t basic =
      AsmFnmadd(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(basic, MakeF64x2(-11.0, 0));

  // Proper negation (no -0 in this case)
  __uint128_t cancels =
      AsmFnmadd(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(-5.0));
  ASSERT_EQ(cancels, MakeF64x2(0.0, 0));
}
3478 
TEST(Arm64InsnTest, NegMulSubFp64) {
  // FNMSUB (scalar, double): %1 * %2 - %3, with the product negated relative
  // to FMSUB (i.e. -(%3 - %1 * %2)).
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");

  // (-2) * 3 - 5 == -11
  __uint128_t basic =
      AsmFnmsub(bit_cast<uint64_t>(-2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(basic, MakeF64x2(-11.0, 0));

  uint64_t factor1 = 0x40357ae147ae147bULL;  // 21.48
  uint64_t factor2 = 0x404ce3d70a3d70a4ull;  // 57.78
  uint64_t subtrahend = 0x405e29999999999aULL;  // 120.65
  ASSERT_EQ(AsmFnmsub(factor1, factor2, subtrahend),
            MakeUInt128(0x409181db8bac710dULL, 0U));  // 1120.4644

  // Assert no -0 in this case
  __uint128_t cancels =
      AsmFnmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(cancels, MakeF64x2(0.0, 0));
}
3497 
TEST(Arm64InsnTest, NegMulSubFp64Precision) {
  // FNMSUB must keep the full intermediate product (single rounding).
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");
  __uint128_t fused = AsmFnmsub(bit_cast<uint64_t>(0x1.0p1023),
                                bit_cast<uint64_t>(0x1.0p-1),
                                bit_cast<uint64_t>(-0x1.fffffffffffffp1022));
  ASSERT_EQ(fused, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3506 
TEST(Arm64InsnTest, MulAddF32x4) {
  // FMLA (vector): accumulator (%0) += %1 * %2, elementwise.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.4s");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t accum = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(factor1, factor2, accum), MakeF32x4(5.0f, 5.0f, 9.0f, 14.0f));
}
3514 
TEST(Arm64InsnTest, MulAddF32IndexedElem) {
  // FMLA (scalar, by element): accumulator += %1[0] * %2[2].
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %s0, %s1, %2.s[2]");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t accum = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  // 2 + (1 * 2)
  ASSERT_EQ(AsmFmla(factor1, factor2, accum), bit_cast<uint32_t>(4.0f));
}
3523 
TEST(Arm64InsnTest, MulAddF64IndexedElem) {
  // FMLA (scalar, by element): accumulator += %1[0] * %2[1].
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %d0, %d1, %2.d[1]");
  __uint128_t factor1 = MakeF64x2(2.0, 3.0);
  __uint128_t factor2 = MakeF64x2(4.0, 5.0);
  __uint128_t accum = MakeF64x2(6.0, 7.0);
  // 6 + (2 * 5)
  ASSERT_EQ(AsmFmla(factor1, factor2, accum), bit_cast<uint64_t>(16.0));
}
3532 
TEST(Arm64InsnTest, MulAddF64x2) {
  // FMLA (vector, double): accumulator (%0) += %1 * %2, elementwise.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.2d, %1.2d, %2.2d");
  // Use double literals: the previous float literals were silently promoted
  // by MakeF64x2, which was inconsistent with the other F64x2 tests.
  __uint128_t arg1 = MakeF64x2(1.0, 2.0);
  __uint128_t arg2 = MakeF64x2(3.0, 1.0);
  __uint128_t arg3 = MakeF64x2(2.0, 3.0);
  ASSERT_EQ(AsmFmla(arg1, arg2, arg3), MakeF64x2(5.0, 5.0));
}
3540 
TEST(Arm64InsnTest, MulAddF32x4IndexedElem) {
  // FMLA (vector, by element): accumulator += %1 * %2[2] (== 2.0) per lane.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.s[2]");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t accum = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(factor1, factor2, accum), MakeF32x4(4.0f, 7.0f, 9.0f, 8.0f));
}
3548 
TEST(Arm64InsnTest, MulSubFp32) {
  // FMSUB computes %3 - %1 * %2; FNMSUB negates that result.
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %s0, %s1, %s2, %s3");
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %s0, %s1, %s2, %s3");

  uint32_t factor1 = bit_cast<uint32_t>(2.0f);
  uint32_t factor2 = bit_cast<uint32_t>(5.0f);
  uint32_t minuend = bit_cast<uint32_t>(3.0f);
  // 3 - 2 * 5
  ASSERT_EQ(AsmFmsub(factor1, factor2, minuend), MakeUInt128(bit_cast<uint32_t>(-7.0f), 0U));
  // -(3 - 2 * 5)
  ASSERT_EQ(AsmFnmsub(factor1, factor2, minuend), MakeUInt128(bit_cast<uint32_t>(7.0f), 0U));
}
3559 
TEST(Arm64InsnTest, MulSubFp64) {
  // FMSUB (scalar, double): %3 - %1 * %2, fused with a single rounding.
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");

  uint64_t factor1 = 0x40357ae147ae147bULL;  // 21.48
  uint64_t factor2 = 0x404ce3d70a3d70a4ull;  // 57.78
  uint64_t minuend = 0x405e29999999999aULL;  // 120.65
  ASSERT_EQ(AsmFmsub(factor1, factor2, minuend),
            MakeUInt128(0xc09181db8bac710dULL, 0U));  // -1120.4644

  // Basic case: -5 - 2 * 3
  __uint128_t basic =
      AsmFmsub(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(-5.0));
  ASSERT_EQ(basic, MakeF64x2(-11.0, 0));

  // No -0 in this case (proper negation order)
  __uint128_t cancels =
      AsmFmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(cancels, MakeF64x2(0.0, 0));
}
3579 
TEST(Arm64InsnTest, MulSubFp64Precision) {
  // FMSUB must keep the full intermediate product (single rounding).
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");
  __uint128_t fused = AsmFmsub(bit_cast<uint64_t>(-0x1.0p1023),
                               bit_cast<uint64_t>(0x1.0p-1),
                               bit_cast<uint64_t>(0x1.fffffffffffffp1022));
  ASSERT_EQ(fused, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3587 
TEST(Arm64InsnTest, MulSubF32x4) {
  // FMLS (vector): accumulator (%0) -= %1 * %2, elementwise.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.4s");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t accum = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(factor1, factor2, accum), MakeF32x4(-1.0f, 1.0f, -7.0f, -10.0f));
}
3595 
TEST(Arm64InsnTest, MulSubF32IndexedElem) {
  // FMLS (scalar, by element): accumulator -= %1[0] * %2[2].
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %s0, %s1, %2.s[2]");
  __uint128_t factor1 = MakeF32x4(2.0f, 1.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(4.0f, 3.0f, 2.0f, 1.0f);
  __uint128_t accum = MakeF32x4(8.0f, 3.0f, 1.0f, 2.0f);
  // 8 - (2 * 2)
  ASSERT_EQ(AsmFmls(factor1, factor2, accum), bit_cast<uint32_t>(4.0f));
}
3604 
TEST(Arm64InsnTest, MulSubF32x4IndexedElem) {
  // FMLS (vector, by element): accumulator -= %1 * %2[2] (== 2.0) per lane.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.s[2]");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t accum = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(factor1, factor2, accum), MakeF32x4(0.0f, -1.0f, -7.0f, -4.0f));
}
3612 
TEST(Arm64InsnTest, MulSubF64x2) {
  // FMLS (vector, 2D): per lane, accumulator - arg1 * arg2.
  // Lane 0: 2 - 1*3 = -1; lane 1: 3 - 2*1 = 1.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.2d, %1.2d, %2.2d");
  // These are f64 lanes, so use double literals (the original used float
  // literals, which were silently promoted; all values are exact either way).
  __uint128_t arg1 = MakeF64x2(1.0, 2.0);
  __uint128_t arg2 = MakeF64x2(3.0, 1.0);
  __uint128_t arg3 = MakeF64x2(2.0, 3.0);
  ASSERT_EQ(AsmFmls(arg1, arg2, arg3), MakeF64x2(-1.0, 1.0));
}
3620 
TEST(Arm64InsnTest, MulSubF64IndexedElem) {
  // FMLS (scalar, by element): acc lane 0 of arg3 minus (arg1 lane 0 * arg2 lane 1).
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %d0, %d1, %2.d[1]");
  __uint128_t arg1 = MakeF64x2(2.0, 5.0);
  __uint128_t arg2 = MakeF64x2(4.0, 1.0);
  // Double literal throughout (original had a stray 7.0f; value is exact either way).
  __uint128_t arg3 = MakeF64x2(6.0, 7.0);
  // 6 - (2 * 1)
  ASSERT_EQ(AsmFmls(arg1, arg2, arg3), bit_cast<uint64_t>(4.0));
}
3629 
TEST(Arm64InsnTest, CompareEqualF32) {
  // FCMEQ (scalar): all-ones on equal, all-zeros otherwise; NaN never compares equal.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmeq(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN32AsInteger, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(two, kDefaultNaN32AsInteger), 0x00000000ULL);
}
3639 
TEST(Arm64InsnTest, CompareEqualF32x4) {
  // FCMEQ (vector): per-lane all-ones/all-zeros mask; only lane 1 (2.0 == 2.0) matches.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  __uint128_t res = AsmFcmeq(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3647 
TEST(Arm64InsnTest, CompareGreaterEqualF32) {
  // FCMGE (scalar): all-ones if op1 >= op2; any NaN operand yields false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmge(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(two, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(six, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN32AsInteger, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(two, kDefaultNaN32AsInteger), 0x00000000ULL);
}
3658 
TEST(Arm64InsnTest, CompareGreaterEqualF32x4) {
  // FCMGE (vector): lanes 1 (2>=2) and 2 (7>=-8) produce all-ones.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  __uint128_t res = AsmFcmge(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0x00000000ffffffffULL));
}
3666 
TEST(Arm64InsnTest, CompareGreaterF32) {
  // FCMGT (scalar): strictly greater-than; equal operands and NaN both yield false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmgt(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(two, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN32AsInteger, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(two, kDefaultNaN32AsInteger), 0x00000000ULL);
}
3677 
TEST(Arm64InsnTest, CompareGreaterF32x4) {
  // FCMGT (vector): only lane 2 (7 > -8) is strictly greater.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t arg2 = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  __uint128_t res = AsmFcmgt(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x00000000ffffffffULL));
}
3685 
TEST(Arm64InsnTest, CompareEqualZeroF32) {
  // FCMEQ (scalar, zero form): compares against +0.0.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %s0, %s1, #0");
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(4.0f)), 0x00000000ULL);
}
3691 
TEST(Arm64InsnTest, CompareEqualZeroF32x4) {
  // FCMEQ (vector, zero form): only lane 1 (0.0) equals zero.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmeq(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3698 
TEST(Arm64InsnTest, CompareGreaterThanZeroF32) {
  // FCMGT (scalar, zero form): strictly positive values only; 0.0 itself is false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
}
3705 
TEST(Arm64InsnTest, CompareGreaterThanZeroF32x4) {
  // FCMGT (vector, zero form): lanes 2 (7.0) and 3 (1.0) are > 0.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmgt(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
}
3712 
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32) {
  // FCMGE (scalar, zero form): 0.0 compares >= 0, negatives do not.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %s0, %s1, #0");
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
}
3719 
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32x4) {
  // FCMGE (vector, zero form): all lanes except lane 0 (-3.0) are >= 0.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmge(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffff00000000ULL, 0xffffffffffffffffULL));
}
3726 
TEST(Arm64InsnTest, CompareLessThanZeroF32) {
  // FCMLT (scalar, zero form): strictly negative values only.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3733 
TEST(Arm64InsnTest, CompareLessThanZeroF32x4) {
  // FCMLT (vector, zero form): only lane 0 (-3.0) is < 0.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmlt(arg);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
3740 
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32) {
  // FCMLE (scalar, zero form): 0.0 compares <= 0, positives do not.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %s0, %s1, #0");
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3747 
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32x4) {
  // FCMLE (vector, zero form): lanes 0 (-3.0) and 1 (0.0) are <= 0.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %0.4s, %1.4s, #0");
  __uint128_t arg = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  __uint128_t res = AsmFcmle(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3754 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32) {
  // FACGT (scalar): compares absolute values, |op1| > |op2| strictly.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %s0, %s1, %s2");
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3761 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanOrEqualF32) {
  // FACGE (scalar): compares absolute values, |op1| >= |op2|.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %s0, %s1, %s2");
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3768 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32x4) {
  // FACGT (vector): per lane |arg1| > |arg2|; lanes 0 (3>1) and 3 (4>2) pass.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
  __uint128_t arg2 = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
  ASSERT_EQ(AsmFacgt(arg1, arg2), MakeUInt128(0x00000000ffffffffULL, 0xffffffff00000000ULL));
}
3775 
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanEqualF32x4) {
  // FACGE (vector): per lane |arg1| >= |arg2|; only lane 2 (|3| < |-7|) fails.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %0.4s, %1.4s, %2.4s");
  __uint128_t arg1 = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
  __uint128_t arg2 = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
  ASSERT_EQ(AsmFacge(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0xffffffff00000000ULL));
}
3782 
TEST(Arm64InsnTest, CompareEqualF64) {
  // FCMEQ (scalar, f64): 64-bit all-ones on equal; NaN never compares equal.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmeq(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN64AsInteger, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(two, kDefaultNaN64AsInteger), 0x0000000000000000ULL);
}
3792 
TEST(Arm64InsnTest, CompareEqualF64x2) {
  // FCMEQ (vector, 2D): first pair matches only in lane 1 (2.0 == 2.0);
  // second pair matches in neither lane.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
  __uint128_t arg2 = MakeF64x2(6.0, 2.0);
  __uint128_t res = AsmFcmeq(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  arg1 = MakeF64x2(7.0, -0.0);
  arg2 = MakeF64x2(-8.0, 5.0);
  res = AsmFcmeq(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
3804 
TEST(Arm64InsnTest, CompareGreaterEqualF64) {
  // FCMGE (scalar, f64): op1 >= op2; any NaN operand yields false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmge(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(two, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(six, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN64AsInteger, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(two, kDefaultNaN64AsInteger), 0x0000000000000000ULL);
}
3815 
TEST(Arm64InsnTest, CompareGreaterEqualF64x2) {
  // FCMGE (vector, 2D): first pair passes only in lane 1 (2>=2);
  // second pair only in lane 0 (7>=-8).
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
  __uint128_t arg2 = MakeF64x2(6.0, 2.0);
  __uint128_t res = AsmFcmge(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  arg1 = MakeF64x2(7.0, -0.0);
  arg2 = MakeF64x2(-8.0, 5.0);
  res = AsmFcmge(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3827 
TEST(Arm64InsnTest, CompareGreaterF64) {
  // FCMGT (scalar, f64): strictly greater-than; equal operands and NaN yield false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmgt(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(two, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN64AsInteger, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(two, kDefaultNaN64AsInteger), 0x0000000000000000ULL);
}
3838 
TEST(Arm64InsnTest, CompareGreaterF64x2) {
  // FCMGT (vector, 2D): first pair passes in neither lane (equal is not greater);
  // second pair only in lane 0 (7 > -8).
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
  __uint128_t arg2 = MakeF64x2(6.0, 2.0);
  __uint128_t res = AsmFcmgt(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  arg1 = MakeF64x2(7.0, -0.0);
  arg2 = MakeF64x2(-8.0, 5.0);
  res = AsmFcmgt(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3850 
TEST(Arm64InsnTest, AndInt8x16) {
  // AND (vector, 16B): full 128-bit bitwise AND.
  __uint128_t op1 = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t op2 = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0480842700030170ULL, 0x2000004100001060ULL));
}
3857 
TEST(Arm64InsnTest, AndInt8x8) {
  // AND (vector, 8B): 64-bit form ANDs the low halves and zeroes the upper 64 bits.
  __uint128_t op1 = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t op2 = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0480842700030170ULL, 0));
}
3864 
TEST(Arm64InsnTest, OrInt8x16) {
  // ORR (vector, 16B): full 128-bit bitwise OR.
  __uint128_t op1 = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t op2 = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x44ffbb55ffbb7744ULL, 0x136a56c8136a56c8ULL));
}
3871 
TEST(Arm64InsnTest, OrInt8x8) {
  // ORR (vector, 8B): 64-bit form ORs the low halves and zeroes the upper 64 bits.
  __uint128_t op1 = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t op2 = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x44ffbb55ffbb7744ULL, 0));
}
3878 
TEST(Arm64InsnTest, XorInt8x16) {
  // EOR (vector, 16B): full 128-bit bitwise XOR.
  __uint128_t op1 = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t op2 = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x9269ff671cfe833bULL, 0xc6bd2031ce224768ULL));
}
3885 
TEST(Arm64InsnTest, XorInt8x8) {
  // EOR (vector, 8B): 64-bit form XORs the low halves and zeroes the upper 64 bits.
  __uint128_t op1 = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t op2 = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x9269ff671cfe833bULL, 0));
}
3892 
TEST(Arm64InsnTest, AndNotInt8x16) {
  // BIC (vector, register, 16B): op1 AND NOT op2 over all 128 bits.
  __uint128_t op1 = MakeUInt128(0x0313783875288658ULL, 0x7533208381420617ULL);
  __uint128_t op2 = MakeUInt128(0x2327917860857843ULL, 0x8382796797668145ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0010680015288618ULL, 0x7431008000000612ULL));
}
3899 
TEST(Arm64InsnTest, AndNotInt8x8) {
  // BIC (vector, register, 8B): low 64 bits of op1 AND NOT op2; upper half zeroed.
  __uint128_t op1 = MakeUInt128(0x4861045432664821ULL, 0x2590360011330530ULL);
  __uint128_t op2 = MakeUInt128(0x5420199561121290ULL, 0x8572424541506959ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0841044012644821ULL, 0x0000000000000000ULL));
}
3906 
TEST(Arm64InsnTest, AndNotInt16x4Imm) {
  // BIC (vector, immediate, 4H): clears bits 0-1 of each 16-bit lane in the
  // low half; the 64-bit form zeroes the upper 64 bits.
  __uint128_t res = MakeUInt128(0x9690314950191085ULL, 0x7598442391986291ULL);

  asm("bic %0.4h, #0x3" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x9690314850181084ULL, 0x0000000000000000ULL));
}
3914 
TEST(Arm64InsnTest, AndNotInt16x4ImmShiftedBy8) {
  // BIC (vector, immediate, 4H, LSL #8): clears 0xa800 in each 16-bit lane.
  __uint128_t res = MakeUInt128(0x8354056704038674ULL, 0x3513622224771589ULL);

  asm("bic %0.4h, #0xa8, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0354056704030674ULL, 0x0000000000000000ULL));
}
3922 
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy8) {
  // BIC (vector, immediate, 2S, LSL #8): clears 0x0000d300 in each 32-bit lane.
  __uint128_t res = MakeUInt128(0x1842631298608099ULL, 0x8886874132604721ULL);

  asm("bic %0.2s, #0xd3, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x1842201298600099ULL, 0x0000000000000000ULL));
}
3930 
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy16) {
  // BIC (vector, immediate, 2S, LSL #16): clears 0x00220000 in each 32-bit lane.
  __uint128_t res = MakeUInt128(0x2947867242292465ULL, 0x4366800980676928ULL);

  asm("bic %0.2s, #0x22, lsl #16" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x2945867242092465ULL, 0x0000000000000000ULL));
}
3938 
TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy24) {
  // BIC (vector, immediate, 2S, LSL #24): clears 0x83000000 in each 32-bit lane.
  __uint128_t res = MakeUInt128(0x0706977942236250ULL, 0x8221688957383798ULL);

  asm("bic %0.2s, #0x83, lsl #24" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0406977940236250ULL, 0x0000000000000000ULL));
}
3946 
TEST(Arm64InsnTest, OrInt16x4Imm) {
  // ORR (vector, immediate, 4H): sets bits 0 and 2 in each 16-bit lane of the
  // low half; the 64-bit form zeroes the upper 64 bits.
  __uint128_t res = MakeUInt128(0x0841284886269456ULL, 0x0424196528502221ULL);

  asm("orr %0.4h, #0x5" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0845284d86279457ULL, 0x0000000000000000ULL));
}
3954 
TEST(Arm64InsnTest, OrNotInt8x16) {
  // ORN (vector, 16B): op1 OR NOT op2 over all 128 bits.
  __uint128_t op1 = MakeUInt128(0x5428584447952658ULL, 0x6782105114135473ULL);
  __uint128_t op2 = MakeUInt128(0x3558764024749647ULL, 0x3263914199272604ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xdeafd9ffdf9f6ff8ULL, 0xef9e7eff76dbddfbULL));
}
3961 
TEST(Arm64InsnTest, OrNotInt8x8) {
  // ORN (vector, 8B): low 64 bits of op1 OR NOT op2; upper half zeroed.
  __uint128_t op1 = MakeUInt128(0x3279178608578438ULL, 0x3827967976681454ULL);
  __uint128_t op2 = MakeUInt128(0x6838689427741559ULL, 0x9185592524595395ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xb7ff97efd8dfeebeULL, 0x0000000000000000ULL));
}
3968 
TEST(Arm64InsnTest, BitwiseSelectInt8x8) {
  // BSL: destination (op3) acts as the mask — result takes bits from op1 where
  // the mask bit is 1 and from op2 where it is 0; upper half zeroed (8B form).
  __uint128_t op1 = MakeUInt128(0x2000568127145263ULL, 0x5608277857713427ULL);
  __uint128_t op2 = MakeUInt128(0x0792279689258923ULL, 0x5420199561121290ULL);
  __uint128_t op3 = MakeUInt128(0x8372978049951059ULL, 0x7317328160963185ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bsl %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x0480369681349963ULL, 0x0000000000000000ULL));
}
3976 
TEST(Arm64InsnTest, BitwiseInsertIfTrueInt8x8) {
  // BIT: op2 is the mask — bits of op1 are inserted into the destination (op3)
  // where the corresponding mask bit is 1; upper half zeroed (8B form).
  __uint128_t op1 = MakeUInt128(0x3678925903600113ULL, 0x3053054882046652ULL);
  __uint128_t op2 = MakeUInt128(0x9326117931051185ULL, 0x4807446237996274ULL);
  __uint128_t op3 = MakeUInt128(0x6430860213949463ULL, 0x9522473719070217ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bit %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x7630965b03908563ULL, 0x0000000000000000ULL));
}
3984 
TEST(Arm64InsnTest, BitwiseInsertIfFalseInt8x8) {
  // BIF: op2 is the mask — bits of op1 are inserted into the destination (op3)
  // where the corresponding mask bit is 0; upper half zeroed (8B form).
  __uint128_t op1 = MakeUInt128(0x7067982148086513ULL, 0x2823066470938446ULL);
  __uint128_t op2 = MakeUInt128(0x5964462294895493ULL, 0x0381964428810975ULL);
  __uint128_t op3 = MakeUInt128(0x0348610454326648ULL, 0x2133936072602491ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bif %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x2143d8015c006500ULL, 0x0000000000000000ULL));
}
3992 
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x1) {
  // SSHR (scalar): signed shift right — the negative input sign-extends into ones.
  __uint128_t arg = MakeUInt128(0x9486015046652681ULL, 0x4398770516153170ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #39")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffff290c02ULL, 0x0000000000000000ULL));
}
3998 
TEST(Arm64InsnTest, ArithmeticShiftRightBy64Int64x1) {
  // SSHR by the full element width (#64): a negative input becomes all-ones.
  __uint128_t arg = MakeUInt128(0x9176042601763387ULL, 0x0454990176143641ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #64")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4004 
TEST(Arm64InsnTest, ArithmeticShiftRightInt64x2) {
  // SSHR (vector, 2D): both 64-bit lanes shifted; both inputs are positive here.
  __uint128_t arg = MakeUInt128(0x7501116498327856ULL, 0x3531614516845769ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %0.2d, %1.2d, #35")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000000ea0222cULL, 0x0000000006a62c28ULL));
}
4010 
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt64x1) {
  // SSRA (scalar): accumulator (arg2) plus the signed-shifted arg1.
  __uint128_t arg1 = MakeUInt128(0x9667179643468760ULL, 0x0770479995378833ULL);
  __uint128_t arg2 = MakeUInt128(0x2557176908196030ULL, 0x9201824018842705ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #40")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2557176907afc747ULL, 0x0000000000000000ULL));
}
4017 
TEST(Arm64InsnTest, ArithmeticShiftRightBy64AccumulateInt64x1) {
  // SSRA with shift #64: a negative arg1 contributes -1 to the accumulator.
  __uint128_t arg1 = MakeUInt128(0x9223343657791601ULL, 0x2809317940171859ULL);
  __uint128_t arg2 = MakeUInt128(0x3498025249906698ULL, 0x4233017350358044ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #64")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3498025249906697ULL, 0x0000000000000000ULL));
}
4024 
TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt16x8) {
  // SSRA (vector, 8H): per 16-bit lane, accumulator + (arg1 signed >> 12).
  __uint128_t arg1 = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  __uint128_t arg2 = MakeUInt128(0x0101655256375678ULL, 0x5667227966198857ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00fa6556563a567dULL, 0x5669227b6611885cULL));
}
4031 
TEST(Arm64InsnTest, ArithmeticRoundingShiftRightAccumulateInt16x8) {
  // SRSRA (vector, 8H): per 16-bit lane, accumulator + round(arg1 signed >> 12),
  // where rounding adds 1 << 11 before the shift.
  __uint128_t arg1 = MakeUInt128(0x9894671543578468ULL, 0x7886144458123145ULL);
  __uint128_t arg2 = MakeUInt128(0x1412147805734551ULL, 0x0500801908699603ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x140c147e05774549ULL, 0x0508801a086f9606ULL));
}
4038 
TEST(Arm64InsnTest, LogicalShiftRightInt64x1) {
  // USHR (scalar): unsigned shift right — zero-fills from the top.
  __uint128_t arg = MakeUInt128(0x9859771921805158ULL, 0x5321473926532515ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #33")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000004c2cbb8cULL, 0x0000000000000000ULL));
}
4044 
TEST(Arm64InsnTest, LogicalShiftRightBy64Int64x1) {
  // USHR by the full element width (#64): result is zero.
  __uint128_t arg = MakeUInt128(0x9474696134360928ULL, 0x6148494178501718ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #64")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4050 
TEST(Arm64InsnTest, LogicalShiftRightInt64x2) {
  // USHR (vector, 2D): both 64-bit lanes shifted with zero fill.
  __uint128_t op = MakeUInt128(0x3962657978771855ULL, 0x6084552965412665ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %0.2d, %1.2d, #33")(op);
  ASSERT_EQ(rd, MakeUInt128(0x000000001cb132bcULL, 0x0000000030422a94ULL));
}
4056 
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt64x1) {
  // USRA (scalar): accumulator (arg2) plus the unsigned-shifted arg1.
  __uint128_t arg1 = MakeUInt128(0x9004112453790153ULL, 0x3296615697052237ULL);
  __uint128_t arg2 = MakeUInt128(0x0499939532215362ULL, 0x2748476603613677ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #40")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0499939532b15773ULL, 0x0000000000000000ULL));
}
4063 
TEST(Arm64InsnTest, LogicalShiftRightBy64AccumulateInt64x1) {
  // USRA with shift #64: the shifted term is zero, so the accumulator is unchanged.
  __uint128_t arg1 = MakeUInt128(0x9886592578662856ULL, 0x1249665523533829ULL);
  __uint128_t arg2 = MakeUInt128(0x3559152534784459ULL, 0x8183134112900199ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #64")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3559152534784459ULL, 0x0000000000000000ULL));
}
4070 
TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt16x8) {
  // USRA (vector, 8H): per 16-bit lane, accumulator + (arg1 unsigned >> 12).
  __uint128_t arg1 = MakeUInt128(0x9984345225161050ULL, 0x7027056235266012ULL);
  __uint128_t arg2 = MakeUInt128(0x4628654036036745ULL, 0x3286510570658748ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x4631654336056746ULL, 0x328d51057068874eULL));
}
4077 
TEST(Arm64InsnTest, LogicalRoundingShiftRightAccumulateInt16x8) {
  // URSRA (vector, 8H): per 16-bit lane, accumulator + round(arg1 unsigned >> 12),
  // where rounding adds 1 << 11 before the shift.
  // NOTE: this test previously used srsra, duplicating
  // ArithmeticRoundingShiftRightAccumulateInt16x8 and leaving ursra untested;
  // per the file's convention "Logical" denotes the unsigned form. Only the
  // first low-half lane (0x9843, top bit set) differs between signed and
  // unsigned: (0x9843 + 0x800) >> 12 = 0xa, so 0x6286 + 0xa = 0x6290.
  __uint128_t arg1 = MakeUInt128(0x9843452251610507ULL, 0x0270562352660127ULL);
  __uint128_t arg2 = MakeUInt128(0x6286540360367453ULL, 0x2865105706587488ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ursra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x62905407603b7453ULL, 0x2865105c065d7488ULL));
}
4084 
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x1) {
  // SRSHR (scalar): signed rounding shift right (adds 1 << 39 before shifting);
  // the negative input sign-extends into ones.
  __uint128_t arg = MakeUInt128(0x9323685785585581ULL, 0x9555604215625088ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %d0, %d1, #40")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffff932368ULL, 0x0000000000000000ULL));
}
4090 
TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x2) {
  // SRSHR (vector, 2D): lane 0 is negative (sign-extended), lane 1 positive.
  __uint128_t arg = MakeUInt128(0x8714878398908107ULL, 0x4295309410605969ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %0.2d, %1.2d, #36")(arg);
  ASSERT_EQ(res, MakeUInt128(0xfffffffff8714878ULL, 0x0000000004295309ULL));
}
4096 
TEST(Arm64InsnTest, SignedRoundingShiftRightAccumulateInt64x1) {
  // SRSRA (scalar): accumulator + round(arg1 signed >> 33).
  __uint128_t arg1 = MakeUInt128(0x9946016520577405ULL, 0x2942305360178031ULL);
  __uint128_t arg2 = MakeUInt128(0x3960188013782542ULL, 0x1927094767337191ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %d0, %d1, #33")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3960187fe01b25f5ULL, 0x0000000000000000ULL));
}
4103 
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x1) {
  // URSHR (scalar): unsigned rounding shift right (adds 1 << 32 before shifting).
  __uint128_t arg = MakeUInt128(0x9713552208445285ULL, 0x2640081252027665ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %d0, %d1, #33")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000004b89aa91ULL, 0x0000000000000000ULL));
}
4109 
TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x2) {
  // URSHR (vector, 2D): both 64-bit lanes rounded then zero-fill shifted.
  __uint128_t arg = MakeUInt128(0x6653398573888786ULL, 0x6147629443414010ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %0.2d, %1.2d, #34")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000001994ce61ULL, 0x000000001851d8a5ULL));
}
4115 
TEST(Arm64InsnTest, UnsignedRoundingShiftRightAccumulateInt64x1) {
  // URSRA (scalar): accumulator + round(arg1 unsigned >> 33).
  __uint128_t arg1 = MakeUInt128(0x9616143204006381ULL, 0x3224658411111577ULL);
  __uint128_t arg2 = MakeUInt128(0x7184728147519983ULL, 0x5050478129771859ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ursra %d0, %d1, #33")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x71847281925ca39cULL, 0x0000000000000000ULL));
}
4122 
TEST(Arm64InsnTest, ShiftLeftInt64x1) {
  // SHL (scalar): left shift, zero-fills from the bottom.
  __uint128_t arg = MakeUInt128(0x3903594664691623ULL, 0x5396809201394578ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %d0, %d1, #35")(arg);
  ASSERT_EQ(res, MakeUInt128(0x2348b11800000000ULL, 0x0000000000000000ULL));
}
4128 
TEST(Arm64InsnTest, ShiftLeftInt64x2) {
  // SHL (vector, 2D): both 64-bit lanes shifted left independently.
  __uint128_t arg = MakeUInt128(0x0750111649832785ULL, 0x6353161451684576ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.2d, %1.2d, #37")(arg);
  ASSERT_EQ(res, MakeUInt128(0x3064f0a000000000ULL, 0x2d08aec000000000ULL));
}
4134 
TEST(Arm64InsnTest, ShiftLeftInt8x8) {
  // SHL (vector, 8B): each byte shifted left by 6 (only the low 2 bits survive).
  __uint128_t arg = MakeUInt128(0x0402956047346131ULL, 0x1382638788975517ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.8b, %1.8b, #6")(arg);
  ASSERT_EQ(res, MakeUInt128(0x00804000c0004040ULL, 0x0000000000000000ULL));
}
4140 
TEST(Arm64InsnTest, ShiftRightInsertInt8x8) {
  // SRI (vector, 8B): per byte, shifts arg1 right by 4 and inserts the shifted
  // bits into the destination (arg2), keeping arg2's top 4 bits.
  __uint128_t arg1 = MakeUInt128(0x9112232618794059ULL, 0x9415540632701319ULL);
  __uint128_t arg2 = MakeUInt128(0x1537675115830432ULL, 0x0849872092028092ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %0.8b, %1.8b, #4")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1931625211870435ULL, 0x0000000000000000ULL));
}
4147 
TEST(Arm64InsnTest, ShiftRightInsertInt64x1) {
  // SRI (scalar): arg1 >> 20 inserted into arg2; arg2's top 20 bits are kept.
  __uint128_t arg1 = MakeUInt128(0x9112232618794059ULL, 0x9415540632701319ULL);
  __uint128_t arg2 = MakeUInt128(0x1537675115830432ULL, 0x0849872092028092ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %d0, %d1, #20")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1537691122326187ULL, 0x0000000000000000ULL));
}
4154 
TEST(Arm64InsnTest, ShiftRightInsertInt64x2) {
  // SRI (vector, 2D): per 64-bit lane, arg1 >> 21 inserted into arg2's low bits.
  __uint128_t arg1 = MakeUInt128(0x7332335603484653ULL, 0x1873029302665964ULL);
  __uint128_t arg2 = MakeUInt128(0x5013718375428897ULL, 0x5579714499246540ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %0.2d, %1.2d, #21")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x50137399919ab01aULL, 0x557970c398149813ULL));
}
4161 
TEST(Arm64InsnTest, ShiftLeftInsertInt64x1) {
  // SLI (scalar): left-shift the source doubleword by 23 and insert it above
  // the destination's preserved low 23 bits.
  constexpr auto AsmSli = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %d0, %d1, #23");
  __uint128_t src = MakeUInt128(0x3763526969344354ULL, 0x4004730671988689ULL);
  __uint128_t dst = MakeUInt128(0x6369498567302175ULL, 0x2313252926537589ULL);
  ASSERT_EQ(AsmSli(src, dst), MakeUInt128(0x34b49a21aa302175ULL, 0x0000000000000000ULL));
}
4168 
TEST(Arm64InsnTest, ShiftLeftInsertInt64x2) {
  // SLI (vector, 2x64): per-lane left shift by 21 inserted over the
  // destination's preserved low bits.
  constexpr auto AsmSli = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %0.2d, %1.2d, #21");
  __uint128_t src = MakeUInt128(0x3270206902872323ULL, 0x3005386216347988ULL);
  __uint128_t dst = MakeUInt128(0x5094695472004795ULL, 0x2311201504329322ULL);
  ASSERT_EQ(AsmSli(src, dst), MakeUInt128(0x0d2050e464604795ULL, 0x0c42c68f31129322ULL));
}
4175 
TEST(Arm64InsnTest, ShiftLeftLongInt8x8) {
  // SHLL: widen each byte of the low half to 16 bits while shifting left by 8.
  constexpr auto AsmShll = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll %0.8h, %1.8b, #8");
  __uint128_t operand = MakeUInt128(0x2650697620201995ULL, 0x5484126500053944ULL);
  ASSERT_EQ(AsmShll(operand), MakeUInt128(0x2000200019009500ULL, 0x2600500069007600ULL));
}
4181 
TEST(Arm64InsnTest, UnsignedShiftLeftLongInt8x8) {
  // USHLL: zero-extend each byte of the low half to 16 bits, then shift left by 4.
  __uint128_t arg = MakeUInt128(0x2650697620201995ULL, 0x5484126500053944ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll %0.8h, %1.8b, #4")(arg);
  // Expected-value literals padded to the conventional 16 hex digits (the
  // originals had only 15 digits; the numeric values are unchanged).
  ASSERT_EQ(res, MakeUInt128(0x0200020001900950ULL, 0x0260050006900760ULL));
}
4187 
TEST(Arm64InsnTest, ShiftLeftLongInt8x8Upper) {
  // SHLL2: same widening left shift by 8, but sourcing the upper eight bytes.
  constexpr auto AsmShll2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll2 %0.8h, %1.16b, #8");
  __uint128_t operand = MakeUInt128(0x9050429225978771ULL, 0x0667873840000616ULL);
  ASSERT_EQ(AsmShll2(operand), MakeUInt128(0x4000000006001600ULL, 0x0600670087003800ULL));
}
4193 
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2) {
  // SSHLL: sign-extend the two low 32-bit lanes to 64 bits, then shift left by 9.
  constexpr auto AsmSshll = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll %0.2d, %1.2s, #9");
  __uint128_t operand = MakeUInt128(0x9075407923424023ULL, 0x0092590070173196ULL);
  ASSERT_EQ(AsmSshll(operand), MakeUInt128(0x0000004684804600ULL, 0xffffff20ea80f200ULL));
}
4199 
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2Upper) {
  // SSHLL2: widening signed shift by 9 applied to the upper two 32-bit lanes.
  constexpr auto AsmSshll2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll2 %0.2d, %1.4s, #9");
  __uint128_t operand = MakeUInt128(0x9382432227188515ULL, 0x9740547021482897ULL);
  ASSERT_EQ(AsmSshll2(operand), MakeUInt128(0x0000004290512e00ULL, 0xffffff2e80a8e000ULL));
}
4205 
TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2By0) {
  // SXTL is the alias of SSHLL with a zero shift count: it sign-extends each
  // of the two low 32-bit lanes to 64 bits.
  constexpr auto AsmSxtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sxtl %0.2d, %1.2s");
  __uint128_t operand = MakeUInt128(0x9008777697763127ULL, 0x9572267265556259ULL);
  ASSERT_EQ(AsmSxtl(operand), MakeUInt128(0xffffffff97763127ULL, 0xffffffff90087776ULL));
}
4212 
TEST(Arm64InsnTest, ShiftLeftLongInt32x2) {
  // USHLL: zero-extend the two low 32-bit lanes to 64 bits, then shift left by 9.
  constexpr auto AsmUshll = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll %0.2d, %1.2s, #9");
  __uint128_t operand = MakeUInt128(0x9094334676851422ULL, 0x1447737939375170ULL);
  ASSERT_EQ(AsmUshll(operand), MakeUInt128(0x000000ed0a284400ULL, 0x0000012128668c00ULL));
}
4218 
TEST(Arm64InsnTest, ShiftLeftLongInt32x2Upper) {
  // USHLL2: widening unsigned shift by 17 applied to the upper two 32-bit lanes.
  constexpr auto AsmUshll2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll2 %0.2d, %1.4s, #17");
  __uint128_t operand = MakeUInt128(0x7096834080053559ULL, 0x8491754173818839ULL);
  ASSERT_EQ(AsmUshll2(operand), MakeUInt128(0x0000e70310720000ULL, 0x00010922ea820000ULL));
}
4224 
TEST(Arm64InsnTest, ShiftLeftLongInt32x2By0) {
  // UXTL is the alias of USHLL with a zero shift count: it zero-extends each
  // of the two low 32-bit lanes to 64 bits.
  constexpr auto AsmUxtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uxtl %0.2d, %1.2s");
  __uint128_t operand = MakeUInt128(0x9945681506526530ULL, 0x5371829412703369ULL);
  ASSERT_EQ(AsmUxtl(operand), MakeUInt128(0x0000000006526530ULL, 0x0000000099456815ULL));
}
4231 
TEST(Arm64InsnTest, ShiftRightNarrowI16x8) {
  // SHRN: shift each 16-bit lane right by 2 and narrow to bytes in the low half.
  constexpr auto AsmShrn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shrn %0.8b, %1.8h, #2");
  __uint128_t operand = MakeUInt128(0x9378541786109696ULL, 0x9202538865034577ULL);
  ASSERT_EQ(AsmShrn(operand), MakeUInt128(0x80e2405dde0584a5ULL, 0x0000000000000000ULL));
}
4237 
TEST(Arm64InsnTest, ShiftRightNarrowI16x8Upper) {
  // SHRN2: shift-right-narrow into the upper half while the destination's
  // lower half is preserved.
  constexpr auto AsmShrn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("shrn2 %0.16b, %1.8h, #2");
  __uint128_t src = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t dst = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  ASSERT_EQ(AsmShrn2(src, dst), MakeUInt128(0x3879158299848645ULL, 0xd8988dc1de009890ULL));
}
4244 
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8) {
  // RSHRN: like SHRN but rounds (adds half an LSB) before narrowing.
  constexpr auto AsmRshrn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rshrn %0.8b, %1.8h, #2");
  __uint128_t operand = MakeUInt128(0x9303774688099929ULL, 0x6877582441047878ULL);
  ASSERT_EQ(AsmRshrn(operand), MakeUInt128(0x1e09411ec1d2024aULL, 0x0000000000000000ULL));
}
4250 
TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8Upper) {
  // RSHRN2: rounding shift-right-narrow into the upper half; the destination's
  // lower half is preserved.
  constexpr auto AsmRshrn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("rshrn2 %0.16b, %1.8h, #2");
  __uint128_t src = MakeUInt128(0x9314507607167064ULL, 0x3556827437743965ULL);
  __uint128_t dst = MakeUInt128(0x2103098604092717ULL, 0x0909512808630902ULL);
  ASSERT_EQ(AsmRshrn2(src, dst), MakeUInt128(0x2103098604092717ULL, 0x569ddd59c51ec619ULL));
}
4257 
TEST(Arm64InsnTest, AddInt64x1) {
  // ADD (scalar): adds only the low doublewords; the upper halves are ignored
  // and the result's upper half is zeroed.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %d0, %d1, %d2");
  __uint128_t lhs = MakeUInt128(0x0080000000000003ULL, 0xdeadbeef01234567ULL);
  __uint128_t rhs = MakeUInt128(0x0080000000000005ULL, 0x0123deadbeef4567ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0100000000000008ULL, 0x0ULL));
}
4264 
TEST(Arm64InsnTest, AddInt32x4) {
  // ADD (vector, 4x32): every lane has its sign bit set so the test verifies
  // that carries never propagate across lane boundaries.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0000000700000003ULL, 0x0000000f0000000bULL));
}
4273 
TEST(Arm64InsnTest, AddInt32x2) {
  // ADD (vector, 2x32): only the low two lanes participate; upper half zeroed.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2s, %1.2s, %2.2s");
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0000000700000003ULL, 0));
}
4280 
TEST(Arm64InsnTest, AddInt64x2) {
  // ADD (vector, 2x64): per-lane 64-bit addition; no carry between lanes.
  constexpr auto AsmAdd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t rhs = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  ASSERT_EQ(AsmAdd(lhs, rhs), MakeUInt128(0x0000000800000003ULL, 0x000000100000000bULL));
}
4287 
TEST(Arm64InsnTest, SubInt64x1) {
  // SUB (scalar): 2 - 3 wraps to all-ones in the low doubleword; upper half zeroed.
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %d0, %d1, %d2");
  __uint128_t minuend = MakeUInt128(0x0000000000000002ULL, 0x0011223344556677ULL);
  __uint128_t subtrahend = MakeUInt128(0x0000000000000003ULL, 0x0123456789abcdefULL);
  ASSERT_EQ(AsmSub(minuend, subtrahend), MakeUInt128(0xffffffffffffffffULL, 0x0ULL));
}
4294 
TEST(Arm64InsnTest, SubInt64x2) {
  // SUB (vector, 2x64): per-lane subtraction with wraparound on underflow.
  __uint128_t minuend = MakeUInt128(0x6873115956286388ULL, 0x2353787593751957ULL);
  __uint128_t subtrahend = MakeUInt128(0x7818577805321712ULL, 0x2680661300192787ULL);
  __uint128_t diff =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2d, %1.2d, %2.2d")(minuend, subtrahend);
  ASSERT_EQ(diff, MakeUInt128(0xf05ab9e150f64c76ULL, 0xfcd31262935bf1d0ULL));
}
4302 
TEST(Arm64InsnTest, SubInt32x4) {
  // SUB (vector, 4x32): per-lane subtraction; no borrow crosses lane boundaries.
  __uint128_t op1 = MakeUInt128(0x0000000A00000005ULL, 0x0000000C00000C45ULL);
  __uint128_t op2 = MakeUInt128(0x0000000500000003ULL, 0x0000000200000C45ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4s, %1.4s, %2.4s")(op1, op2);
  // The expected high half originally had 17 hex digits (a stray extra zero);
  // normalized to 16 digits, same numeric value.
  ASSERT_EQ(rd, MakeUInt128(0x0000000500000002ULL, 0x0000000A00000000ULL));
}
4309 
TEST(Arm64InsnTest, SubInt32x2) {
  // SUB (vector, 2x32): only the low two lanes participate; upper half zeroed.
  __uint128_t op1 = MakeUInt128(0x0000000000000005ULL, 0x0000000000000C45ULL);
  __uint128_t op2 = MakeUInt128(0x0000000000000003ULL, 0x0000000000000C45ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2s, %1.2s, %2.2s")(op1, op2);
  // The expected high half originally had 17 hex digits (a stray extra zero);
  // normalized to 16 digits, same numeric value.
  ASSERT_EQ(rd, MakeUInt128(0x0000000000000002ULL, 0x0000000000000000ULL));
}
4316 
TEST(Arm64InsnTest, SubInt16x4) {
  // SUB (vector, 4x16): per-lane 16-bit subtraction in the low half.
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4h, %1.4h, %2.4h");
  __uint128_t minuend = MakeUInt128(0x8888777766665555ULL, 0);
  __uint128_t subtrahend = MakeUInt128(0x1111222233334444ULL, 0);
  ASSERT_EQ(AsmSub(minuend, subtrahend), MakeUInt128(0x7777555533331111ULL, 0));
}
4323 
TEST(Arm64InsnTest, MultiplyI8x8) {
  // MUL (vector, 8x8): per-byte multiply keeping only the low 8 bits of each product.
  constexpr auto AsmMul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x5261365549781893ULL, 0x1297848216829989ULL);
  __uint128_t rhs = MakeUInt128(0x4542858444795265ULL, 0x8678210511413547ULL);
  ASSERT_EQ(AsmMul(lhs, rhs), MakeUInt128(0x1a020ed464b8b0ffULL, 0x0000000000000000ULL));
}
4330 
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8) {
  // MLA (vector, 8x8): per-byte multiply-accumulate into the destination.
  constexpr auto AsmMla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x5848406353422072ULL, 0x2258284886481584ULL);
  __uint128_t rhs = MakeUInt128(0x7823986456596116ULL, 0x3548862305774564ULL);
  __uint128_t acc = MakeUInt128(0x8797108931456691ULL, 0x3686722874894056ULL);
  ASSERT_EQ(AsmMla(lhs, rhs, acc), MakeUInt128(0xc76f10351337865dULL, 0x0000000000000000ULL));
}
4338 
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElem) {
  // MLA (by element): multiply every 16-bit lane by element 0 of the second
  // operand and accumulate into the destination.
  constexpr auto AsmMla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.4h, %1.4h, %2.h[0]");
  __uint128_t lhs = MakeUInt128(0x4143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t rhs = MakeUInt128(0x5346462080466842ULL, 0x5906949129331367ULL);
  __uint128_t acc = MakeUInt128(0x0355876402474964ULL, 0x7326391419927260ULL);
  ASSERT_EQ(AsmMla(lhs, rhs, acc), MakeUInt128(0x0e9bc72e5eb38710ULL, 0x0000000000000000ULL));
}
4346 
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElemPosition2) {
  // MLA (by element): multiply the two low 32-bit lanes by element 2 of the
  // second operand and accumulate into the destination.
  constexpr auto AsmMla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.2s, %1.2s, %2.s[2]");
  __uint128_t lhs = MakeUInt128(0x1431429809190659ULL, 0x2509372216964615ULL);
  __uint128_t rhs = MakeUInt128(0x2686838689427741ULL, 0x5599185592524595ULL);
  __uint128_t acc = MakeUInt128(0x6099124608051243ULL, 0x8843904512441365ULL);
  ASSERT_EQ(AsmMla(lhs, rhs, acc), MakeUInt128(0x6ce7ccbedccdc110ULL, 0x0000000000000000ULL));
}
4354 
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8IndexedElem) {
  // MLS (by element): multiply every 16-bit lane by element 1 of the second
  // operand and subtract the products from the destination.
  constexpr auto AsmMls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.4h, %1.4h, %2.h[1]");
  __uint128_t lhs = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t rhs = MakeUInt128(0x6549911988183479ULL, 0x7753566369807426ULL);
  __uint128_t acc = MakeUInt128(0x4524919217321721ULL, 0x4772350141441973ULL);
  ASSERT_EQ(AsmMls(lhs, rhs, acc), MakeUInt128(0xcefce99ad58a9ad9ULL, 0x0000000000000000ULL));
}
4362 
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8) {
  // MLS (vector, 8x8): per-byte multiply, then subtract from the destination.
  constexpr auto AsmMls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x0635342207222582ULL, 0x8488648158456028ULL);
  __uint128_t rhs = MakeUInt128(0x9864565961163548ULL, 0x8623057745649803ULL);
  __uint128_t acc = MakeUInt128(0x1089314566913686ULL, 0x7228748940560101ULL);
  ASSERT_EQ(AsmMls(lhs, rhs, acc), MakeUInt128(0x80d5b973bfa58df6ULL, 0x0000000000000000ULL));
}
4370 
TEST(Arm64InsnTest, MultiplyI32x4IndexedElem) {
  // MUL (by element): multiply every 32-bit lane of arg1 by element 1 of arg2.
  // Input literals padded to the conventional 16 hex digits (the originals
  // had 15 digits; the numeric values are unchanged).
  __uint128_t arg1 = MakeUInt128(0x0143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t arg2 = MakeUInt128(0x0627232791786085ULL, 0x7843838279679766ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.4s, %1.4s, %2.s[1]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xcec23e830d48815aULL, 0xd12b87288ae0a3f3ULL));
}
4377 
TEST(Arm64InsnTest, PolynomialMultiplyU8x8) {
  // PMUL: carry-less (polynomial over GF(2)) per-byte multiply, low 8 bits kept.
  constexpr auto AsmPmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmul %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x1862056476931257ULL, 0x0586356620185581ULL);
  __uint128_t rhs = MakeUInt128(0x1668039626579787ULL, 0x7185560845529654ULL);
  ASSERT_EQ(AsmPmul(lhs, rhs), MakeUInt128(0xd0d00f18f4095e25ULL, 0x0000000000000000ULL));
}
4384 
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8) {
  // PMULL: widening carry-less multiply of the low eight bytes to 16-bit lanes.
  constexpr auto AsmPmull = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.8h, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x1327656180937734ULL, 0x4403070746921120ULL);
  __uint128_t rhs = MakeUInt128(0x9838952286847831ULL, 0x2355265821314495ULL);
  ASSERT_EQ(AsmPmull(lhs, rhs), MakeUInt128(0x43004bcc17e805f4ULL, 0x082807a835210ce2ULL));
}
4391 
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8Upper) {
  // PMULL2: widening carry-less multiply sourcing the upper eight bytes.
  constexpr auto AsmPmull2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.8h, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x4439658253375438ULL, 0x8569094113031509ULL);
  __uint128_t rhs = MakeUInt128(0x1865619673378623ULL, 0x6256125216320862ULL);
  ASSERT_EQ(AsmPmull2(lhs, rhs), MakeUInt128(0x015a005600a80372ULL, 0x30ea1da6008214d2ULL));
}
4398 
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2) {
  // PMULL (1Q): 64x64 -> 128-bit carry-less multiply of the low doublewords.
  constexpr auto AsmPmull = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.1q, %1.1d, %2.1d");
  __uint128_t lhs = MakeUInt128(0x1000100010001000ULL, 0xffffeeeeffffeeeeULL);
  __uint128_t rhs = MakeUInt128(0x10001ULL, 0xffffeeeeffffeeeeULL);
  ASSERT_EQ(AsmPmull(lhs, rhs), MakeUInt128(0x1000ULL, 0x1000ULL));
}
4405 
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2Upper) {
  // PMULL2 (1Q): 64x64 -> 128-bit carry-less multiply of the upper doublewords.
  constexpr auto AsmPmull2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.1q, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(0xffffeeeeffffeeeeULL, 0x1000100010001000ULL);
  __uint128_t rhs = MakeUInt128(0xffffeeeeffffeeeeULL, 0x10001ULL);
  ASSERT_EQ(AsmPmull2(lhs, rhs), MakeUInt128(0x1000ULL, 0x1000ULL));
}
4412 
TEST(Arm64InsnTest, PairwiseAddInt8x16) {
  // ADDP (vector, 16x8): sums adjacent byte pairs across the concatenated operands.
  constexpr auto AsmAddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.16b, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t rhs = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  ASSERT_EQ(AsmAddp(lhs, rhs), MakeUInt128(0xeda96521dd995511ULL, 0x1d1915110d090501ULL));
}
4419 
TEST(Arm64InsnTest, PairwiseAddInt8x8) {
  // ADDP (vector, 8x8): pairwise byte sums over the low halves of both operands.
  constexpr auto AsmAddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t rhs = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  ASSERT_EQ(AsmAddp(lhs, rhs), MakeUInt128(0x0d090501dd995511ULL, 0));
}
4426 
TEST(Arm64InsnTest, PairwiseAddInt64x2) {
  // ADDP (vector, 2x64): result lanes are (lhs.lo + lhs.hi, rhs.lo + rhs.hi).
  constexpr auto AsmAddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(1ULL, 2ULL);
  __uint128_t rhs = MakeUInt128(3ULL, 4ULL);
  ASSERT_EQ(AsmAddp(lhs, rhs), MakeUInt128(3ULL, 7ULL));
}
4433 
TEST(Arm64InsnTest, CompareEqualInt8x16) {
  // CMEQ (vector, 16x8): equal bytes become 0xff, unequal bytes become 0x00.
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.16b, %1.16b, %2.16b");
  __uint128_t lhs = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t rhs = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  ASSERT_EQ(AsmCmeq(lhs, rhs), MakeUInt128(0xff0000ffff00ff00ULL, 0x0000ff00ff0000ffULL));
}
4440 
TEST(Arm64InsnTest, CompareEqualInt8x8) {
  // CMEQ (vector, 8x8): byte-equality masks in the low half; upper half zeroed.
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t rhs = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  ASSERT_EQ(AsmCmeq(lhs, rhs), MakeUInt128(0xff0000ffff00ff00ULL, 0));
}
4447 
TEST(Arm64InsnTest, CompareEqualInt16x4) {
  // CMEQ (vector, 4x16): 16-bit equality masks in the low half.
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.4h, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x4444333322221111ULL, 0);
  __uint128_t rhs = MakeUInt128(0x8888333300001111ULL, 0);
  ASSERT_EQ(AsmCmeq(lhs, rhs), MakeUInt128(0x0000ffff0000ffffULL, 0));
}
4454 
TEST(Arm64InsnTest, CompareEqualInt64x1) {
  // CMEQ (scalar): all-ones if the low doublewords match, otherwise zero;
  // upper halves of the operands are ignored.
  constexpr auto Cmeq64 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t different = MakeUInt128(0x0665499119881834ULL, 0x7977535663698074ULL);
  __uint128_t same_low = MakeUInt128(0x8297455570674983ULL, 0x1452491921732172ULL);
  ASSERT_EQ(Cmeq64(base, different), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmeq64(base, same_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4463 
TEST(Arm64InsnTest, CompareEqualZeroInt64x1) {
  // CMEQ #0 (scalar): all-ones when the low doubleword is zero.
  constexpr auto Cmeq0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %d0, %d1, #0");
  __uint128_t nonzero = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t zero_low = MakeUInt128(0x0000000000000000ULL, 0x1746089232839170ULL);
  ASSERT_EQ(Cmeq0(nonzero), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmeq0(zero_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4471 
TEST(Arm64InsnTest, CompareEqualZeroInt8x16) {
  // CMEQ #0 (vector, 16x8): zero bytes become 0xff, nonzero bytes become 0x00.
  constexpr auto Cmeq0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.16b, %1.16b, #0");
  __uint128_t operand = MakeUInt128(0x0000555500332200ULL, 0x0000000077001100ULL);
  ASSERT_EQ(Cmeq0(operand), MakeUInt128(0xffff0000ff0000ffULL, 0xffffffff00ff00ffULL));
}
4477 
TEST(Arm64InsnTest, CompareEqualZeroInt8x8) {
  // CMEQ #0 (vector, 8x8): zero-byte masks in the low half; upper half zeroed.
  constexpr auto Cmeq0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.8b, %1.8b, #0");
  __uint128_t operand = MakeUInt128(0x001122330000aaaaULL, 0xdeadbeef0000cafeULL);
  ASSERT_EQ(Cmeq0(operand), MakeUInt128(0xff000000ffff0000ULL, 0));
}
4483 
TEST(Arm64InsnTest, CompareGreaterInt64x1) {
  // CMGT (scalar): signed greater-than on the low doublewords.
  constexpr auto Cmgt64 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x1976668559233565ULL, 0x4639138363185745ULL);
  __uint128_t larger = MakeUInt128(0x3474940784884423ULL, 0x7721751543342603ULL);
  __uint128_t equal_low = MakeUInt128(0x1976668559233565ULL, 0x8183196376370761ULL);
  __uint128_t negative = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  ASSERT_EQ(Cmgt64(base, larger), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmgt64(base, equal_low), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmgt64(base, negative), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4494 
TEST(Arm64InsnTest, CompareGreaterZeroInt64x1) {
  // CMGT #0 (scalar): all-ones when the low doubleword is strictly positive (signed).
  constexpr auto Cmgt0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %d0, %d1, #0");
  __uint128_t positive = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t zero_low = MakeUInt128(0x0000000000000000ULL, 0x6174599705674507ULL);
  __uint128_t negative = MakeUInt128(0x9592057668278967ULL, 0x7644531840404185ULL);
  ASSERT_EQ(Cmgt0(positive), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmgt0(zero_low), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmgt0(negative), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4504 
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x16) {
  // CMGT #0 (vector, 16x8): per-byte signed greater-than-zero masks.
  constexpr auto Cmgt0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.16b, %1.16b, #0");
  __uint128_t operand = MakeUInt128(0x807fff00017efe02ULL, 0xff7f80000102fe02ULL);
  ASSERT_EQ(Cmgt0(operand), MakeUInt128(0x00ff0000ffff00ffULL, 0x00ff0000ffff00ffULL));
}
4510 
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x8) {
  // CMGT #0 (vector, 8x8): per-byte masks in the low half; upper half zeroed.
  constexpr auto Cmgt0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.8b, %1.8b, #0");
  __uint128_t operand = MakeUInt128(0x00ff7f80017efe00ULL, 0x0000cafedeadbeefULL);
  ASSERT_EQ(Cmgt0(operand), MakeUInt128(0x0000ff00ffff0000ULL, 0));
}
4516 
TEST(Arm64InsnTest, CompareGreaterThanInt16x8) {
  // CMGT (vector, 8x16): signed per-lane greater-than; true lanes become all-ones.
  __uint128_t arg1 = MakeUInt128(0x9789389001852956ULL, 0x9196780455448285ULL);
  // Added the missing ULL suffix on the second literal for consistency with
  // every other constant in these tests (the value is unchanged).
  __uint128_t arg2 = MakeUInt128(0x7269389081795897ULL, 0x5469399264218285ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffff0000ULL, 0x0000ffff00000000ULL));
}
4523 
TEST(Arm64InsnTest, CompareGreaterThanInt32x4) {
  // CMGT (vector, 4x32): signed compare, so 0x00000000 > 0xffffffff (-1) holds.
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t rhs = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  ASSERT_EQ(AsmCmgt(lhs, rhs), MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL));
}
4530 
TEST(Arm64InsnTest, CompareLessZeroInt64x1) {
  // CMLT #0 (scalar): all-ones when the low doubleword is negative (signed).
  constexpr auto Cmlt0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %d0, %d1, #0");
  __uint128_t positive = MakeUInt128(0x4784264567633881ULL, 0x8807565612168960ULL);
  __uint128_t zero_low = MakeUInt128(0x0000000000000000ULL, 0x8955999911209916ULL);
  __uint128_t negative = MakeUInt128(0x9364610175685060ULL, 0x1671453543158148ULL);
  ASSERT_EQ(Cmlt0(positive), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmlt0(zero_low), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmlt0(negative), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4540 
TEST(Arm64InsnTest, CompareLessThanZeroInt8x16) {
  // CMLT #0 (vector, 16x8): per-byte signed less-than-zero masks.
  constexpr auto Cmlt0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.16b, %1.16b, #0");
  __uint128_t operand = MakeUInt128(0xff00017ffe020180ULL, 0x0001027e7ffeff80ULL);
  ASSERT_EQ(Cmlt0(operand), MakeUInt128(0xff000000ff0000ffULL, 0x0000000000ffffffULL));
}
4546 
TEST(Arm64InsnTest, CompareLessThanZeroInt8x8) {
  // CMLT #0 (vector, 8x8): per-byte masks in the low half; upper half zeroed.
  constexpr auto Cmlt0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.8b, %1.8b, #0");
  __uint128_t operand = MakeUInt128(0x0002017e7fff8000ULL, 0x001100220000ffffULL);
  ASSERT_EQ(Cmlt0(operand), MakeUInt128(0x0000000000ffff00ULL, 0));
}
4552 
TEST(Arm64InsnTest, CompareGreaterThanEqualInt64x1) {
  // CMGE (scalar): signed greater-than-or-equal on the low doublewords.
  constexpr auto Cmge64 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t larger = MakeUInt128(0x5890939568814856ULL, 0x0263224393726562ULL);
  __uint128_t equal_low = MakeUInt128(0x1009391369138107ULL, 0x5511995818319637ULL);
  __uint128_t negative = MakeUInt128(0x9427141009391369ULL, 0x1381072581378135ULL);
  ASSERT_EQ(Cmge64(base, larger), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmge64(base, equal_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmge64(base, negative), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4563 
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt64x1) {
  // CMGE #0 (scalar): all-ones when the low doubleword is non-negative (signed).
  constexpr auto Cmge0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %d0, %d1, #0");
  __uint128_t positive = MakeUInt128(0x5562116715468484ULL, 0x7780394475697980ULL);
  __uint128_t zero_low = MakeUInt128(0x0000000000000000ULL, 0x3548487562529875ULL);
  __uint128_t negative = MakeUInt128(0x9212366168902596ULL, 0x2730430679316531ULL);
  ASSERT_EQ(Cmge0(positive), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmge0(zero_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmge0(negative), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4573 
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x16) {
  // CMGE #0 (vector, 16x8): per-byte signed non-negative masks.
  constexpr auto Cmge0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.16b, %1.16b, #0");
  __uint128_t operand = MakeUInt128(0x00ff01027ffe8002ULL, 0x80fffe7f7e020100ULL);
  ASSERT_EQ(Cmge0(operand), MakeUInt128(0xff00ffffff0000ffULL, 0x000000ffffffffffULL));
}
4579 
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x8) {
  // CMGE #0 (vector, 8x8): per-byte masks in the low half; upper half zeroed.
  constexpr auto Cmge0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.8b, %1.8b, #0");
  __uint128_t operand = MakeUInt128(0x0001027f80feff00ULL, 0x0011223344556677ULL);
  ASSERT_EQ(Cmge0(operand), MakeUInt128(0xffffffff000000ffULL, 0));
}
4585 
TEST(Arm64InsnTest, CompareGreaterEqualInt16x8) {
  // CMGE (vector, 8x16): per-lane signed greater-than-or-equal masks.
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x4391962838870543ULL, 0x6777432242768091ULL);
  __uint128_t rhs = MakeUInt128(0x4391838548318875ULL, 0x0142432208995068ULL);
  ASSERT_EQ(AsmCmge(lhs, rhs), MakeUInt128(0xffffffff0000ffffULL, 0xffffffffffff0000ULL));
}
4592 
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt64x1) {
  // CMLE #0 (scalar): all-ones when the low doubleword is zero or negative (signed).
  constexpr auto Cmle0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %d0, %d1, #0");
  __uint128_t positive = MakeUInt128(0x3643296406335728ULL, 0x1070788758164043ULL);
  __uint128_t zero_low = MakeUInt128(0x0000000000000000ULL, 0x5865720227637840ULL);
  __uint128_t negative = MakeUInt128(0x8694346828590066ULL, 0x6408063140777577ULL);
  ASSERT_EQ(Cmle0(positive), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmle0(zero_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmle0(negative), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4602 
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x16) {
  // CMLE #0 (vector, 16x8): per-byte signed less-than-or-equal-to-zero masks.
  constexpr auto Cmle0 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.16b, %1.16b, #0");
  __uint128_t operand = MakeUInt128(0x80fffe7f7e020100ULL, 0x00ff01027ffe8002ULL);
  ASSERT_EQ(Cmle0(operand), MakeUInt128(0xffffff00000000ffULL, 0xffff000000ffff00ULL));
}
4608 
TEST(Arm64InsnTest, CompareHigherInt64x1) {
  // CMHI (scalar): unsigned greater-than on the low doublewords.
  constexpr auto Cmhi64 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t smaller = MakeUInt128(0x0759167297007850ULL, 0x5807171863810549ULL);
  __uint128_t equal_low = MakeUInt128(0x1009391369138107ULL, 0x6026322439372656ULL);
  __uint128_t larger = MakeUInt128(0x9087839523245323ULL, 0x7896029841669225ULL);
  ASSERT_EQ(Cmhi64(base, smaller), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmhi64(base, equal_low), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmhi64(base, larger), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4619 
TEST(Arm64InsnTest, CompareHigherInt16x8) {
  // CMHI (vector, 8x16): per-lane unsigned greater-than masks.
  constexpr auto AsmCmhi = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t rhs = MakeUInt128(0x2057166778967764ULL, 0x4531840442045540ULL);
  ASSERT_EQ(AsmCmhi(lhs, rhs), MakeUInt128(0xffff000000000000ULL, 0x0000ffff00000000ULL));
}
4626 
TEST(Arm64InsnTest, CompareHigherInt32x4) {
  // CMHI (vector, 4x32): unsigned compare, so 0xffffffff > 0x00000000 holds.
  constexpr auto AsmCmhi = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t rhs = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  ASSERT_EQ(AsmCmhi(lhs, rhs), MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL));
}
4633 
TEST(Arm64InsnTest, CompareHigherSameInt64x1) {
  // CMHS (scalar): unsigned greater-than-or-equal on the low doublewords.
  constexpr auto Cmhs64 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %d0, %d1, %d2");
  __uint128_t base = MakeUInt128(0x3529566139788848ULL, 0x6050978608595701ULL);
  __uint128_t smaller = MakeUInt128(0x1769845875810446ULL, 0x6283998806006162ULL);
  __uint128_t equal_low = MakeUInt128(0x3529566139788848ULL, 0x9001852956919678ULL);
  __uint128_t larger = MakeUInt128(0x9628388705436777ULL, 0x4322427680913236ULL);
  ASSERT_EQ(Cmhs64(base, smaller), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmhs64(base, equal_low), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(Cmhs64(base, larger), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4644 
TEST(Arm64InsnTest, CompareHigherSameInt16x8) {
  // CMHS (vector, 8x16): per-lane unsigned greater-than-or-equal masks.
  constexpr auto AsmCmhs = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x4599705674507183ULL, 0x3206503455664403ULL);
  __uint128_t rhs = MakeUInt128(0x4264705633881880ULL, 0x3206612168960504ULL);
  ASSERT_EQ(AsmCmhs(lhs, rhs), MakeUInt128(0xffffffffffffffffULL, 0xffff00000000ffffULL));
}
4651 
// Verifies CMLE #0 (signed compare less-than-or-equal to zero) on the 64-bit
// form: bytes that are zero or have the sign bit set become 0xff, the rest 0,
// and the upper 64 bits of the destination are cleared.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x00fffe807f020100ULL, 0x00aabbccddeeff00ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffffff000000ffULL, 0));
}
4657 
// Verifies scalar CMTST (test bits): result is all-ones when the bitwise AND
// of the operands is non-zero.  arg1 & arg2 == 0 (complementary bit patterns),
// while arg1 & arg3 shares bits.
TEST(Arm64InsnTest, TestInt64x1) {
  constexpr auto AsmCmtst = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0xaaaaaaaa55555555ULL, 0x7698385483188750ULL);
  __uint128_t arg2 = MakeUInt128(0x55555555aaaaaaaaULL, 0x1429389089950685ULL);
  __uint128_t arg3 = MakeUInt128(0xaa00aa0055005500ULL, 0x4530765116803337ULL);
  ASSERT_EQ(AsmCmtst(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmtst(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4666 
// Verifies CMTST on eight 16-bit lanes: a lane becomes all-ones iff the two
// source lanes share at least one set bit.
TEST(Arm64InsnTest, TestInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x5999911209916464ULL, 0x6441191856827700ULL);
  __uint128_t arg2 = MakeUInt128(0x6101756850601671ULL, 0x4535431581480105ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff0000ffffULL, 0xffffffff0000ffffULL));
}
4673 
// Verifies EXT (extract) on the full 128-bit form: with #8 the result is the
// upper 8 bytes of op1 followed by the lower 8 bytes of op2.
TEST(Arm64InsnTest, ExtractVectorFromPair) {
  __uint128_t op1 = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  __uint128_t op2 = MakeUInt128(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.16b, %1.16b, %2.16b, #8")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x8899aabbccddeeffULL, 0x0001020304050607ULL));
}
4680 
// Verifies the 64-bit (8b) form of EXT: bytes are taken starting at index #3
// of op1's low half, continuing into op2's low half; the upper 64 bits of the
// destination are zeroed.
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidth) {
  __uint128_t op1 = MakeUInt128(0x8138268683868942ULL, 0x7741559918559252ULL);
  __uint128_t op2 = MakeUInt128(0x3622262609912460ULL, 0x8051243884390451ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #3")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x9124608138268683ULL, 0x0000000000000000ULL));
}
4687 
// Same as ExtractVectorFromPairHalfWidth but with the minimal shift (#1),
// checking the boundary case where only one byte comes from op2.
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidthPosition1) {
  __uint128_t op1 = MakeUInt128(0x9471329621073404ULL, 0x3751895735961458ULL);
  __uint128_t op2 = MakeUInt128(0x9048010941214722ULL, 0x1317947647772622ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #1")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x2294713296210734ULL, 0x0000000000000000ULL));
}
4694 
// Verifies LD1 of a single 8-byte vector: the 64-bit value is loaded into the
// low half of the register and the upper half is zeroed, so res == arg.
TEST(Arm64InsnTest, Load1OneI8x8) {
  static constexpr uint64_t arg = 0x8867915896904956ULL;
  __uint128_t res;
  asm("ld1 {%0.8b}, [%1]" : "=w"(res) : "r"(&arg) : "memory");
  ASSERT_EQ(res, arg);
}
4701 
// Verifies LD1 with a three-register list (v0-v2).  LD1 performs NO
// de-interleaving, so each destination receives one consecutive 64-bit chunk.
// The register list must be consecutive, which cannot be expressed with asm
// operands, hence the hardcoded v0-v2 and the MOVs copying them out.
TEST(Arm64InsnTest, Load1ThreeI8x8) {
  static constexpr uint64_t arg[3] = {
      0x3415354584283376ULL, 0x4378111988556318ULL, 0x7777925372011667ULL};
  __uint128_t res[3];
  asm("ld1 {v0.8b-v2.8b}, [%3]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(arg)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
}
4717 
// Verifies LD1 with the maximum four-register list (v0-v3); like the
// three-register variant, each register gets one consecutive 64-bit chunk
// (no de-interleaving).
TEST(Arm64InsnTest, Load1FourI8x8) {
  static constexpr uint64_t arg[4] = {
      0x9523688483099930ULL,
      0x2757419916463841ULL,
      0x4270779887088742ULL,
      0x2927705389122717ULL,
  };
  __uint128_t res[4];
  asm("ld1 {v0.8b-v3.8b}, [%4]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b\n\t"
      "mov %3.16b, v3.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(arg)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
  ASSERT_EQ(res[3], static_cast<__uint128_t>(arg[3]));
}
4739 
// Verifies ST1 of a single 16-byte vector: all 128 bits are written to memory
// unchanged.
TEST(Arm64InsnTest, Store1OneI8x16) {
  static constexpr __uint128_t arg = MakeUInt128(0x7642291583425006ULL, 0x7361245384916067ULL);
  __uint128_t res;
  asm("st1 {%0.16b}, [%1]" : : "w"(arg), "r"(&res) : "memory");
  ASSERT_EQ(res, arg);
}
4746 
// Verifies ST1 with a three-register list: the low 64 bits of each of v0-v2
// are stored back-to-back without interleaving.  Consecutive registers are
// hardcoded and filled via MOVs from the asm inputs.
TEST(Arm64InsnTest, Store1ThreeI8x8) {
  static constexpr uint64_t arg[3] = {
      0x3086436111389069ULL, 0x4202790881431194ULL, 0x4879941715404210ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st1 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
}
4762 
// Verifies ST1 with the maximum four-register list (v0-v3), storing four
// consecutive 64-bit chunks without interleaving.
TEST(Arm64InsnTest, Store1FourI8x8) {
  static constexpr uint64_t arg[4] = {
      0x8954750448339314ULL, 0x6896307633966572ULL, 0x2672704339321674ULL, 0x5421824557062524ULL};
  uint64_t res[4];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "mov v3.16b, %3.16b\n\t"
      "st1 {v0.8b-v3.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
  ASSERT_EQ(res[3], arg[3]);
}
4780 
// Verifies LD1 of two 16-byte vectors with immediate post-index writeback:
// both values are loaded and the base register must advance by #32.
TEST(Arm64InsnTest, Load1TwoPostIndex) {
  __uint128_t op0 = MakeUInt128(0x5499119881834797ULL, 0x0507922796892589ULL);
  __uint128_t op1 = MakeUInt128(0x0511854807446237ULL, 0x6691368672287489ULL);
  __uint128_t array[] = {
      op0,
      op1,
  };
  __uint128_t* addr = &array[0];
  __uint128_t res0 = 0;
  __uint128_t res1 = 0;

  // The "memory" below ensures that the array contents are up to date.  Without it, the
  // compiler might decide to initialize the array after the asm statement.
  //
  // We hardcode SIMD registers v0 and v1 below because there is no other way to express
  // consecutive registers, which in turn requires the mov instructions to retrieve the
  // loaded values into res0 and res1.
  asm("ld1 {v0.16b, v1.16b}, [%2], #32\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res0), "=w"(res1), "+r"(addr)
      :
      : "v0", "v1", "memory");

  ASSERT_EQ(res0, op0);
  ASSERT_EQ(res1, op1);
  // Post-index writeback: addr must have advanced past both vectors.
  ASSERT_EQ(addr, &array[2]);
}
4809 
// Verifies LD1 with register post-index writeback: the base address is
// incremented by the value of the index register (32 here), independently of
// the 16 bytes actually transferred.
TEST(Arm64InsnTest, Load1OnePostIndexReg) {
  static constexpr __uint128_t arg = MakeUInt128(0x4884761005564018ULL, 0x2423921926950620ULL);
  __uint128_t res_val;
  uint64_t res_addr;
  // "1"(&arg) ties the initial base address to output operand %1.
  asm("ld1 {%0.16b}, [%1], %2"
      : "=w"(res_val), "=r"(res_addr)
      : "r"(static_cast<uint64_t>(32U)), "1"(&arg)
      : "memory");
  ASSERT_EQ(res_val, arg);
  ASSERT_EQ(res_addr, reinterpret_cast<uint64_t>(&arg) + 32);
}
4821 
// Verifies LD1 single-structure (lane) load: one byte from memory replaces
// lane 3 of the destination while every other lane is preserved.
TEST(Arm64InsnTest, LoadSingleInt8) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  // "0"(reg_before) preloads the destination so lane merging can be observed.
  asm("ld1 {%0.b}[3], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  // Little-endian: the first memory byte is 0x08, inserted at byte lane 3.
  ASSERT_EQ(reg_after, MakeUInt128(0x00112233'08'556677ULL, 0x8899aabbccddeeffULL));
}
4830 
// Verifies LD1 lane load of a 16-bit element into lane 2, preserving all
// other lanes.
TEST(Arm64InsnTest, LoadSingleInt16) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.h}[2], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  // Little-endian: the first halfword in memory is 0x0708.
  ASSERT_EQ(reg_after, MakeUInt128(0x0000'0708'22223333ULL, 0x4444555566667777ULL));
}
4839 
// Verifies LD1 lane load of a 32-bit element into lane 1, preserving all
// other lanes.
TEST(Arm64InsnTest, LoadSingleInt32) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.s}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0506070811111111ULL, 0x2222222233333333ULL));
}
4848 
// Verifies LD1 lane load of a 64-bit element into lane 1 (the upper half),
// leaving the lower half untouched.
TEST(Arm64InsnTest, LoadSingleInt64) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.d}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000000000000000ULL, 0x0102030405060708ULL));
}
4857 
// Verifies ST1 single-structure (lane) store: byte lane 3 of the source
// (0x05) overwrites only the first memory byte.
TEST(Arm64InsnTest, StoreSingleInt8) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  asm("st1 {%1.b}[3], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x00112233445566'05ULL, 0x8899aabbccddeeffULL));
}
4864 
// Verifies ST1 lane store of 16-bit lane 5 (0x0d0e); only the first two
// memory bytes change.
TEST(Arm64InsnTest, StoreSingleInt16) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  asm("st1 {%1.h}[5], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x000011112222'0d0eULL, 0x4444555566667777ULL));
}
4871 
// Verifies ST1 lane store of 32-bit lane 2 (0x0d0e0f10); only the first four
// memory bytes change.
TEST(Arm64InsnTest, StoreSingleInt32) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  asm("st1 {%1.s}[2], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x000000000'd0e0f10ULL, 0x2222222233333333ULL));
}
4878 
// Verifies ST1 lane store of 64-bit lane 1 (the source's upper half); only
// the first eight memory bytes change.
TEST(Arm64InsnTest, StoreSingleInt64) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  asm("st1 {%1.d}[1], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x090a0b0c0d0e0f10ULL, 0x1111111111111111ULL));
}
4885 
// Verifies LD1 lane load with immediate post-index: one byte (0x79, the low
// byte of arg2) is inserted at byte lane 3 of arg1, and the address advances
// by the transfer size (#1).
TEST(Arm64InsnTest, LoadSinglePostIndexImmInt8) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x5494167594605487ULL, 0x1172359464291058ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x5090995021495879ULL, 0x3112196135908315ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.b}[3], [%1], #1" : "=w"(res), "=r"(addr) : "0"(arg1), "1"(&arg2) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x5494167579605487ULL, 0x1172359464291058ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 1);
}
4895 
// Verifies LD1 lane load with register post-index: halfword 0x1081 (the low
// halfword of arg2) lands in lane 7 of arg1, and the address advances by the
// index register value (17), not by the 2 bytes transferred.
TEST(Arm64InsnTest, LoadSinglePostIndexRegInt16) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x0080587824107493ULL, 0x5751488997891173ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x9746129320351081ULL, 0x4327032514090304ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.h}[7], [%1], %2"
      : "=w"(res), "=r"(addr)
      : "r"(static_cast<uint64_t>(17U)), "0"(arg1), "1"(&arg2)
      : "memory");
  ASSERT_EQ(res, MakeUInt128(0x0080587824107493ULL, 0x1081488997891173ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 17);
}
4908 
// Verifies post-indexed STR of a SIMD register where the address register and
// the data register share the same register NUMBER (x0 / q0) — legal for SIMD
// data (unlike the integer-register case) and a corner case for decoders.
TEST(Arm64InsnTest, StoreSimdPostIndex) {
  __uint128_t old_val = MakeUInt128(0x4939965143142980ULL, 0x9190659250937221ULL);
  __uint128_t new_val = MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL);
  __uint128_t* addr = &old_val;

  // Verify that the interpreter accepts "str q0, [x0], #8" where the register numbers are
  // the same, when the data register is one of the SIMD registers.
  asm("mov x0, %0\n\t"
      "mov v0.2D, %1.2D\n\t"
      "str q0, [x0], #8\n\t"
      "mov %0, x0"
      : "+r"(addr)
      : "w"(new_val)
      : "v0", "x0", "memory");

  // All 16 bytes are stored; the base then advances by the #8 post-increment.
  ASSERT_EQ(old_val, MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL));
  ASSERT_EQ(reinterpret_cast<uintptr_t>(addr), reinterpret_cast<uintptr_t>(&old_val) + 8);
}
4927 
// Verifies STR of XZR with writeback addressing: a zero is pushed to the
// stack and read back.  NOTE(review): "[sp, #-16]!" is pre-index writeback,
// so the test name "PostIndex" appears to be a misnomer — confirm upstream.
TEST(Arm64InsnTest, StoreZeroPostIndex1) {
  uint64_t res;
  asm("str xzr, [sp, #-16]!\n\t"
      "ldr %0, [sp, #0]\n\t"
      "add sp, sp, #16"
      : "=r"(res));
  ASSERT_EQ(res, 0);
}
4936 
// Verifies STP of two Q registers with pre-index writeback onto the stack,
// reading both 128-bit values back with plain LDRs.  NOTE(review): as above,
// the addressing mode is pre-index despite the "PostIndex" name.
TEST(Arm64InsnTest, StoreZeroPostIndex2) {
  __uint128_t arg1 = MakeUInt128(0x9415573293820485ULL, 0x4212350817391254ULL);
  __uint128_t arg2 = MakeUInt128(0x9749819308714396ULL, 0x6151329420459193ULL);
  __uint128_t res1;
  __uint128_t res2;
  // v30/v31 are hardcoded because STP needs an explicit register pair.
  asm("mov v30.16b, %2.16b\n\t"
      "mov v31.16b, %3.16b\n\t"
      "stp q30, q31, [sp, #-32]!\n\t"
      "ldr %q0, [sp, #0]\n\t"
      "ldr %q1, [sp, #16]\n\t"
      "add sp, sp, #32"
      : "=w"(res1), "=w"(res2)
      : "w"(arg1), "w"(arg2)
      : "v30", "v31");

  ASSERT_EQ(res1, arg1);
  ASSERT_EQ(res2, arg2);
}
4955 
// Verifies LD2 (load two-element structures) on the 8b form: 16 bytes are
// de-interleaved so even-indexed bytes go to v0 and odd-indexed bytes to v1.
TEST(Arm64InsnTest, Load2MultipleInt8x8) {
  static constexpr uint8_t mem[] = {0x02,
                                    0x16,
                                    0x91,
                                    0x83,
                                    0x37,
                                    0x23,
                                    0x68,
                                    0x03,
                                    0x99,
                                    0x02,
                                    0x79,
                                    0x31,
                                    0x60,
                                    0x64,
                                    0x20,
                                    0x43};
  __uint128_t res[2];
  asm("ld2 {v0.8b, v1.8b}, [%2]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2060799968379102ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x4364310203238316ULL, 0U));
}
4983 
// Verifies LD3 on the 8b form: 24 bytes are de-interleaved three ways, byte
// 3k to v7, 3k+1 to v8, 3k+2 to v9; upper register halves are zeroed.
TEST(Arm64InsnTest, Load3MultipleInt8x8) {
  static constexpr uint8_t mem[3 * 8] = {0x32, 0x87, 0x67, 0x03, 0x80, 0x92, 0x52, 0x16,
                                         0x79, 0x07, 0x57, 0x12, 0x04, 0x06, 0x12, 0x37,
                                         0x59, 0x63, 0x27, 0x68, 0x56, 0x74, 0x84, 0x50};
  __uint128_t res[3];
  asm("ld3 {v7.8b-v9.8b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7427370407520332ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x8468590657168087ULL, 0U));
  ASSERT_EQ(res[2], MakeUInt128(0x5056631212799267ULL, 0U));
}
5000 
// Verifies ST3 on the 8b form: the three source vectors are interleaved
// byte-wise into memory (inverse of Load3MultipleInt8x8, same data).
TEST(Arm64InsnTest, Store3MultipleInt8x8) {
  static constexpr uint64_t arg[3] = {
      0x7427370407520332ULL, 0x8468590657168087ULL, 0x5056631212799267ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x1652928003678732ULL);
  ASSERT_EQ(res[1], 0x3712060412570779ULL);
  ASSERT_EQ(res[2], 0x5084745668276359ULL);
}
5016 
// Verifies LD3 on the full-width 16b form: 48 bytes are de-interleaved three
// ways into complete 128-bit registers.
TEST(Arm64InsnTest, Load3MultipleInt8x16) {
  static constexpr uint8_t mem[3 * 16] = {
      0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96, 0x25, 0x32, 0x83, 0x38,
      0x52, 0x27, 0x99, 0x24, 0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88,
      0x91, 0x29, 0x63, 0x62, 0x59, 0x54, 0x32, 0x73, 0x45, 0x44, 0x37, 0x16,
      0x33, 0x55, 0x77, 0x43, 0x29, 0x49, 0x99, 0x28, 0x81, 0x05, 0x57, 0x17};
  __uint128_t res[3];
  asm("ld3 {v7.16b-v9.16b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x4797245232446569ULL, 0x599433344326291ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2386592783966320ULL, 0x5728295537735929ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8859609938253835ULL, 0x1781497716455463ULL));
}
5035 
// Verifies ST3 on the 16b form: inverse of Load3MultipleInt8x16 — the inputs
// here are that test's outputs and vice versa.
TEST(Arm64InsnTest, Store3MultipleInt8x16) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x4797245232446569ULL, 0x599433344326291ULL),
                                         MakeUInt128(0x2386592783966320ULL, 0x5728295537735929ULL),
                                         MakeUInt128(0x8859609938253835ULL, 0x1781497716455463ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.16b-v2.16b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5052 
// Verifies LD3 on the 4h form, using a register list that wraps around the
// end of the SIMD file (v30, v31, v0) — a legal encoding worth covering.
TEST(Arm64InsnTest, Load3MultipleInt16x4) {
  static constexpr uint16_t mem[3 * 4] = {0x2069,
                                          0x6535,
                                          0x3863,
                                          0x9644,
                                          0x3225,
                                          0x3883,
                                          0x2752,
                                          0x2499,
                                          0x6059,
                                          0x8697,
                                          0x4759,
                                          0x8823},
  __uint128_t res[3];
  asm("ld3 {v30.4h-v0.4h}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697275296442069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x4759249932256535ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x8823605938833863ULL, 0));
}
5078 
// Verifies ST3 on the 4h form: three 4-halfword vectors are interleaved
// element-wise into 24 bytes of memory.
TEST(Arm64InsnTest, Store3MultipleInt16x4) {
  static constexpr uint64_t arg[3] = {
      0x8697275296442069ULL, 0x4759249932256535ULL, 0x8823605938833863ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.4h-v2.4h}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
}
5094 
// Verifies LD3 on the 8h form with a wrap-around register list (v30-v0):
// 48 bytes are de-interleaved by 16-bit element into three full registers.
TEST(Arm64InsnTest, Load3MultipleInt16x8) {
  static constexpr uint16_t mem[3 * 8] = {0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883,
                                          0x2752, 0x2499, 0x6059, 0x8697, 0x4759, 0x8823,
                                          0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637,
                                          0x5533, 0x4377, 0x4929, 0x2899, 0x0581, 0x1757};
  __uint128_t res[3];
  asm("ld3 {v30.8h-v0.8h}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697275296442069ULL, 0x2899553373322991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x4759249932256535ULL, 0x581437744456263ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823605938833863ULL, 0x1757492916375459ULL));
}
5112 
// Verifies ST3 on the 8h form: inverse of Load3MultipleInt16x8, interleaving
// three full 8-halfword registers into memory.
TEST(Arm64InsnTest, Store3MultipleInt16x8) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x8697275296442069ULL, 0x2899553373322991ULL),
                                         MakeUInt128(0x4759249932256535ULL, 0x581437744456263ULL),
                                         MakeUInt128(0x8823605938833863ULL, 0x1757492916375459ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.8h-v2.8h}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5129 
// Verifies LD3 on the 2s form with a wrap-around register list: six 32-bit
// words are de-interleaved, two per destination register.
TEST(Arm64InsnTest, Load3MultipleInt32x2) {
  static constexpr uint32_t mem[3 * 2] = {
      0x65352069, 0x96443863, 0x38833225, 0x24992752, 0x86976059, 0x88234759};
  __uint128_t res[3];
  asm("ld3 {v30.2s-v0.2s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2499275265352069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x8697605996443863ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475938833225ULL, 0));
}
5145 
// Verifies ST3 on the 2s form: the low halves of three vectors are
// interleaved word-wise into 24 bytes of memory.
TEST(Arm64InsnTest, Store3MultipleInt32x2) {
  static constexpr uint64_t arg[3] = {
      0x2499275265352069ULL, 0x8697605996443863ULL, 0x8823475938833225ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.2s-v2.2s}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
}
5161 
// Verifies LD3 on the 4s form with a wrap-around register list: twelve
// 32-bit words are de-interleaved, four per destination register.
TEST(Arm64InsnTest, Load3MultipleInt32x4) {
  static constexpr uint32_t mem[3 * 4] = {0x65352069,
                                          0x96443863,
                                          0x38833225,
                                          0x24992752,
                                          0x86976059,
                                          0x88234759,
                                          0x62632991,
                                          0x73325459,
                                          0x16374445,
                                          0x43775533,
                                          0x28994929,
                                          0x17570581};
  __uint128_t res[3];
  asm("ld3 {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2499275265352069ULL, 0x4377553362632991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8697605996443863ULL, 0x2899492973325459ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475938833225ULL, 0x1757058116374445ULL));
}
5187 
// Verifies ST3 on the 4s form: inverse of Load3MultipleInt32x4.
TEST(Arm64InsnTest, Store3MultipleInt32x4) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x2499275265352069ULL, 0x4377553362632991ULL),
                                         MakeUInt128(0x8697605996443863ULL, 0x2899492973325459ULL),
                                         MakeUInt128(0x8823475938833225ULL, 0x1757058116374445ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.4s-v2.4s}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5204 
// Verifies LD3 on the 2d form with a wrap-around register list: six 64-bit
// doublewords are de-interleaved, two per destination register.
TEST(Arm64InsnTest, Load3MultipleInt64x2) {
  static constexpr uint64_t mem[3 * 2] = {0x9644386365352069,
                                          0x2499275238833225,
                                          0x8823475986976059,
                                          0x7332545962632991,
                                          0x4377553316374445,
                                          0x1757058128994929};
  __uint128_t res[3];
  asm("ld3 {v30.2d-v0.2d}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2499275238833225ULL, 0x4377553316374445ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475986976059ULL, 0x1757058128994929ULL));
}
5224 
// Verifies ST3 on the 2d form: inverse of Load3MultipleInt64x2.
TEST(Arm64InsnTest, Store3MultipleInt64x2) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x9644386365352069ULL, 0x7332545962632991ULL),
                                         MakeUInt128(0x2499275238833225ULL, 0x4377553316374445ULL),
                                         MakeUInt128(0x8823475986976059ULL, 0x1757058128994929ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.2d-v2.2d}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5241 
// Verifies LD4 on the 8b form: 32 bytes are de-interleaved four ways, byte
// 4k+j going to the j-th register of the v7-v10 list.
TEST(Arm64InsnTest, Load4MultipleInt8x8) {
  static constexpr uint8_t mem[4 * 8] = {0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96,
                                         0x25, 0x32, 0x83, 0x38, 0x52, 0x27, 0x99, 0x24,
                                         0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88,
                                         0x91, 0x29, 0x63, 0x62, 0x59, 0x54, 0x32, 0x73};
  __uint128_t res[4];
  asm("ld4 {v7.8b-v10.8b}, [%4]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b\n\t"
      "mov %3.16b, v10.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x5991595952256369ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x5429476027323820ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x3263239799834435ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7362888624389665ULL, 0));
}
5261 
// Verifies ST4 on the 8b form: inverse of Load4MultipleInt8x8 — the four
// source vectors are interleaved byte-wise into 32 bytes of memory.
TEST(Arm64InsnTest, Store4MultipleInt8x8) {
  static constexpr uint64_t arg[4] = {
      0x5991595952256369ULL, 0x5429476027323820ULL, 0x3263239799834435ULL, 0x7362888624389665ULL};
  uint64_t res[4];
  asm("mov v7.16b, %0.16b\n\t"
      "mov v8.16b, %1.16b\n\t"
      "mov v9.16b, %2.16b\n\t"
      "mov v10.16b, %3.16b\n\t"
      "st4 {v7.8b-v10.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5279 
// Verifies LD4 on the full-width 16b form: 64 bytes are de-interleaved four
// ways into complete 128-bit registers.
TEST(Arm64InsnTest, Load4MultipleInt8x16) {
  static constexpr uint8_t mem[4 * 16] = {
      0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96, 0x25, 0x32, 0x83, 0x38, 0x52,
      0x27, 0x99, 0x24, 0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88, 0x91, 0x29,
      0x63, 0x62, 0x59, 0x54, 0x32, 0x73, 0x45, 0x44, 0x37, 0x16, 0x33, 0x55, 0x77,
      0x43, 0x29, 0x49, 0x99, 0x28, 0x81, 0x05, 0x57, 0x17, 0x81, 0x98, 0x78, 0x50,
      0x68, 0x14, 0x62, 0x52, 0x32, 0x13, 0x47, 0x52, 0x37, 0x38, 0x11, 0x65};
  __uint128_t res[4];
  asm("ld4 {v7.16b-v10.16b}, [%4]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b\n\t"
      "mov %3.16b, v10.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x5991595952256369ULL, 0x3732688181293345ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x5429476027323820ULL, 0x3813149805495544ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x3263239799834435ULL, 0x1147627857997737ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7362888624389665ULL, 0x6552525017284316ULL));
}
5301 
// Verifies ST4 with 16-byte vectors: the inverse of Load4MultipleInt8x16 —
// bytes from v7-v10 are interleaved into the 64-byte destination buffer.
TEST(Arm64InsnTest, Store4MultipleInt8x16) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x5991595952256369ULL, 0x3732688181293345ULL),
                                         MakeUInt128(0x5429476027323820ULL, 0x3813149805495544ULL),
                                         MakeUInt128(0x3263239799834435ULL, 0x1147627857997737ULL),
                                         MakeUInt128(0x7362888624389665ULL, 0x6552525017284316ULL)};
  __uint128_t res[4];
  asm("mov v7.16b, %0.16b\n\t"
      "mov v8.16b, %1.16b\n\t"
      "mov v9.16b, %2.16b\n\t"
      "mov v10.16b, %3.16b\n\t"
      "st4 {v7.16b-v10.16b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5321 
// Verifies LD4 with 16-bit elements, 4 lanes per register.  The register list
// v30-v1 deliberately wraps past v31 to exercise modulo-32 register numbering.
TEST(Arm64InsnTest, Load4MultipleInt16x4) {
  static constexpr uint16_t mem[4 * 4] = {0x2069,
                                          0x6535,
                                          0x3863,
                                          0x9644,
                                          0x3225,
                                          0x3883,
                                          0x2752,
                                          0x2499,
                                          0x6059,
                                          0x8697,
                                          0x4759,
                                          0x8823,
                                          0x2991,
                                          0x6263,
                                          0x5459,
                                          0x7332};
  __uint128_t res[4];
  asm("ld4 {v30.4h-v1.4h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0));
}

// Verifies ST4 with 16-bit elements, 4 lanes: the inverse of the test above,
// again using the wrap-around register list v30-v1.
TEST(Arm64InsnTest, Store4MultipleInt16x4) {
  static constexpr uint64_t arg[4] = {
      0x2991605932252069ULL, 0x6263869738836535ULL, 0x5459475927523863ULL, 0x7332882324999644ULL};
  uint64_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.4h-v1.4h}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5371 
// Verifies LD4 with 16-bit elements, 8 lanes per register (full 128-bit
// registers), using the wrap-around register list v30-v1.
TEST(Arm64InsnTest, Load4MultipleInt16x8) {
  static constexpr uint16_t mem[4 * 8] = {
      0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883, 0x2752, 0x2499, 0x6059, 0x8697, 0x4759,
      0x8823, 0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637, 0x5533, 0x4377, 0x4929, 0x2899,
      0x0581, 0x1757, 0x9881, 0x5078, 0x1468, 0x5262, 0x1332, 0x5247, 0x3837, 0x6511};
  __uint128_t res[4];
  asm("ld4 {v30.8h-v1.8h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL));
}

// Verifies ST4 with 16-bit elements, 8 lanes: the inverse of the test above.
TEST(Arm64InsnTest, Store4MultipleInt16x8) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL),
                                         MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL),
                                         MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL),
                                         MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.8h-v1.8h}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5411 
// Verifies LD4 with 32-bit elements, 2 lanes per register (64-bit form),
// using the wrap-around register list v30-v1.
TEST(Arm64InsnTest, Load4MultipleInt32x2) {
  static constexpr uint32_t mem[4 * 2] = {0x65352069,
                                          0x96443863,
                                          0x38833225,
                                          0x24992752,
                                          0x86976059,
                                          0x88234759,
                                          0x62632991,
                                          0x73325459};
  __uint128_t res[4];
  asm("ld4 {v30.2s-v1.2s}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697605965352069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475996443863ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x6263299138833225ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545924992752ULL, 0));
}

// Verifies ST4 with 32-bit elements, 2 lanes: the inverse of the test above.
TEST(Arm64InsnTest, Store4MultipleInt32x2) {
  static constexpr uint64_t arg[4] = {
      0x8697605965352069ULL, 0x8823475996443863ULL, 0x6263299138833225ULL, 0x7332545924992752ULL};
  uint64_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.2s-v1.2s}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5453 
// Verifies LD4 with 32-bit elements, 4 lanes per register (128-bit form),
// using the wrap-around register list v30-v1.
TEST(Arm64InsnTest, Load4MultipleInt32x4) {
  static constexpr uint32_t mem[4 * 4] = {0x65352069,
                                          0x96443863,
                                          0x38833225,
                                          0x24992752,
                                          0x86976059,
                                          0x88234759,
                                          0x62632991,
                                          0x73325459,
                                          0x16374445,
                                          0x43775533,
                                          0x28994929,
                                          0x17570581,
                                          0x50789881,
                                          0x52621468,
                                          0x52471332,
                                          0x65113837};
  __uint128_t res[4];
  asm("ld4 {v30.4s-v1.4s}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697605965352069ULL, 0x5078988116374445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475996443863ULL, 0x5262146843775533ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x6263299138833225ULL, 0x5247133228994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545924992752ULL, 0x6511383717570581ULL));
}

// Verifies ST4 with 32-bit elements, 4 lanes: the inverse of the test above.
TEST(Arm64InsnTest, Store4MultipleInt32x4) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x8697605965352069ULL, 0x5078988116374445ULL),
                                         MakeUInt128(0x8823475996443863ULL, 0x5262146843775533ULL),
                                         MakeUInt128(0x6263299138833225ULL, 0x5247133228994929ULL),
                                         MakeUInt128(0x7332545924992752ULL, 0x6511383717570581ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.4s-v1.4s}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5505 
// Verifies LD4 with 64-bit elements, 2 lanes per register, using the
// wrap-around register list v30-v1.
TEST(Arm64InsnTest, Load4MultipleInt64x2) {
  static constexpr uint64_t mem[4 * 2] = {0x9644386365352069,
                                          0x2499275238833225,
                                          0x8823475986976059,
                                          0x7332545962632991,
                                          0x4377553316374445,
                                          0x1757058128994929,
                                          0x5262146850789881,
                                          0x6511383752471332};
  __uint128_t res[4];
  asm("ld4 {v30.2d-v1.2d}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x4377553316374445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2499275238833225ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475986976059ULL, 0x5262146850789881ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545962632991ULL, 0x6511383752471332ULL));
}

// Verifies ST4 with 64-bit elements, 2 lanes: the inverse of the test above.
TEST(Arm64InsnTest, Store4MultipleInt64x2) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x9644386365352069ULL, 0x4377553316374445ULL),
                                         MakeUInt128(0x2499275238833225ULL, 0x1757058128994929ULL),
                                         MakeUInt128(0x8823475986976059ULL, 0x5262146850789881ULL),
                                         MakeUInt128(0x7332545962632991ULL, 0x6511383752471332ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.2d-v1.2d}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5549 
// Verifies LD1R: loads one byte and replicates it into all 8 lanes.
TEST(Arm64InsnTest, Load1ReplicateInt8x8) {
  static constexpr uint8_t mem = 0x81U;
  __uint128_t res;
  asm("ld1r {%0.8b}, [%1]" : "=w"(res) : "r"(&mem) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x8181818181818181ULL, 0U));
}

// Verifies LD2R: loads two 16-bit values and replicates each across all
// lanes of its own destination register.
TEST(Arm64InsnTest, Load2ReplicateInt16x8) {
  static constexpr uint16_t mem[] = {0x7904, 0x8715};
  __uint128_t res[2];
  asm("ld2r {v6.8h, v7.8h}, [%2]\n\t"
      "mov %0.16b, v6.16b\n\t"
      "mov %1.16b, v7.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v6", "v7", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7904790479047904ULL, 0x7904790479047904ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8715871587158715ULL, 0x8715871587158715ULL));
}

// Verifies LD3R with 32-bit elements; the register list wraps from v31 to v0.
TEST(Arm64InsnTest, Load3ReplicateInt32x4) {
  static constexpr uint32_t mem[] = {0x78713710U, 0x60510637U, 0x95558588U};
  __uint128_t res[3];
  asm("ld3r {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7871371078713710ULL, 0x7871371078713710ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6051063760510637ULL, 0x6051063760510637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x9555858895558588ULL, 0x9555858895558588ULL));
}

// Verifies LD4R with 64-bit elements: each loaded doubleword fills both lanes
// of its destination register (expected values derived from mem directly).
TEST(Arm64InsnTest, Load4ReplicateInt64x2) {
  static constexpr uint64_t mem[] = {
      0x8150781468526213ULL, 0x3252473837651192ULL, 0x9901561091897779ULL, 0x2200870579339646ULL};
  __uint128_t res[4];
  asm("ld4r {v29.2d-v0.2d}, [%4]\n\t"
      "mov %0.16b, v29.16b\n\t"
      "mov %1.16b, v30.16b\n\t"
      "mov %2.16b, v31.16b\n\t"
      "mov %3.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v29", "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(mem[0], mem[0]));
  ASSERT_EQ(res[1], MakeUInt128(mem[1], mem[1]));
  ASSERT_EQ(res[2], MakeUInt128(mem[2], mem[2]));
  ASSERT_EQ(res[3], MakeUInt128(mem[3], mem[3]));
}
5602 
// Verifies LDNP (load pair, non-temporal hint) into two D registers.
// NOTE(review): "Temporarl" in the test name is a typo for "Temporal"; it is
// left unchanged because the name is the test's identifier.
TEST(Arm64InsnTest, LoadPairNonTemporarlInt64) {
  static constexpr uint64_t mem[] = {0x3843601737474215ULL, 0x2476085152099016ULL};
  __uint128_t res[2];
  asm("ldnp %d0, %d1, [%2]" : "=w"(res[0]), "=w"(res[1]) : "r"(mem) : "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x3843601737474215ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x2476085152099016ULL, 0U));
}
5610 
// MOVI: move an 8-bit immediate replicated into each 32-bit lane.
TEST(Arm64InsnTest, MoviVector2S) {
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0xe4")();
  ASSERT_EQ(rd, MakeUInt128(0x000000e4000000e4ULL, 0x0000000000000000ULL));
}

// MOVI 2D form: the immediate is a per-byte mask expanded to each 64-bit lane
// (0xff expands to 0x00000000000000ff here).
TEST(Arm64InsnTest, MoviVector2D) {
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2d, #0xff")();
  ASSERT_EQ(rd, MakeUInt128(0x00000000000000ffULL, 0x00000000000000ffULL));
}

// MOVI 8B form: immediate replicated into each byte of the low 64 bits.
TEST(Arm64InsnTest, MoviVector8B) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("movi %0.8b, #0xda")();
  ASSERT_EQ(res, MakeUInt128(0xdadadadadadadadaULL, 0x0000000000000000ULL));
}

// MOVI with LSL #8: immediate shifted left before replication into each
// 16-bit lane.
TEST(Arm64InsnTest, MoviVector4HShiftBy8) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("movi %0.4h, #0xd1, lsl #8")();
  ASSERT_EQ(res, MakeUInt128(0xd100d100d100d100ULL, 0x0000000000000000ULL));
}

// MOVI with MSL #16 ("masked shift left"): shifted-in bits are ones, not
// zeros, hence the 0xffff suffix in each lane.
TEST(Arm64InsnTest, MoviVector2SShiftBy16) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0x37, msl #16")();
  ASSERT_EQ(res, MakeUInt128(0x0037ffff0037ffffULL, 0x0000000000000000ULL));
}

// MVNI: bitwise inverse of the expanded MOVI immediate (~0x00bc == 0xff43).
TEST(Arm64InsnTest, MvniVector4H) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.4h, #0xbc")();
  ASSERT_EQ(res, MakeUInt128(0xff43ff43ff43ff43ULL, 0x0000000000000000ULL));
}

// MVNI with LSL #8: inverse of 0x2400 per 32-bit lane.
TEST(Arm64InsnTest, MvniVector2SShiftBy8) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x24, lsl #8")();
  ASSERT_EQ(res, MakeUInt128(0xffffdbffffffdbffULL, 0x0000000000000000ULL));
}

// MVNI with MSL #16: inverse of 0x0025ffff per 32-bit lane.
TEST(Arm64InsnTest, MvniVector2SShiftBy16) {
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x25, msl #16")();
  ASSERT_EQ(res, MakeUInt128(0xffda0000ffda0000ULL, 0x0000000000000000ULL));
}
5650 
// Verifies LDR of a Q register with register-offset addressing: the 16-byte
// offset selects the second array element.
TEST(Arm64InsnTest, LoadSimdRegPlusReg) {
  __uint128_t array[] = {
      MakeUInt128(0x6517980694113528ULL, 0x0131470130478164ULL),
      MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL),
  };
  uint64_t offset = 16;
  __uint128_t rd;

  asm("ldr %q0, [%1, %2]" : "=w"(rd) : "r"(array), "r"(offset) : "memory");

  ASSERT_EQ(rd, MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL));
}
5663 
// XTN: keeps the low byte of each 16-bit element, result in the low 64 bits.
TEST(Arm64InsnTest, ExtractNarrowI16x8ToI8x8) {
  __uint128_t arg = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.8b, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x113355772367abefULL, 0x0ULL));
}

// XTN: keeps the low 16 bits of each 32-bit element.
TEST(Arm64InsnTest, ExtractNarrowI32x4ToI16x4) {
  __uint128_t arg = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.4h, %1.4s")(arg);
  ASSERT_EQ(res, MakeUInt128(0x223366774567cdefULL, 0x0ULL));
}

// XTN: keeps the low 32 bits of each 64-bit element.
TEST(Arm64InsnTest, ExtractNarrowI64x2ToI32x2) {
  __uint128_t arg = MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.2s, %1.2d")(arg);
  ASSERT_EQ(res, MakeUInt128(0x4455667789abcdefULL, 0x0ULL));
}

// XTN2: narrows into the UPPER half of the destination while the lower half
// (arg2) is preserved.
TEST(Arm64InsnTest, ExtractNarrow2Int16x8ToInt8x16) {
  __uint128_t arg1 = MakeUInt128(0x1844396582533754ULL, 0x3885690941130315ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("xtn2 %0.16b, %1.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x6121865619673378ULL, 0x8509131544655354ULL));
}

TEST(Arm64InsnTest, LoadLiteralSimd) {
  // We call an external assembly function to perform LDR literal because we
  // need to place the literal in .rodata.  The literal placed in .text would
  // trigger a segfault.
  ASSERT_EQ(get_fp64_literal(), 0x0123456789abcdefULL);
}
5695 
// ABS (scalar): absolute value of a signed 64-bit value; upper input bits
// are ignored and the upper result bits are zeroed.
TEST(Arm64InsnTest, AbsInt64x1) {
  __uint128_t arg = MakeUInt128(0xfffffffffffffffdULL, 0xdeadbeef01234567ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %d0, %d1")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
}

// ABS (vector, 8 bytes): note 0x80 maps to 0x80 (INT8_MIN has no positive
// counterpart), as the expected value shows.
TEST(Arm64InsnTest, AbsInt8x8) {
  __uint128_t arg = MakeUInt128(0x0001027e7f8081ffULL, 0x0123456789abcdefULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0001027e7f807f01ULL, 0x0ULL));
}

// Sanity-checks that the highest-numbered SIMD register v31 can be written
// and read back.
TEST(Arm64InsnTest, UseV31) {
  __uint128_t res;

  asm("movi v31.2d, #0xffffffffffffffff\n\t"
      "mov %0.16b, v31.16b"
      : "=w"(res)
      :
      : "v31");

  ASSERT_EQ(res, MakeUInt128(~0ULL, ~0ULL));
}
5719 
// ADDHN: adds 16-bit lanes pairwise and keeps the HIGH byte of each sum.
TEST(Arm64InsnTest, AddHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x2296617119637792ULL, 0x1337575114959501ULL);
  __uint128_t arg2 = MakeUInt128(0x0941214722131794ULL, 0x7647772622414254ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x89ce36d72b823b8fULL, 0x0ULL));
}

// ADDHN2: same as ADDHN but writes the upper half of the destination,
// preserving the lower half supplied in arg3.
TEST(Arm64InsnTest, AddHighNarrowUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x6561809377344403ULL, 0x0707469211201913ULL);
  __uint128_t arg2 = MakeUInt128(0x6095752706957220ULL, 0x9175671167229109ULL);
  __uint128_t arg3 = MakeUInt128(0x5797877185560845ULL, 0x5296541266540853ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("addhn2 %0.16b, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5797877185560845ULL, 0x98ad78aac5f57db6ULL));
}

// SUBHN: subtracts 16-bit lanes and keeps the HIGH byte of each difference.
TEST(Arm64InsnTest, SubHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4978189312978482ULL, 0x1682998948722658ULL);
  __uint128_t arg2 = MakeUInt128(0x1210835791513698ULL, 0x8209144421006751ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("subhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x948527bf3795814dULL, 0x0ULL));
}

// SUBHN2: upper-half variant of SUBHN; lower half (arg3) is preserved.
TEST(Arm64InsnTest, SubHighNarrowUpperInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x5324944166803962ULL, 0x6579787718556084ULL);
  __uint128_t arg2 = MakeUInt128(0x1066587969981635ULL, 0x7473638405257145ULL);
  __uint128_t arg3 = MakeUInt128(0x3142980919065925ULL, 0x0937221696461515ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("subhn2 %0.16b, %1.8h, %2.8h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x3142980919065925ULL, 0xf11413ef423bfc23ULL));
}

// RADDHN: like ADDHN but rounds (adds half an LSB) before narrowing.
TEST(Arm64InsnTest, RoundingAddHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x8039626579787718ULL, 0x5560845529654126ULL);
  __uint128_t arg2 = MakeUInt128(0x3440171274947042ULL, 0x0562230538994561ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("raddhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x5ba76287b479eee7ULL, 0x0000000000000000ULL));
}

// RSUBHN: rounding variant of SUBHN.
TEST(Arm64InsnTest, RoundingSubHighNarrowInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x3063432858785698ULL, 0x3052358089330657ULL);
  __uint128_t arg2 = MakeUInt128(0x0216471550979259ULL, 0x2309907965473761ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("rsubhn %0.8b, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0da524cf2efc08c4ULL, 0x0000000000000000ULL));
}
5765 
// ADDP (scalar): adds the two 64-bit lanes of the source into a D register.
// NOTE(review): the test name says Int8x2 but the instruction operates on
// %1.2d (two 64-bit lanes) — name appears stale; behavior is as asserted.
TEST(Arm64InsnTest, ScalarPairwiseAddInt8x2) {
  __uint128_t arg = MakeUInt128(0x6257591633303910ULL, 0x7225383742182140ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addp %d0, %1.2d")(arg);
  ASSERT_EQ(res, MakeUInt128(0xd47c914d75485a50ULL, 0x0000000000000000ULL));
}

// ADDV: sums all 8 byte lanes into a single B register.
TEST(Arm64InsnTest, AddAcrossInt8x8) {
  __uint128_t arg = MakeUInt128(0x0681216028764962ULL, 0x8674460477464915ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addv %b0, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x51ULL, 0x0ULL));
}

// SADDLV: signed sum of all 16-bit lanes, widened to 32 bits.
TEST(Arm64InsnTest, SignedAddLongAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x9699557377273756ULL, 0x6761552711392258ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlv %s0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000018aa2ULL, 0x0000000000000000ULL));
}

// UADDLV: unsigned sum of all 16-bit lanes, widened to 32 bits.
TEST(Arm64InsnTest, UnsignedAddLongAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x7986396522961312ULL, 0x8017826797172898ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlv %s0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000000002aac0ULL, 0x0000000000000000ULL));
}

// SMAXV: signed maximum across 16-bit lanes.
TEST(Arm64InsnTest, SignedMaximumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x8482065967379473ULL, 0x1680864156456505ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("smaxv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000006737ULL, 0x0000000000000000ULL));
}

// SMINV: signed minimum across 16-bit lanes (0x9699 is negative as int16).
TEST(Arm64InsnTest, SignedMinimumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x6772530431825197ULL, 0x5791679296996504ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sminv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000009699ULL, 0x0000000000000000ULL));
}

// UMAXV: unsigned maximum across 16-bit lanes.
TEST(Arm64InsnTest, UnsignedMaximumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x6500378070466126ULL, 0x4706021457505793ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("umaxv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000007046ULL, 0x0000000000000000ULL));
}

// UMINV: unsigned minimum across 16-bit lanes.
TEST(Arm64InsnTest, UnsignedMinimumAcrossInt16x8) {
  __uint128_t arg = MakeUInt128(0x5223572397395128ULL, 0x8181640597859142ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uminv %h0, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000005128ULL, 0x0000000000000000ULL));
}
5813 
// CLZ (vector): per-byte count of leading zero bits.
TEST(Arm64InsnTest, CountLeadingZerosI8x8) {
  __uint128_t arg = MakeUInt128(0x1452635608277857ULL, 0x7134275778960917ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("clz %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0301010104020101ULL, 0x0000000000000000ULL));
}

// CLS (vector): per-byte count of leading sign bits (excluding the sign bit
// itself).
TEST(Arm64InsnTest, CountLeadingSignBitsI8x8) {
  __uint128_t arg = MakeUInt128(0x8925892354201995ULL, 0x6112129021960864ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cls %0.8b, %1.8b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0001000100010200ULL, 0x0000000000000000ULL));
}

// CNT: per-byte population count.
TEST(Arm64InsnTest, Cnt) {
  __uint128_t arg = MakeUInt128(0x9835484875625298ULL, 0x7524238730775595ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cnt %0.16b, %1.16b")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0304020205030303ULL, 0x0502030402060404ULL));
}

// MOV (scalar, element): extracts byte lane 5 into a B register, zeroing the
// rest.
TEST(Arm64InsnTest, SimdScalarMove) {
  __uint128_t arg = MakeUInt128(0x1433345477624168ULL, 0x6251898356948556ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("mov %b0, %1.b[5]")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000034ULL, 0x0000000000000000ULL));
}

// DUP (element): replicates byte lane 5 into all 8 destination lanes.
TEST(Arm64InsnTest, SimdVectorElemDuplicate) {
  __uint128_t arg = MakeUInt128(0x3021647155097925ULL, 0x9230990796547376ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.8b, %1.b[5]")(arg);
  ASSERT_EQ(res, MakeUInt128(0x6464646464646464ULL, 0x0000000000000000ULL));
}

// DUP (element): 16-bit lane index 7 lives in the upper 64 bits of the source.
TEST(Arm64InsnTest, SimdVectorElemDuplicateInt16AtIndex7) {
  __uint128_t arg = MakeUInt128(0x2582262052248940ULL, 0x7726719478268482ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.4h, %1.h[7]")(arg);
  ASSERT_EQ(res, MakeUInt128(0x7726772677267726ULL, 0x0000000000000000ULL));
}

// MOV (element to element, i.e. INS): copies 32-bit lane 1 of arg1 into
// lane 2 of the destination; all other destination lanes (arg2) survive.
TEST(Arm64InsnTest, SimdVectorElemInsert) {
  __uint128_t arg1 = MakeUInt128(0x7120844335732654ULL, 0x8938239119325974ULL);
  __uint128_t arg2 = MakeUInt128(0x7656180937734440ULL, 0x3070746921120191ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("mov %0.s[2], %1.s[1]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7656180937734440ULL, 0x3070746971208443ULL));
}
5856 
// NEG (scalar): two's-complement negation; INT64_MIN negates to itself, as
// the second assertion checks.
TEST(Arm64InsnTest, NegateInt64x1) {
  constexpr auto AsmNeg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %d0, %d1");
  __uint128_t arg1 = MakeUInt128(0x8389522868478312ULL, 0x3552658213144957ULL);
  ASSERT_EQ(AsmNeg(arg1), MakeUInt128(0x7c76add797b87ceeULL, 0x0000000000000000ULL));

  __uint128_t arg2 = MakeUInt128(1ULL << 63, 0U);
  ASSERT_EQ(AsmNeg(arg2), MakeUInt128(1ULL << 63, 0U));
}

// NEG (vector): per-16-bit-lane negation across the full 128-bit register.
TEST(Arm64InsnTest, NegateInt16x8) {
  __uint128_t arg = MakeUInt128(0x4411010446823252ULL, 0x7162010526522721ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %0.8h, %1.8h")(arg);
  ASSERT_EQ(res, MakeUInt128(0xbbeffefcb97ecdaeULL, 0x8e9efefbd9aed8dfULL));
}
5871 
TEST(Arm64InsnTest,NotI8x8)5872 TEST(Arm64InsnTest, NotI8x8) {
5873   __uint128_t arg = MakeUInt128(0x6205647693125705ULL, 0x8635662018558100ULL);
5874   __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("not %0.8b, %1.8b")(arg);
5875   ASSERT_EQ(res, MakeUInt128(0x9dfa9b896ceda8faULL, 0x0000000000000000ULL));
5876 }
5877 
TEST(Arm64InsnTest, RbitInt8x8) {
  // RBIT (vector): reverse the bit order within each byte.
  constexpr auto AsmRbit = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rbit %0.8b, %1.8b");
  __uint128_t operand = MakeUInt128(0x4713296210734043ULL, 0x7518957359614589ULL);
  ASSERT_EQ(AsmRbit(operand), MakeUInt128(0xe2c8944608ce02c2ULL, 0x0000000000000000ULL));
}
5883 
TEST(Arm64InsnTest, Rev16Int8x16) {
  // REV16: swap the bytes within each 16-bit element.
  constexpr auto AsmRev16 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev16 %0.16b, %1.16b");
  __uint128_t operand = MakeUInt128(0x9904801094121472ULL, 0x2131794764777262ULL);
  ASSERT_EQ(AsmRev16(operand), MakeUInt128(0x0499108012947214ULL, 0x3121477977646272ULL));
}
5889 
TEST(Arm64InsnTest, Rev32Int16x8) {
  // REV32: reverse the 16-bit elements within each 32-bit word.
  constexpr auto AsmRev32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev32 %0.8h, %1.8h");
  __uint128_t operand = MakeUInt128(0x8662237172159160ULL, 0x7716692547487389ULL);
  ASSERT_EQ(AsmRev32(operand), MakeUInt128(0x2371866291607215ULL, 0x6925771673894748ULL));
}
5895 
TEST(Arm64InsnTest, Rev64Int32x4) {
  // REV64: reverse the 32-bit elements within each 64-bit doubleword.
  constexpr auto AsmRev64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev64 %0.4s, %1.4s");
  __uint128_t operand = MakeUInt128(0x5306736096571209ULL, 0x1807638327166416ULL);
  ASSERT_EQ(AsmRev64(operand), MakeUInt128(0x9657120953067360ULL, 0x2716641618076383ULL));
}
5901 
TEST(Arm64InsnTest, TblInt8x8) {
  // TBL, single-register table, 64-bit index vector; indices >= 16 yield zero bytes.
  constexpr auto AsmTbl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.8b, {%1.16b}, %2.8b");
  __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t indices = MakeUInt128(0x0104011509120605ULL, 0x0315080907091312ULL);
  ASSERT_EQ(AsmTbl(table, indices), MakeUInt128(0x1144110099006655ULL, 0x0000000000000000ULL));
}
5908 
TEST(Arm64InsnTest, TblInt8x16) {
  // TBL, single-register table, 128-bit index vector.
  constexpr auto AsmTbl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.16b, {%1.16b}, %2.16b");
  __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t indices = MakeUInt128(0x0905060808010408ULL, 0x0506000206030202ULL);
  ASSERT_EQ(AsmTbl(table, indices), MakeUInt128(0x9955668888114488ULL, 0x5566002266332222ULL));
}
5915 
TEST(Arm64InsnTest, Tbl2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res;

  // Hardcode v31 and v0 so that the TBL instruction gets consecutive registers;
  // the two-register table list wraps around from v31 to v0.
  asm("mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "tbl %0.16b, {v31.16b, v0.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3)
      : "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x22005500002299ffULL, 0x8777003398000033ULL));
}
5932 
TEST(Arm64InsnTest, Tbl3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, and v0 so that the TBL instruction gets consecutive
  // registers (the three-register table list wraps around from v31 to v0).
  // The clobber list must name the registers actually written (v30, v31, v0),
  // not v1/v2, or the compiler may assume they still hold live values.
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "tbl %0.16b, {v30.16b-v0.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4)
      : "v30", "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x778760000090ff00ULL, 0x0060980000103244ULL));
}
5951 
TEST(Arm64InsnTest, Tbl4Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, v0, and v1 so that the TBL instruction gets consecutive
  // registers; the four-register table list wraps around from v31 to v0/v1.
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "mov v1.16b, %4.16b\n\t"
      "tbl %0.16b, {v30.16b-v1.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5)
      : "v30", "v31", "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x778760009f90ff5fULL, 0x5f60980000103244ULL));
}
5972 
TEST(Arm64InsnTest, TbxInt8x16) {
  // TBX: like TBL, but out-of-range indices leave the destination byte unchanged.
  constexpr auto AsmTbx = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("tbx %0.16b, {%1.16b}, %2.16b");
  __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t indices = MakeUInt128(0x0915061808010408ULL, 0x0516000206031202ULL);
  __uint128_t dest = MakeUInt128(0x6668559233565463ULL, 0x9138363185745698ULL);
  ASSERT_EQ(AsmTbx(table, indices, dest),
            MakeUInt128(0x9968669288114488ULL, 0x5538002266335622ULL));
}
5981 
TEST(Arm64InsnTest, Tbx2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  // res is pre-seeded: TBX keeps destination bytes for out-of-range indices.
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);

  // Hardcode v0 and v1 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "0"(res)
      : "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x22945584002299ffULL, 0x8777153398000333ULL));
}
5998 
TEST(Arm64InsnTest, Tbx3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  // res is pre-seeded: TBX keeps destination bytes for out-of-range indices.
  __uint128_t res = MakeUInt128(0x0136776310849135ULL, 0x1615642269847507ULL);

  // Hardcode v0, v1, and v2 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b, v2.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "0"(res)
      : "v0", "v1", "v2");

  ASSERT_EQ(res, MakeUInt128(0x778760631090ff35ULL, 0x1660980069103244ULL));
}
6017 
TEST(Arm64InsnTest, Tbx4Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  // res is pre-seeded: TBX keeps destination bytes for out-of-range indices.
  __uint128_t res = MakeUInt128(0x5818319637637076ULL, 0x1799191920357958ULL);

  // Hardcode v0, v1, v2, and v3 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "mov v3.16b, %4.16b\n\t"
      "tbx %0.16b, {v0.16b-v3.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5), "0"(res)
      : "v0", "v1", "v2", "v3");

  ASSERT_EQ(res, MakeUInt128(0x778760969f90ff5fULL, 0x5f60980020103244ULL));
}
6038 
TEST(Arm64InsnTest, Trn1Int8x8) {
  // TRN1: interleave the even-numbered byte lanes of the two sources.
  constexpr auto AsmTrn1 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn1 %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x2075916729700785ULL, 0x0580717186381054ULL);
  __uint128_t rhs = MakeUInt128(0x2786099055690013ULL, 0x4137182368370991ULL);
  ASSERT_EQ(AsmTrn1(lhs, rhs), MakeUInt128(0x8675906769701385ULL, 0x0000000000000000ULL));
}
6045 
TEST(Arm64InsnTest, Trn2Int16x8) {
  // TRN2: interleave the odd-numbered 16-bit lanes of the two sources.
  constexpr auto AsmTrn2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn2 %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x6685592335654639ULL, 0x1383631857456981ULL);
  __uint128_t rhs = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);
  ASSERT_EQ(AsmTrn2(lhs, rhs), MakeUInt128(0x7494668588443565ULL, 0x2175138334265745ULL));
}
6052 
TEST(Arm64InsnTest, Uzp1Int8x8) {
  // UZP1: concatenate the even-numbered byte lanes of the two sources.
  constexpr auto AsmUzp1 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp1 %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x4954893139394489ULL, 0x9216125525597701ULL);
  __uint128_t rhs = MakeUInt128(0x2783467926101995ULL, 0x5852247172201777ULL);
  ASSERT_EQ(AsmUzp1(lhs, rhs), MakeUInt128(0x8379109554313989ULL, 0x0000000000000000ULL));
}
6059 
TEST(Arm64InsnTest, Uzp2Int16x8) {
  // UZP2: concatenate the odd-numbered 16-bit lanes of the two sources.
  constexpr auto AsmUzp2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp2 %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x6745642390585850ULL, 0x2167190313952629ULL);
  __uint128_t rhs = MakeUInt128(0x3620129476918749ULL, 0x7519101147231528ULL);
  ASSERT_EQ(AsmUzp2(lhs, rhs), MakeUInt128(0x2167139567459058ULL, 0x7519472336207691ULL));
}
6066 
TEST(Arm64InsnTest, Zip2Int64x2) {
  // ZIP2: interleave the upper halves of the two sources.  The original test
  // executed "uzp2" despite its name; for the 2x64-bit arrangement ZIP2 and
  // UZP2 are equivalent (both produce {lhs[1], rhs[1]}), so using zip2 here
  // matches the test name without changing the expected result.
  constexpr auto AsmZip2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(0x1494271410093913ULL, 0x6913810725813781ULL);
  __uint128_t rhs = MakeUInt128(0x3578940055995001ULL, 0x8354251184172136ULL);
  ASSERT_EQ(AsmZip2(lhs, rhs), MakeUInt128(0x6913810725813781ULL, 0x8354251184172136ULL));
}
6073 
TEST(Arm64InsnTest, Zip1Int8x8) {
  // ZIP1: interleave the lower-half byte lanes of the two sources.
  constexpr auto AsmZip1 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.8b, %1.8b, %2.8b");
  __uint128_t lhs = MakeUInt128(0x7499235630254947ULL, 0x8024901141952123ULL);
  __uint128_t rhs = MakeUInt128(0x3331239480494707ULL, 0x9119153267343028ULL);
  ASSERT_EQ(AsmZip1(lhs, rhs), MakeUInt128(0x8030492547490747ULL, 0x0000000000000000ULL));
}
6080 
TEST(Arm64InsnTest, Zip1Int64x2) {
  // ZIP1 (2x64-bit): result is {lhs[0], rhs[0]}.
  constexpr auto AsmZip1 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  __uint128_t rhs = MakeUInt128(0x0551199581831963ULL, 0x7637076179919192ULL);
  ASSERT_EQ(AsmZip1(lhs, rhs), MakeUInt128(0x9243530136776310ULL, 0x0551199581831963ULL));
}
6087 
TEST(Arm64InsnTest, Zip2Int16x8) {
  // ZIP2: interleave the upper-half 16-bit lanes of the two sources.
  constexpr auto AsmZip2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x5831832713142517ULL, 0x0296923488962766ULL);
  __uint128_t rhs = MakeUInt128(0x2934595889706953ULL, 0x6534940603402166ULL);
  ASSERT_EQ(AsmZip2(lhs, rhs), MakeUInt128(0x0340889621662766ULL, 0x6534029694069234ULL));
}
6094 
TEST(Arm64InsnTest, SignedMaxInt16x8) {
  // SMAX: lane-wise signed maximum of eight 16-bit lanes.
  constexpr auto AsmSmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smax %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x9901573466102371ULL, 0x2235478911292547ULL);
  __uint128_t rhs = MakeUInt128(0x4922157650450812ULL, 0x0677173571202718ULL);
  ASSERT_EQ(AsmSmax(lhs, rhs), MakeUInt128(0x4922573466102371ULL, 0x2235478971202718ULL));
}
6101 
TEST(Arm64InsnTest, SignedMinInt16x8) {
  // SMIN: lane-wise signed minimum of eight 16-bit lanes.
  constexpr auto AsmSmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smin %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x7820385653909910ULL, 0x4775941413215432ULL);
  __uint128_t rhs = MakeUInt128(0x0084531214065935ULL, 0x8090412711359200ULL);
  ASSERT_EQ(AsmSmin(lhs, rhs), MakeUInt128(0x0084385614069910ULL, 0x8090941411359200ULL));
}
6108 
TEST(Arm64InsnTest, SignedMaxPairwiseInt16x8) {
  // SMAXP: signed maximum of adjacent 16-bit lane pairs from the concatenated sources.
  constexpr auto AsmSmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smaxp %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x6998469884770232ULL, 0x3823840055655517ULL);
  __uint128_t rhs = MakeUInt128(0x3272867600724817ULL, 0x2987637569816335ULL);
  ASSERT_EQ(AsmSmaxp(lhs, rhs), MakeUInt128(0x3823556569980232ULL, 0x6375698132724817ULL));
}
6115 
TEST(Arm64InsnTest, SignedMinPairwiseInt16x8) {
  // SMINP: signed minimum of adjacent 16-bit lane pairs from the concatenated sources.
  constexpr auto AsmSminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sminp %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x8865701568501691ULL, 0x8647488541679154ULL);
  __uint128_t rhs = MakeUInt128(0x1821553559732353ULL, 0x0686043010675760ULL);
  ASSERT_EQ(AsmSminp(lhs, rhs), MakeUInt128(0x8647915488651691ULL, 0x0430106718212353ULL));
}
6122 
TEST(Arm64InsnTest, UnsignedMaxInt16x8) {
  // UMAX: lane-wise unsigned maximum of eight 16-bit lanes.
  constexpr auto AsmUmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umax %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x7639975974619383ULL, 0x5845749159880976ULL);
  __uint128_t rhs = MakeUInt128(0x5928493695941434ULL, 0x0814685298150539ULL);
  ASSERT_EQ(AsmUmax(lhs, rhs), MakeUInt128(0x7639975995949383ULL, 0x5845749198150976ULL));
}
6129 
TEST(Arm64InsnTest, UnsignedMinInt16x8) {
  // UMIN: lane-wise unsigned minimum of eight 16-bit lanes.
  constexpr auto AsmUmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umin %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x2888773717663748ULL, 0x6027660634960353ULL);
  __uint128_t rhs = MakeUInt128(0x6983349515101986ULL, 0x4269887847171939ULL);
  ASSERT_EQ(AsmUmin(lhs, rhs), MakeUInt128(0x2888349515101986ULL, 0x4269660634960353ULL));
}
6136 
TEST(Arm64InsnTest, UnsignedMaxPairwiseInt16x8) {
  // UMAXP: unsigned maximum of adjacent 16-bit lane pairs from the concatenated sources.
  constexpr auto AsmUmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umaxp %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x1318583584066747ULL, 0x2370297149785084ULL);
  __uint128_t rhs = MakeUInt128(0x4570249413983163ULL, 0x4332378975955680ULL);
  ASSERT_EQ(AsmUmaxp(lhs, rhs), MakeUInt128(0x2971508458358406ULL, 0x4332759545703163ULL));
}
6143 
TEST(Arm64InsnTest, UnsignedMinPairwiseInt16x8) {
  // UMINP: unsigned minimum of adjacent 16-bit lane pairs from the concatenated sources.
  constexpr auto AsmUminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uminp %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x9538121791319145ULL, 0x1350099384631177ULL);
  __uint128_t rhs = MakeUInt128(0x7769055481028850ULL, 0x2080858008781157ULL);
  ASSERT_EQ(AsmUminp(lhs, rhs), MakeUInt128(0x0993117712179131ULL, 0x2080087805548102ULL));
}
6150 
TEST(Arm64InsnTest, SignedHalvingAddInt16x8) {
  // SHADD: signed lane-wise (a + b) >> 1, truncating.
  constexpr auto AsmShadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shadd %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x1021944719713869ULL, 0x2560841624511239ULL);
  __uint128_t rhs = MakeUInt128(0x8062011318454124ULL, 0x4782050110798760ULL);
  ASSERT_EQ(AsmShadd(lhs, rhs), MakeUInt128(0xc841caad18db3cc6ULL, 0x3671c48b1a65ccccULL));
}
6157 
TEST(Arm64InsnTest, SignedHalvingSubInt16x8) {
  // SHSUB: signed lane-wise (a - b) >> 1.
  constexpr auto AsmShsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shsub %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x9041210873032402ULL, 0x0106853419472304ULL);
  __uint128_t rhs = MakeUInt128(0x7666672174986986ULL, 0x8547076781205124ULL);
  ASSERT_EQ(AsmShsub(lhs, rhs), MakeUInt128(0x8ceddcf3ff35dd3eULL, 0x3ddfbee64c13e8f0ULL));
}
6164 
TEST(Arm64InsnTest, SignedRoundingHalvingAddInt16x8) {
  // SRHADD: signed lane-wise (a + b + 1) >> 1 (rounding halving add).
  constexpr auto AsmSrhadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srhadd %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x5871487839890810ULL, 0x7429530941060596ULL);
  __uint128_t rhs = MakeUInt128(0x9443158477539700ULL, 0x9439883949144323ULL);
  ASSERT_EQ(AsmSrhadd(lhs, rhs), MakeUInt128(0xf65a2efe586ecf88ULL, 0x0431eda1450d245dULL));
}
6171 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceInt16x8) {
  // SABD: lane-wise |a - b| treating lanes as signed 16-bit values.
  constexpr auto AsmSabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabd %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x1349607501116498ULL, 0x3278563531614516ULL);
  __uint128_t rhs = MakeUInt128(0x8457695687109002ULL, 0x9997698412632665ULL);
  ASSERT_EQ(AsmSabd(lhs, rhs), MakeUInt128(0x8ef208e17a01d496ULL, 0x98e1134f1efe1eb1ULL));
}
6178 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt16x8) {
  // SABDL: signed |a - b| of the lower 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmSabdl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl %0.4s, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x7419850973346267ULL, 0x9332107268687076ULL);
  __uint128_t rhs = MakeUInt128(0x8062639919361965ULL, 0x0440995421676278ULL);
  ASSERT_EQ(AsmSabdl(lhs, rhs), MakeUInt128(0x000059fe00004902ULL, 0x0000f3b70000de90ULL));
}
6185 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongUpperInt16x8) {
  // SABDL2: same as SABDL but operating on the upper 16-bit lanes.
  constexpr auto AsmSabdl2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl2 %0.4s, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x4980559610330799ULL, 0x4145347784574699ULL);
  __uint128_t rhs = MakeUInt128(0x9921285999993996ULL, 0x1228161521931488ULL);
  ASSERT_EQ(AsmSabdl2(lhs, rhs), MakeUInt128(0x00009d3c00003211ULL, 0x00002f1d00001e62ULL));
}
6192 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt16x8) {
  // SABA: accumulator += signed |a - b|, per 16-bit lane.
  // The lowest element tests the overflow.
  constexpr auto AsmSaba = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x8967'0031'9258'7fffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t rhs = MakeUInt128(0x6560'2339'1796'8000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t acc = MakeUInt128(0x8333'6555'7900'5555ULL, 0x1914'7319'8862'7135ULL);
  ASSERT_EQ(AsmSaba(lhs, rhs, acc),
            MakeUInt128(0x5f2c'885d'fe3e'5554ULL, 0xec88'7cbb'c58e'a248ULL));
}
6201 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt32x4) {
  // SABA: accumulator += signed |a - b|, per 32-bit lane.
  // The lowest element tests the overflow.
  constexpr auto AsmSaba = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeUInt128(0x8967'0031'7fff'ffffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t rhs = MakeUInt128(0x6560'2339'8000'0000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t acc = MakeUInt128(0x8333'6555'aaaa'5555ULL, 0x1914'7319'8862'7135ULL);
  ASSERT_EQ(AsmSaba(lhs, rhs, acc),
            MakeUInt128(0x5f2c'885d'aaaa'5554ULL, 0xec88'6977'c58e'a248ULL));
}
6210 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongInt16x4) {
  // SABAL: widen signed |a - b| of the lower 16-bit lanes and add to 32-bit accumulators.
  constexpr auto AsmSabal = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.4s, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x078464167452167ULL, 0x719048310967671ULL);
  __uint128_t rhs = MakeUInt128(0x344349481926268ULL, 0x110739948250607ULL);
  __uint128_t acc = MakeUInt128(0x949507350316901ULL, 0x731852119552635ULL);
  ASSERT_EQ(AsmSabal(lhs, rhs, acc),
            MakeUInt128(0x094a36265031aa02ULL, 0x073187ed195537e2ULL));
}
6218 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt32x2) {
  // SABAL (2s -> 2d): |INT32_MAX - INT32_MIN| widens to 0xffffffff without overflow.
  constexpr auto AsmSabal = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.2d, %1.2s, %2.2s");
  __uint128_t lhs = MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL);
  __uint128_t rhs = MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL);
  __uint128_t acc = MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL);
  ASSERT_EQ(AsmSabal(lhs, rhs, acc),
            MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
6226 
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  // SABAL2: like SABAL but consumes the upper 16-bit lanes.
  constexpr auto AsmSabal2 = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal2 %0.4s, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x690943470482932ULL, 0x414041114654092ULL);
  __uint128_t rhs = MakeUInt128(0x988344435159133ULL, 0x010773944111840ULL);
  __uint128_t acc = MakeUInt128(0x410768498106634ULL, 0x241048239358274ULL);
  ASSERT_EQ(AsmSabal2(lhs, rhs, acc),
            MakeUInt128(0x0410a63098108e86ULL, 0x024108863935f59cULL));
}
6235 
TEST(Arm64InsnTest, UnsignedHalvingAddInt16x8) {
  // UHADD: unsigned lane-wise (a + b) >> 1, truncating.
  constexpr auto AsmUhadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhadd %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x4775379853799732ULL, 0x2344561227858432ULL);
  __uint128_t rhs = MakeUInt128(0x9684664751333657ULL, 0x3692387201464723ULL);
  ASSERT_EQ(AsmUhadd(lhs, rhs), MakeUInt128(0x6efc4eef525666c4ULL, 0x2ceb4742146565aaULL));
}
6242 
TEST(Arm64InsnTest, UnsignedHalvingSubInt16x8) {
  // UHSUB: unsigned lane-wise (a - b) >> 1.
  constexpr auto AsmUhsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhsub %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x9926884349592876ULL, 0x1240075587569464ULL);
  __uint128_t rhs = MakeUInt128(0x1370562514001179ULL, 0x7133166207153715ULL);
  ASSERT_EQ(AsmUhsub(lhs, rhs), MakeUInt128(0x42db190f1aac0b7eULL, 0xd086f87940202ea7ULL));
}
6249 
TEST(Arm64InsnTest, UnsignedRoundingHalvingAddInt16x8) {
  // URHADD: unsigned lane-wise (a + b + 1) >> 1 (rounding halving add).
  constexpr auto AsmUrhadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urhadd %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x5066533985738887ULL, 0x8661476294434140ULL);
  __uint128_t rhs = MakeUInt128(0x1049888993160051ULL, 0x2076781035886116ULL);
  ASSERT_EQ(AsmUrhadd(lhs, rhs), MakeUInt128(0x30586de18c45446cULL, 0x536c5fb964e6512bULL));
}
6256 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceInt16x8) {
  // UABD: lane-wise |a - b| treating lanes as unsigned 16-bit values.
  constexpr auto AsmUabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabd %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x8574664607722834ULL, 0x1540311441529418ULL);
  __uint128_t rhs = MakeUInt128(0x8047825438761770ULL, 0x7904300015669867ULL);
  ASSERT_EQ(AsmUabd(lhs, rhs), MakeUInt128(0x052d1c0e310410c4ULL, 0x63c401142bec044fULL));
}
6263 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongInt16x8) {
  // UABDL: unsigned |a - b| of the lower 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmUabdl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl %0.4s, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x1614585505839727ULL, 0x4209809097817293ULL);
  __uint128_t rhs = MakeUInt128(0x2393010676638682ULL, 0x4040111304024700ULL);
  ASSERT_EQ(AsmUabdl(lhs, rhs), MakeUInt128(0x000070e0000010a5ULL, 0x00000d7f0000574fULL));
}
6270 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongUpperInt16x8) {
  // UABDL2: same as UABDL but operating on the upper 16-bit lanes.
  constexpr auto AsmUabdl2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl2 %0.4s, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x0347999588867695ULL, 0x0161249722820403ULL);
  __uint128_t rhs = MakeUInt128(0x0399546327883069ULL, 0x5976249361510102ULL);
  ASSERT_EQ(AsmUabdl2(lhs, rhs), MakeUInt128(0x00003ecf00000301ULL, 0x0000581500000004ULL));
}
6277 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateInt16x8) {
  // UABA: accumulator += unsigned |a - b|, per 16-bit lane.
  constexpr auto AsmUaba = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uaba %0.8h, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x0857466460772283ULL, 0x4154031144152941ULL);
  __uint128_t rhs = MakeUInt128(0x8804782543876177ULL, 0x0790430001566986ULL);
  __uint128_t acc = MakeUInt128(0x7767957609099669ULL, 0x3607559496515273ULL);
  ASSERT_EQ(AsmUaba(lhs, rhs, acc),
            MakeUInt128(0xf714c73725f9d55dULL, 0x6fcb9583d91092b8ULL));
}
6285 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongInt16x4) {
  // UABAL: widen unsigned |a - b| of the lower 16-bit lanes and add to 32-bit accumulators.
  constexpr auto AsmUabal = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal %0.4s, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x8343417044157348ULL, 0x2481833301640566ULL);
  __uint128_t rhs = MakeUInt128(0x9596688667695634ULL, 0x9141632842641497ULL);
  __uint128_t acc = MakeUInt128(0x4533349999480002ULL, 0x6699875888159350ULL);
  ASSERT_EQ(AsmUabal(lhs, rhs, acc),
            MakeUInt128(0x453357ed99481d16ULL, 0x669999ab8815ba66ULL));
}
6293 
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  // UABAL2: like UABAL but consumes the upper 16-bit lanes.
  constexpr auto AsmUabal2 = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal2 %0.4s, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x998685541703188ULL, 0x778867592902607ULL);
  __uint128_t rhs = MakeUInt128(0x043212666179192ULL, 0x352093822787888ULL);
  __uint128_t acc = MakeUInt128(0x988633599116081ULL, 0x235355570464634ULL);
  ASSERT_EQ(AsmUabal2(lhs, rhs, acc),
            MakeUInt128(0x0988d34d9911b302ULL, 0x0235397b7046c371ULL));
}
6302 
TEST(Arm64InsnTest, SignedAddLongPairwiseInt8x8) {
  // SADDLP: signed pairwise add of adjacent bytes, widened to 16-bit results (64-bit form).
  constexpr auto AsmSaddlp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.4h, %1.8b");
  __uint128_t operand = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(AsmSaddlp(operand), MakeUInt128(0x00c50051ffbb0099ULL, 0x0000000000000000ULL));
}
6308 
TEST(Arm64InsnTest, SignedAddLongPairwiseInt8x16) {
  // SADDLP: signed pairwise add of adjacent bytes, widened to 16-bit results (128-bit form).
  constexpr auto AsmSaddlp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.8h, %1.16b");
  __uint128_t operand = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(AsmSaddlp(operand), MakeUInt128(0x00c50051ffbb0099ULL, 0x0078ffd2006a000bULL));
}
6314 
TEST(Arm64InsnTest, SignedAddLongPairwiseInt16x4) {
  // SADDLP: signed pairwise add of adjacent 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmSaddlp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.2s, %1.4h");
  __uint128_t operand = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(AsmSaddlp(operand), MakeUInt128(0x0000a274fffffc58ULL, 0x0000000000000000ULL));
}
6320 
TEST(Arm64InsnTest, SignedAddLongPairwiseInt16x8) {
  // SADDLP: signed pairwise add of adjacent 16-bit lanes, widened to 32-bit (128-bit form).
  constexpr auto AsmSaddlp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.4s, %1.8h");
  __uint128_t operand = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(AsmSaddlp(operand), MakeUInt128(0xa274fffffc58ULL, 0xb397ffffafc6ULL));
}
6326 
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt8x16) {
  // SADALP: signed pairwise add of adjacent bytes, widened and added to 16-bit accumulators.
  constexpr auto AsmSadalp = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.8h, %1.16b");
  __uint128_t operand = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t acc = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  ASSERT_EQ(AsmSadalp(operand, acc), MakeUInt128(0x71c183272fe809c2ULL, 0x510924703608905fULL));
}
6333 
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt16x8) {
  // SADALP: signed pairwise add of adjacent 16-bit lanes, added to 32-bit accumulators.
  constexpr auto AsmSadalp = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.4s, %1.8h");
  __uint128_t operand = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t acc = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  ASSERT_EQ(AsmSadalp(operand, acc), MakeUInt128(0x72180054304fb4afULL, 0x51090e88357296efULL));
}
6340 
TEST(Arm64InsnTest, UnsignedAddLongPairwiseInt8x16) {
  // UADDLP: unsigned pairwise add of adjacent bytes, widened to 16-bit results.
  constexpr auto AsmUaddlp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlp %0.8h, %1.16b");
  __uint128_t operand = MakeUInt128(0x1483287348089574ULL, 0x7777527834422109ULL);
  ASSERT_EQ(AsmUaddlp(operand), MakeUInt128(0x0097009b00500109ULL, 0x00ee00ca0076002aULL));
}
6346 
TEST(Arm64InsnTest, UnsignedAddAccumulateLongPairwiseInt8x16) {
  // UADALP: unsigned pairwise add of adjacent bytes, widened and added to 16-bit accumulators.
  constexpr auto AsmUadalp = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("uadalp %0.8h, %1.16b");
  __uint128_t operand = MakeUInt128(0x9348154691631162ULL, 0x4928873574718824ULL);
  __uint128_t acc = MakeUInt128(0x5207665738825139ULL, 0x6391635767231510ULL);
  ASSERT_EQ(AsmUadalp(operand, acc), MakeUInt128(0x52e266b2397651acULL, 0x64026413680815bcULL));
}
6353 
TEST(Arm64InsnTest, SignedAddLong) {
  // SADDL: signed add of the lower 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmSaddl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl %0.4s, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x3478074585067606ULL, 0x3048229409653041ULL);
  __uint128_t rhs = MakeUInt128(0x1183066710818930ULL, 0x3110887172816751ULL);
  ASSERT_EQ(AsmSaddl(lhs, rhs), MakeUInt128(0xffff9587ffffff36ULL, 0x000045fb00000dacULL));
}
6360 
TEST(Arm64InsnTest, SignedAddLongUpper) {
  // SADDL2: signed add of the upper 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmSaddl2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl2 %0.4s, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x3160683158679946ULL, 0x0165205774052942ULL);
  __uint128_t rhs = MakeUInt128(0x3053601780313357ULL, 0x2632670547903384ULL);
  ASSERT_EQ(AsmSaddl2(lhs, rhs), MakeUInt128(0x0000bb9500005cc6ULL, 0x000027970000875cULL));
}
6367 
TEST(Arm64InsnTest, SignedSubLong) {
  // SSUBL: signed subtract of the lower 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmSsubl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl %0.4s, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x8566746260879482ULL, 0x0186474876727272ULL);
  __uint128_t rhs = MakeUInt128(0x2206267646533809ULL, 0x9801966883680994ULL);
  ASSERT_EQ(AsmSsubl(lhs, rhs), MakeUInt128(0x00001a34ffff5c79ULL, 0xffff636000004decULL));
}
6374 
TEST(Arm64InsnTest, SignedSubLongUpper) {
  // SSUBL2: signed subtract of the upper 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmSsubl2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl2 %0.4s, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x3011331753305329ULL, 0x8020166888174813ULL);
  __uint128_t rhs = MakeUInt128(0x4298868158557781ULL, 0x0343231753064784ULL);
  ASSERT_EQ(AsmSsubl2(lhs, rhs), MakeUInt128(0xffff35110000008fULL, 0xffff7cddfffff351ULL));
}
6381 
TEST(Arm64InsnTest, UnsignedAddLong) {
  // UADDL: unsigned add of the lower 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmUaddl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl %0.4s, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x3126059505777727ULL, 0x5424712416483128ULL);
  __uint128_t rhs = MakeUInt128(0x3298207236175057ULL, 0x4673870128209575ULL);
  ASSERT_EQ(AsmUaddl(lhs, rhs), MakeUInt128(0x00003b8e0000c77eULL, 0x000063be00002607ULL));
}
6388 
TEST(Arm64InsnTest, UnsignedAddLongUpper) {
  // UADDL2: unsigned add of the upper 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmUaddl2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl2 %0.4s, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x3384698499778726ULL, 0x7065551918544686ULL);
  __uint128_t rhs = MakeUInt128(0x9846947849573462ULL, 0x2606294219624557ULL);
  ASSERT_EQ(AsmUaddl2(lhs, rhs), MakeUInt128(0x000031b600008bddULL, 0x0000966b00007e5bULL));
}
6395 
TEST(Arm64InsnTest, UnsignedSubLong) {
  // USUBL: unsigned subtract of the lower 16-bit lanes, widened to 32-bit results.
  constexpr auto AsmUsubl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl %0.4s, %1.4h, %2.4h");
  __uint128_t lhs = MakeUInt128(0x4378111988556318ULL, 0x7777925372011667ULL);
  __uint128_t rhs = MakeUInt128(0x1853954183598443ULL, 0x8305203762819440ULL);
  ASSERT_EQ(AsmUsubl(lhs, rhs), MakeUInt128(0x000004fcffffded5ULL, 0x00002b25ffff7bd8ULL));
}
6402 
TEST(Arm64InsnTest,UnsignedSubLongUpper)6403 TEST(Arm64InsnTest, UnsignedSubLongUpper) {
6404   __uint128_t arg1 = MakeUInt128(0x5228717440266638ULL, 0x9148817173086436ULL);
6405   __uint128_t arg2 = MakeUInt128(0x1113890694202790ULL, 0x8814311944879941ULL);
6406   __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl2 %0.4s, %1.8h, %2.8h")(arg1, arg2);
6407   ASSERT_EQ(res, MakeUInt128(0x00002e81ffffcaf5ULL, 0x0000093400005058ULL));
6408 }
6409 
// "Wide" add/subtract tests: the first source already has wide lanes, and the
// second source's narrow lanes are widened (sign- or zero-extended) before the
// operation. Expected values are precomputed golden results.

// SADDW: signed add wide, 8-bit lanes of arg2 widened to 16 bits.
TEST(Arm64InsnTest, SignedAddWide8x8) {
  __uint128_t arg1 = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t arg2 = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.8h, %1.8h, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x78a9590683774119ULL, 0x902199e305af1385ULL));
}

// SADDW: 16-bit lanes of arg2 widened to 32 bits (same inputs as above).
TEST(Arm64InsnTest, SignedAddWide16x4) {
  __uint128_t arg1 = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t arg2 = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7844bf068313a519ULL, 0x9001b9e305982a85ULL));
}

// SADDW: 32-bit lanes of arg2 widened to 64 bits (same inputs as above).
TEST(Arm64InsnTest, SignedAddWide32x2) {
  __uint128_t arg1 = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t arg2 = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.2d, %1.2d, %2.2s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x78445981e898a519ULL, 0x9001999225e92a85ULL));
}

// SADDW2: signed add wide using the upper four 16-bit lanes of arg2.
TEST(Arm64InsnTest, SignedAddWideUpper) {
  __uint128_t arg1 = MakeUInt128(0x3407092233436577ULL, 0x9160128093179401ULL);
  __uint128_t arg2 = MakeUInt128(0x7185985999338492ULL, 0x3549564005709955ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x34070e923342feccULL, 0x916047c99317ea41ULL));
}

// SSUBW: signed subtract wide, lower four 16-bit lanes of arg2.
TEST(Arm64InsnTest, SignedSubWide) {
  __uint128_t arg1 = MakeUInt128(0x2302847007312065ULL, 0x8032626417116165ULL);
  __uint128_t arg2 = MakeUInt128(0x9576132723515666ULL, 0x6253667271899853ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2302611f0730c9ffULL, 0x8032ccee17114e3eULL));
}

// SSUBW2: signed subtract wide, upper four 16-bit lanes of arg2.
TEST(Arm64InsnTest, SignedSubWideUpper) {
  __uint128_t arg1 = MakeUInt128(0x4510824783572905ULL, 0x6919885554678860ULL);
  __uint128_t arg2 = MakeUInt128(0x7946280537122704ULL, 0x2466543192145281ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x4510f0338356d684ULL, 0x691963ef5467342fULL));
}

// UADDW: unsigned add wide, 8-bit lanes of arg2 widened to 16 bits.
TEST(Arm64InsnTest, UnsignedAddWide8x8) {
  __uint128_t arg1 = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t arg2 = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.8h, %1.8h, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x58d078de51b9836dULL, 0x175d53a8956e8897ULL));
}

// UADDW: 16-bit lanes of arg2 widened to 32 bits (same inputs as above).
TEST(Arm64InsnTest, UnsignedAddWide16x4) {
  __uint128_t arg1 = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t arg2 = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x5870d8de512a136dULL, 0x172987a89537bf97ULL));
}

// UADDW: 32-bit lanes of arg2 widened to 64 bits (same inputs as above).
TEST(Arm64InsnTest, UnsignedAddWide32x2) {
  __uint128_t arg1 = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t arg2 = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.2d, %1.2d, %2.2s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x58707859b1af136dULL, 0x17295351c98ebf97ULL));
}

// UADDW2: unsigned add wide, upper four 16-bit lanes of arg2.
TEST(Arm64InsnTest, UnsignedAddWideUpper) {
  __uint128_t arg1 = MakeUInt128(0x7516493270950493ULL, 0x4639382432227188ULL);
  __uint128_t arg2 = MakeUInt128(0x5159740547021482ULL, 0x8971117779237612ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7516c25570957aa5ULL, 0x4639c195322282ffULL));
}

// USUBW: unsigned subtract wide, lower four 16-bit lanes of arg2.
TEST(Arm64InsnTest, UnsignedSubWide) {
  __uint128_t arg1 = MakeUInt128(0x0625247972199786ULL, 0x6854279897799233ULL);
  __uint128_t arg2 = MakeUInt128(0x9579057581890622ULL, 0x5254735822052364ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw %0.4s, %1.4s, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0624a2f072199164ULL, 0x6853921f97798cbeULL));
}

// USUBW2: unsigned subtract wide, upper four 16-bit lanes of arg2.
TEST(Arm64InsnTest, UnsignedSubWideUpper) {
  __uint128_t arg1 = MakeUInt128(0x8242392192695062ULL, 0x0831838145469839ULL);
  __uint128_t arg2 = MakeUInt128(0x2366461363989101ULL, 0x2102177095976704ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw2 %0.4s, %1.4s, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x8241a38a9268e95eULL, 0x0831627f454680c9ULL));
}
6493 
// Widening multiply tests: SMULL/UMULL produce lanes twice the width of the
// sources. The "2" forms use the upper half of each source; the indexed forms
// (%2.h[k]) multiply every lane of arg1 by one selected lane of arg2.
// NOTE(review): the "Int8x8" in the indexed-element test names does not match
// the 4h/8h operand arrangements actually used — names kept for continuity.

// SMULL: signed multiply long, lower eight 8-bit lanes -> 16-bit products.
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9191791552241718ULL, 0x9585361680594741ULL);
  __uint128_t arg2 = MakeUInt128(0x2341933984202187ULL, 0x4564925644346239ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xd848048002f7f4a8ULL, 0xf0d3e3d1cc7b04adULL));
}

// SMULL2: signed multiply long on the upper eight 8-bit lanes.
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9314052976347574ULL, 0x8119356709110137ULL);
  __uint128_t arg2 = MakeUInt128(0x7517210080315590ULL, 0x2485309066920376ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0396f8b20003195aULL, 0xee24f3fd09f0d2f0ULL));
}

// UMULL: unsigned multiply long, lower eight 8-bit lanes.
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9149055628425039ULL, 0x1275771028402799ULL);
  __uint128_t arg2 = MakeUInt128(0x8066365825488926ULL, 0x4880254566101729ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x05c812902ad00876ULL, 0x48801d16010e1d90ULL));
}

// UMULL2: unsigned multiply long on the upper eight 8-bit lanes.
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9709683408005355ULL, 0x9849175417381883ULL);
  __uint128_t arg2 = MakeUInt128(0x9994469748676265ULL, 0x5165827658483588ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x07e80fc004f84598ULL, 0x30181ccd0bae26b8ULL));
}

// SMULL (by element): each of arg1's lower four halfwords times arg2.h[2].
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x9293459588970695ULL, 0x3653494060340216ULL);
  __uint128_t arg2 = MakeUInt128(0x6544375589004563ULL, 0x2882250545255640ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.4s, %1.4h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xe630cb23016c3279ULL, 0xe8593fcf0f0a1d79ULL));
}

// SMULL2 (by element): arg1's upper four halfwords times arg2.h[2].
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElemUpper) {
  __uint128_t arg1 = MakeUInt128(0x9279068212073883ULL, 0x7781423356282360ULL);
  __uint128_t arg2 = MakeUInt128(0x8963208068222468ULL, 0x0122482611771858ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.4s, %1.8h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0af01400047db000ULL, 0x0f2be08008677980ULL));
}

// UMULL (by element): arg1's lower four halfwords times arg2.h[2].
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x9086996033027634ULL, 0x7870810817545011ULL);
  __uint128_t arg2 = MakeUInt128(0x9307141223390866ULL, 0x3938339529425786ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x03ffbe2409445fa8ULL, 0x0b54a16c0c0648c0ULL));
}

// UMULL (by element): index 4 selects a lane from arg2's upper 64 bits.
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem2) {
  __uint128_t arg1 = MakeUInt128(0x9132710495478599ULL, 0x1801969678353214ULL);
  __uint128_t arg2 = MakeUInt128(0x6444118926063152ULL, 0x6618167443193550ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[4]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1f1659301bd26cd0ULL, 0x1e3cb9a017892540ULL));
}

// UMULL2 (by element): arg1's upper four halfwords times arg2.h[2].
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElemUpper) {
  __uint128_t arg1 = MakeUInt128(0x9815793678976697ULL, 0x4220575059683440ULL);
  __uint128_t arg2 = MakeUInt128(0x8697350201410206ULL, 0x7235850200724522ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.4s, %1.8h, %2.h[2]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x12833ad00ad1a880ULL, 0x0db1244012143ea0ULL));
}
6556 
// Widening multiply-accumulate tests: SMLAL/UMLAL add (and SMLSL/UMLSL
// subtract) the widened products into the destination. These use the
// ..._W_RES_WW0_ARG wrapper variant, whose third argument (arg3) seeds the
// destination register before the instruction runs.

// SMLAL: dest = arg3 + sign-extended products of lower eight byte lanes.
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t arg2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t arg3 = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.8h, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x3b5b1ca28ec69893ULL, 0x8b7836c02ef25620ULL));
}

// SMLAL2: accumulate products of the upper eight byte lanes.
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x5514435021828702ULL, 0x6685610665003531ULL);
  __uint128_t arg2 = MakeUInt128(0x0502163182060176ULL, 0x0921798468493686ULL);
  __uint128_t arg3 = MakeUInt128(0x3161293727951873ULL, 0x0789726373537171ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.8h, %1.16b, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x5a69293732c30119ULL, 0x0b1f6288a12c6e89ULL));
}

// SMLSL: dest = arg3 - sign-extended products of lower eight byte lanes.
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9662539339538092ULL, 0x2195591918188552ULL);
  __uint128_t arg2 = MakeUInt128(0x6780621499231727ULL, 0x6316321833989693ULL);
  __uint128_t arg3 = MakeUInt128(0x8075616855911752ULL, 0x9984501320671293ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.8h, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x9764560f61112814ULL, 0xc42a811300a11b17ULL));
}

// SMLSL2: subtract products of the upper eight byte lanes.
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9826903089111856ULL, 0x8798692947051352ULL);
  __uint128_t arg2 = MakeUInt128(0x4816091743243015ULL, 0x3836847072928989ULL);
  __uint128_t arg3 = MakeUInt128(0x8284602223730145ULL, 0x2655679898627767ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.8h, %1.16b, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x62e662482c482763ULL, 0x40cd7d88cb3e6577ULL));
}

// SMLAL on 16-bit lanes: same inputs as the 8x8 SMLAL test above, so the two
// expected values differ only through the lane arrangement.
TEST(Arm64InsnTest, SignedMultiplyAddLongInt16x4) {
  __uint128_t arg1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t arg2 = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t arg3 = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.4h")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x3b6bd2a28eac7893ULL, 0x8b4c38c02edab620ULL));
}

// UMLAL: unsigned variant — accumulate zero-extended byte products.
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x9696920253886503ULL, 0x4577183176686885ULL);
  __uint128_t arg2 = MakeUInt128(0x9236814884752764ULL, 0x9846882194973972ULL);
  __uint128_t arg3 = MakeUInt128(0x9707737187188400ULL, 0x4143231276365048ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.8h, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xc1d3b199967b852cULL, 0x96cf42b6bfc850d8ULL));
}

// UMLAL2: accumulate products of the upper eight byte lanes.
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x9055637695252326ULL, 0x5361442478023082ULL);
  __uint128_t arg2 = MakeUInt128(0x6811831037735887ULL, 0x0892406130313364ULL);
  __uint128_t arg3 = MakeUInt128(0x7737101162821461ULL, 0x4661679404090518ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.8h, %1.16b, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x8db710736c124729ULL, 0x48f99ee6150912bcULL));
}

// UMLSL: subtract zero-extended byte products from arg3.
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8) {
  __uint128_t arg1 = MakeUInt128(0x4577772457520386ULL, 0x5437542828256714ULL);
  __uint128_t arg2 = MakeUInt128(0x1288583454443513ULL, 0x2562054464241011ULL);
  __uint128_t arg3 = MakeUInt128(0x0379554641905811ULL, 0x6862305964476958ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.8h, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xe6ed3f7e40f14e1fULL, 0x6388f1213b5f6208ULL));
}

// UMLSL2: subtract products of the upper eight byte lanes.
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x4739376564336319ULL, 0x7978680367187307ULL);
  __uint128_t arg2 = MakeUInt128(0x9693924236321448ULL, 0x4503547763156702ULL);
  __uint128_t arg3 = MakeUInt128(0x5539006542311792ULL, 0x0153464977929066ULL);
  __uint128_t res =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.8h, %1.16b, %2.16b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x2d64fe6d13ec1784ULL, 0xe0b644e155728f01ULL));
}
6632 
// Scalar register-shift tests (%d = 64-bit scalar form): the shift amount is
// taken from the second operand's low byte; negative counts shift right, the
// R-variants round. Note the expected high 64 bits are always zero — the
// scalar d-form writes only the low half of the destination register, which
// also indicates MakeUInt128's first argument is the low half.

// SSHL: signed (arithmetic on right shifts) scalar shift, swept across the
// interesting count boundaries -65..-1, 0, 1, 63..65.
TEST(Arm64InsnTest, SignedShiftLeftInt64x1) {
  constexpr auto AsmSshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9007497297363549ULL, 0x6453328886984406ULL);
  ASSERT_EQ(AsmSshl(arg, -65), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, -64), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, -1), MakeUInt128(0xc803a4b94b9b1aa4ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 0), MakeUInt128(0x9007497297363549ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 1), MakeUInt128(0x200e92e52e6c6a92ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// SRSHL: signed rounding shift — right shifts round toward the result (e.g.
// the -64 case rounds a negative value up to zero, unlike SSHL above).
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt64x1) {
  constexpr auto AsmSrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  ASSERT_EQ(AsmSrshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, -1), MakeUInt128(0xc93b22bc98832bc9ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 0), MakeUInt128(0x9276457931065792ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 1), MakeUInt128(0x24ec8af2620caf24ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// USHL: unsigned scalar shift — right shifts are logical (zero-filling).
TEST(Arm64InsnTest, UnsignedShiftLeftInt64x1) {
  constexpr auto AsmUshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9138296682468185ULL, 0x7103188790652870ULL);
  ASSERT_EQ(AsmUshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, -1), MakeUInt128(0x489c14b3412340c2ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 0), MakeUInt128(0x9138296682468185ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 1), MakeUInt128(0x227052cd048d030aULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

// URSHL: unsigned rounding shift — e.g. the -64 case rounds up to 1 because
// the discarded top bit of arg is set, unlike USHL above.
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt64x1) {
  constexpr auto AsmUrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9023452924407736ULL, 0x5949563051007421ULL);
  ASSERT_EQ(AsmUrshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -64), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -1), MakeUInt128(0x4811a29492203b9bULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 0), MakeUInt128(0x9023452924407736ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 1), MakeUInt128(0x20468a524880ee6cULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6688 
// Vector register-shift tests (8 x 16-bit lanes): every lane of arg1 is
// 0x9999 and arg2 supplies a different per-lane shift count, covering large
// left shifts (0x10, 0xf), small left shifts, zero, and negative (right)
// counts (0xffff = -1, 0xfff1 = -15, 0xfff0 = -16, 0xffef = -17).

// SSHL vector: arithmetic right shifts for negative counts.
TEST(Arm64InsnTest, SignedShiftLeftInt16x8) {
  constexpr auto AsmSshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmSshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0xccccffffffffffffULL));
  ASSERT_EQ(AsmSshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}

// SRSHL vector: as SSHL but right shifts round (see the 0xcccd lane).
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt16x8) {
  constexpr auto AsmSrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmSrshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0xcccdffff00000000ULL));
  ASSERT_EQ(AsmSrshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}

// USHL vector: logical (zero-filling) right shifts for negative counts.
TEST(Arm64InsnTest, UnsignedShiftLeftInt16x8) {
  constexpr auto AsmUshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmUshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0x4ccc000100000000ULL));
  ASSERT_EQ(AsmUshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}

// URSHL vector: as USHL but right shifts round.
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt16x8) {
  constexpr auto AsmUrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmUrshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0x4ccd000100010000ULL));
  ASSERT_EQ(AsmUrshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6720 
// URSQRTE: per-lane unsigned reciprocal square root estimate; single input
// (W_RES_W_ARG wrapper), golden expected value.
TEST(Arm64InsnTest, UnsignedReciprocalSquareRootEstimateInt32x4) {
  __uint128_t arg = MakeUInt128(0x9641122821407533ULL, 0x0265510042410489ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ursqrte %0.4s, %1.4s")(arg);
  ASSERT_EQ(res, MakeUInt128(0xa7000000ffffffffULL, 0xfffffffffb800000ULL));
}

// URECPE: per-lane unsigned reciprocal estimate.
TEST(Arm64InsnTest, UnsignedReciprocalEstimateInt32x4) {
  __uint128_t arg = MakeUInt128(0x9714864899468611ULL, 0x2476054286734367ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urecpe %0.4s, %1.4s")(arg);
  ASSERT_EQ(res, MakeUInt128(0xd8800000d6000000ULL, 0xfffffffff4000000ULL));
}
6732 
IsQcBitSet(uint32_t fpsr)6733 bool IsQcBitSet(uint32_t fpsr) {
6734   return (fpsr & kFpsrQcBit) != 0;
6735 }
6736 
// Saturating add tests. These use the WQ_RES wrapper variant, which returns
// both the instruction result and the FPSR so the cumulative saturation (QC)
// flag can be checked. In these asm templates %1 is reserved by the wrapper
// (presumably for the FPSR output — see the macro in utility.h), so the
// source operands are %2/%3. Each test has a non-saturating pair
// (QC clear) and a saturating pair (result clamped, QC set).

// SQADD scalar 64-bit: second pair overflows positively and clamps to
// INT64_MAX.
TEST(Arm64InsnTest, SignedSaturatingAddInt64x1) {
  constexpr auto AsmSqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %d0, %d2, %d3");

  __uint128_t arg1 = MakeUInt128(0x4342527753119724ULL, 0x7430873043619511ULL);
  __uint128_t arg2 = MakeUInt128(0x3961190800302558ULL, 0x7838764420608504ULL);
  auto [res1, fpsr1] = AsmSqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x7ca36b7f5341bc7cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x2557185308919284ULL, 0x4038050710300647ULL);
  __uint128_t arg4 = MakeUInt128(0x7684786324319100ULL, 0x0223929785255372ULL);
  auto [res2, fpsr2] = AsmSqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}

// SQADD vector 4x32: saturating lanes clamp to INT32_MIN/INT32_MAX while the
// non-overflowing lane (0x56a8d464) passes through.
TEST(Arm64InsnTest, SignedSaturatingAddInt32x4) {
  constexpr auto AsmSqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %0.4s, %2.4s, %3.4s");

  __uint128_t arg1 = MakeUInt128(0x9883554445602495ULL, 0x5666843660292219ULL);
  __uint128_t arg2 = MakeUInt128(0x5124830910605377ULL, 0x2019802183101032ULL);
  auto [res1, fpsr1] = AsmSqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0xe9a7d84d55c0780cULL, 0x76800457e339324bULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x9713308844617410ULL, 0x7959162511714864ULL);
  __uint128_t arg4 = MakeUInt128(0x8744686112476054ULL, 0x2867343670904667ULL);
  auto [res2, fpsr2] = AsmSqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x8000000056a8d464ULL, 0x7fffffff7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}

// UQADD scalar 8-bit: only the low byte participates; saturation clamps to
// 0xff.
TEST(Arm64InsnTest, UnsignedSaturatingAddInt8x1) {
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %b0, %b2, %b3");

  __uint128_t arg1 = MakeUInt128(0x6017174229960273ULL, 0x5310276871944944ULL);
  __uint128_t arg2 = MakeUInt128(0x4917939785144631ULL, 0x5973144353518504ULL);
  auto [res1, fpsr1] = AsmUqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x00000000000000a4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x3306263695626490ULL, 0x9108276271159038ULL);
  __uint128_t arg4 = MakeUInt128(0x5699505124652999ULL, 0x6062855443838330ULL);
  auto [res2, fpsr2] = AsmUqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}

// UQADD scalar 64-bit: saturation clamps to UINT64_MAX.
TEST(Arm64InsnTest, UnsignedSaturatingAddInt64x1) {
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %d0, %d2, %d3");

  __uint128_t arg1 = MakeUInt128(0x0606885137234627ULL, 0x0799732723313469ULL);
  __uint128_t arg2 = MakeUInt128(0x3971456285542615ULL, 0x4676506324656766ULL);
  auto [res1, fpsr1] = AsmUqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x3f77cdb3bc776c3cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x9534957018600154ULL, 0x1262396228641389ULL);
  __uint128_t arg4 = MakeUInt128(0x7796733329070567ULL, 0x3769621564981845ULL);
  auto [res2, fpsr2] = AsmUqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}

// UQADD vector 4x32: one lane saturates to UINT32_MAX, the rest wrap-free.
TEST(Arm64InsnTest, UnsignedSaturatingAddInt32x4) {
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %0.4s, %2.4s, %3.4s");

  __uint128_t arg1 = MakeUInt128(0x9737425700735921ULL, 0x0031541508936793ULL);
  __uint128_t arg2 = MakeUInt128(0x0081699805365202ULL, 0x7600727749674584ULL);
  auto [res1, fpsr1] = AsmUqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x97b8abef05a9ab23ULL, 0x7631c68c51faad17ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  __uint128_t arg3 = MakeUInt128(0x9727856471983963ULL, 0x0878154322116691ULL);
  __uint128_t arg4 = MakeUInt128(0x8654522268126887ULL, 0x2684459684424161ULL);
  auto [res2, fpsr2] = AsmUqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffd9aaa1eaULL, 0x2efc5ad9a653a7f2ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6816 
TEST(Arm64InsnTest,SignedSaturatingSubtractInt32x1)6817 TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x1) {
6818   constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %s0, %s2, %s3");
6819 
6820   __uint128_t arg1 = MakeUInt128(0x3178534870760322ULL, 0x1982970579751191ULL);
6821   __uint128_t arg2 = MakeUInt128(0x4405109942358830ULL, 0x3454635349234982ULL);
6822   auto [res1, fpsr1] = AsmSqsub(arg1, arg2);
6823   ASSERT_EQ(res1, MakeUInt128(0x2e407af2ULL, 0U));
6824   ASSERT_FALSE(IsQcBitSet(fpsr1));
6825 
6826   __uint128_t arg3 = MakeUInt128(0x1423696483086410ULL, 0x2592887457999322ULL);
6827   __uint128_t arg4 = MakeUInt128(0x3749551912219519ULL, 0x0342445230753513ULL);
6828   auto [res2, fpsr2] = AsmSqsub(arg3, arg4);
6829   ASSERT_EQ(res2, MakeUInt128(0x80000000ULL, 0U));
6830   ASSERT_TRUE(IsQcBitSet(fpsr2));
6831 
6832   __uint128_t arg5 = MakeUInt128(0x3083508879584152ULL, 0x1489912761065137ULL);
6833   __uint128_t arg6 = MakeUInt128(0x4153943580721139ULL, 0x0328574918769094ULL);
6834   auto [res3, fpsr3] = AsmSqsub(arg5, arg6);
6835   ASSERT_EQ(res3, MakeUInt128(0x7fffffffULL, 0U));
6836   ASSERT_TRUE(IsQcBitSet(fpsr3));
6837 }
6838 
TEST(Arm64InsnTest,SignedSaturatingSubtractInt64x1)6839 TEST(Arm64InsnTest, SignedSaturatingSubtractInt64x1) {
6840   constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %d0, %d2, %d3");
6841 
6842   __uint128_t arg1 = MakeUInt128(0x4416125223196943ULL, 0x4712064173754912ULL);
6843   __uint128_t arg2 = MakeUInt128(0x1635700857369439ULL, 0x7305979709719726ULL);
6844   auto [res1, fpsr1] = AsmSqsub(arg1, arg2);
6845   ASSERT_EQ(res1, MakeUInt128(0x2de0a249cbe2d50aULL, 0x0000000000000000ULL));
6846   ASSERT_FALSE(IsQcBitSet(fpsr1));
6847 
6848   __uint128_t arg3 = MakeUInt128(0x7862766490242516ULL, 0x1990277471090335ULL);
6849   __uint128_t arg4 = MakeUInt128(0x9333093049483805ULL, 0x9785662884478744ULL);
6850   auto [res2, fpsr2] = AsmSqsub(arg3, arg4);
6851   ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
6852   ASSERT_TRUE(IsQcBitSet(fpsr2));
6853 }
6854 
TEST(Arm64InsnTest,SignedSaturatingSubtractInt32x4)6855 TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x4) {
6856   constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %0.4s, %2.4s, %3.4s");
6857 
6858   __uint128_t arg1 = MakeUInt128(0x4485680977569630ULL, 0x3129588719161129ULL);
6859   __uint128_t arg2 = MakeUInt128(0x2946818849363386ULL, 0x4739274760122696ULL);
6860   auto [res1, fpsr1] = AsmSqsub(arg1, arg2);
6861   ASSERT_EQ(res1, MakeUInt128(0x1b3ee6812e2062aaULL, 0xe9f03140b903ea93ULL));
6862   ASSERT_FALSE(IsQcBitSet(fpsr1));
6863 
6864   __uint128_t arg3 = MakeUInt128(0x9304127100727784ULL, 0x9301555038895360ULL);
6865   __uint128_t arg4 = MakeUInt128(0x3382619293437970ULL, 0x8187432094991415ULL);
6866   auto [res2, fpsr2] = AsmSqsub(arg3, arg4);
6867   ASSERT_EQ(res2, MakeUInt128(0x800000006d2efe14ULL, 0x117a12307fffffffULL));
6868   ASSERT_TRUE(IsQcBitSet(fpsr2));
6869 }
6870 
TEST(Arm64InsnTest,UnsignedSaturatingSubtractInt32x1)6871 TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x1) {
6872   constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %s0, %s2, %s3");
6873 
6874   __uint128_t arg1 = MakeUInt128(0x2548156091372812ULL, 0x8406333039373562ULL);
6875   __uint128_t arg2 = MakeUInt128(0x4200160456645574ULL, 0x1458816605216660ULL);
6876   auto [res1, fpsr1] = AsmUqsub(arg1, arg2);
6877   ASSERT_EQ(res1, MakeUInt128(0x3ad2d29eULL, 0U));
6878   ASSERT_FALSE(IsQcBitSet(fpsr1));
6879 
6880   __uint128_t arg3 = MakeUInt128(0x1259960281839309ULL, 0x5487090590738613ULL);
6881   __uint128_t arg4 = MakeUInt128(0x5191459181951029ULL, 0x7327875571049729ULL);
6882   auto [res2, fpsr2] = AsmUqsub(arg3, arg4);
6883   ASSERT_EQ(res2, MakeUInt128(0U, 0U));
6884   ASSERT_TRUE(IsQcBitSet(fpsr2));
6885 }
6886 
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt64x1) {
  // UQSUB on a scalar 64-bit element: unsigned subtraction clamped at zero.
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %d0, %d2, %d3");

  // No underflow: exact difference in the low doubleword, QC stays clear.
  __uint128_t lhs1 = MakeUInt128(0x9691077542576474ULL, 0x8832534141213280ULL);
  __uint128_t rhs1 = MakeUInt128(0x0626717094009098ULL, 0x2235296579579978ULL);
  auto [diff1, flags1] = AsmUqsub(lhs1, rhs1);
  ASSERT_EQ(diff1, MakeUInt128(0x906a9604ae56d3dcULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Underflow: the result saturates to zero and QC is set.
  __uint128_t lhs2 = MakeUInt128(0x7752929106925043ULL, 0x2614469501098610ULL);
  __uint128_t rhs2 = MakeUInt128(0x8889991465855188ULL, 0x1873582528164302ULL);
  auto [diff2, flags2] = AsmUqsub(lhs2, rhs2);
  ASSERT_EQ(diff2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6902 
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x4) {
  // UQSUB over four 32-bit lanes: unsigned subtraction clamped at zero.
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %0.4s, %2.4s, %3.4s");

  // Every lane stays non-negative, so QC remains clear.
  __uint128_t lhs1 = MakeUInt128(0x6884962578665885ULL, 0x9991798675205545ULL);
  __uint128_t rhs1 = MakeUInt128(0x5809900455646117ULL, 0x8755249370124553ULL);
  auto [diff1, flags1] = AsmUqsub(lhs1, rhs1);
  ASSERT_EQ(diff1, MakeUInt128(0x107b06212301f76eULL, 0x123c54f3050e0ff2ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // One lane underflows to zero, which sets the sticky QC flag.
  __uint128_t lhs2 = MakeUInt128(0x5032678340586301ULL, 0x9301932429963972ULL);
  __uint128_t rhs2 = MakeUInt128(0x0444517928812285ULL, 0x4478211953530898ULL);
  auto [diff2, flags2] = AsmUqsub(lhs2, rhs2);
  ASSERT_EQ(diff2, MakeUInt128(0x4bee160a17d7407cULL, 0x4e89720b00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6918 
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt8x1) {
  // SQABS on a scalar byte: absolute value with saturation at INT8_MAX.
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %b0, %b2");

  // abs(0x81 = -127) fits in a signed byte: 0x7f, QC clear.
  __uint128_t input1 = MakeUInt128(0x8918016855727981ULL, 0x5642185819119749ULL);
  auto [abs1, flags1] = AsmSqabs(input1);
  ASSERT_EQ(abs1, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // abs(0x80 = -128) overflows and clamps to 0x7f, setting QC.
  __uint128_t input2 = MakeUInt128(0x0000000000000080ULL, 0x6464607287574305ULL);
  auto [abs2, flags2] = AsmSqabs(input2);
  ASSERT_EQ(abs2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6932 
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt64x1) {
  // SQABS on a scalar doubleword: absolute value saturating at INT64_MAX.
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %d0, %d2");

  // Ordinary negation of a negative value: no saturation, QC clear.
  __uint128_t input1 = MakeUInt128(0x9717317281315179ULL, 0x3290443112181587ULL);
  auto [abs1, flags1] = AsmSqabs(input1);
  ASSERT_EQ(abs1, MakeUInt128(0x68e8ce8d7eceae87ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // abs(INT64_MIN) clamps to INT64_MAX and sets QC.
  __uint128_t input2 = MakeUInt128(0x8000000000000000ULL, 0x1001237687219447ULL);
  auto [abs2, flags2] = AsmSqabs(input2);
  ASSERT_EQ(abs2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6946 
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt32x4) {
  // SQABS over four 32-bit lanes: per-lane saturating absolute value.
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %0.4s, %2.4s");

  // Only the negative lane changes; no lane saturates, QC stays clear.
  __uint128_t input1 = MakeUInt128(0x9133820578492800ULL, 0x6982551957402018ULL);
  auto [abs1, flags1] = AsmSqabs(input1);
  ASSERT_EQ(abs1, MakeUInt128(0x6ecc7dfb78492800ULL, 0x6982551957402018ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // The 0x80000000 lane clamps to 0x7fffffff and raises QC.
  __uint128_t input2 = MakeUInt128(0x1810564129725083ULL, 0x6070356880000000ULL);
  auto [abs2, flags2] = AsmSqabs(input2);
  ASSERT_EQ(abs2, MakeUInt128(0x1810564129725083ULL, 0x607035687fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6960 
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x1) {
  // SQNEG on a scalar 32-bit element: negation saturating at INT32_MAX.
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %s0, %s2");

  // Plain negation of a negative low word: QC clear.
  __uint128_t input1 = MakeUInt128(0x6461582694563802ULL, 0x3950283712168644ULL);
  auto [neg1, flags1] = AsmSqneg(input1);
  ASSERT_EQ(neg1, MakeUInt128(0x000000006ba9c7feULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // -(INT32_MIN) clamps to INT32_MAX and sets QC.
  __uint128_t input2 = MakeUInt128(0x6561785280000000ULL, 0x1277128269186886ULL);
  auto [neg2, flags2] = AsmSqneg(input2);
  ASSERT_EQ(neg2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6974 
TEST(Arm64InsnTest, SignedSaturatingNegateInt64x1) {
  // SQNEG on a scalar doubleword: negation saturating at INT64_MAX.
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %d0, %d2");

  // Plain negation: no saturation, QC clear.
  __uint128_t input1 = MakeUInt128(0x9703600795698276ULL, 0x2639234410714658ULL);
  auto [neg1, flags1] = AsmSqneg(input1);
  ASSERT_EQ(neg1, MakeUInt128(0x68fc9ff86a967d8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // -(INT64_MIN) clamps to INT64_MAX and sets QC.
  __uint128_t input2 = MakeUInt128(0x8000000000000000ULL, 0x4052295369374997ULL);
  auto [neg2, flags2] = AsmSqneg(input2);
  ASSERT_EQ(neg2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
6988 
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x4) {
  // SQNEG over four 32-bit lanes: per-lane saturating negation.
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %0.4s, %2.4s");

  // All four lanes negate without overflow: QC stays clear.
  __uint128_t input1 = MakeUInt128(0x9172320202822291ULL, 0x4886959399729974ULL);
  auto [neg1, flags1] = AsmSqneg(input1);
  ASSERT_EQ(neg1, MakeUInt128(0x6e8dcdfefd7ddd6fULL, 0xb7796a6d668d668cULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // The 0x80000000 lane clamps to 0x7fffffff, raising QC.
  __uint128_t input2 = MakeUInt128(0x2974711553718589ULL, 0x2423849380000000ULL);
  auto [neg2, flags2] = AsmSqneg(input2);
  ASSERT_EQ(neg2, MakeUInt128(0xd68b8eebac8e7a77ULL, 0xdbdc7b6d7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7002 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x1) {
  // SQSHL (immediate) on a scalar 32-bit element, shifting left by 20.
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %s0, %s2, #20");

  // Small operand: the shifted value still fits, QC clear.
  __uint128_t input1 = MakeUInt128(0x9724611600000181ULL, 0x0003509892864120ULL);
  auto [shifted1, flags1] = AsmSqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x0000000018100000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Large operand: the result clamps to INT32_MAX and QC is set.
  __uint128_t input2 = MakeUInt128(0x4195163551108763ULL, 0x2042676129798265ULL);
  auto [shifted2, flags2] = AsmSqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7016 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt64x1) {
  // SQSHL (immediate) on a scalar doubleword, shifting left by 28.
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %d0, %d2, #28");

  // The shifted value fits in a signed doubleword: QC clear.
  __uint128_t input1 = MakeUInt128(0x0000000774000539ULL, 0x2622760323659751ULL);
  auto [shifted1, flags1] = AsmSqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x7740005390000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Negative operand overflows and clamps to INT64_MIN, setting QC.
  __uint128_t input2 = MakeUInt128(0x9938714995449137ULL, 0x3020518436690767ULL);
  auto [shifted2, flags2] = AsmSqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7030 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x4) {
  // SQSHL (immediate) over four 32-bit lanes, shifting left by 12.
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %0.4s, %2.4s, #12");

  // All lanes small enough to shift without overflow: QC clear.
  __uint128_t input1 = MakeUInt128(0x0007256800042011ULL, 0x0000313500033555ULL);
  auto [shifted1, flags1] = AsmSqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x7256800042011000ULL, 0x0313500033555000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes clamp to both INT32_MAX and INT32_MIN; QC is set.
  __uint128_t input2 = MakeUInt128(0x0944031900072034ULL, 0x8651010561049872ULL);
  auto [shifted2, flags2] = AsmSqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0x7fffffff72034000ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7044 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt32x1) {
  // SQSHL (register) on a scalar 32-bit element: the shift count comes from a
  // register; negative counts shift right, positive counts shift left with
  // signed saturation.  Sweep counts around the +/-32 element-width boundary.
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %s0, %s2, %s3");

  __uint128_t value = MakeUInt128(0x7480771811555330ULL, 0x9098870255052076ULL);

  // Right shifts at or beyond the element width flush the result to zero.
  auto [out_m33, flags_m33] = AsmSqshl(value, -33);
  ASSERT_EQ(out_m33, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_m33));

  auto [out_m32, flags_m32] = AsmSqshl(value, -32);
  ASSERT_EQ(out_m32, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_m32));

  auto [out_m31, flags_m31] = AsmSqshl(value, -31);
  ASSERT_EQ(out_m31, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_m31));

  // Shift by -1 halves the low word (0x11555330 >> 1).
  auto [out_m1, flags_m1] = AsmSqshl(value, -1);
  ASSERT_EQ(out_m1, MakeUInt128(0x08aaa998ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_m1));

  // Shift by zero passes the low word through untouched.
  auto [out_0, flags_0] = AsmSqshl(value, 0);
  ASSERT_EQ(out_0, MakeUInt128(0x11555330ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_0));

  // Shift by 1 doubles the low word without overflowing.
  auto [out_p1, flags_p1] = AsmSqshl(value, 1);
  ASSERT_EQ(out_p1, MakeUInt128(0x22aaa660ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_p1));

  // Large left shifts saturate to INT32_MAX and set QC.
  auto [out_p31, flags_p31] = AsmSqshl(value, 31);
  ASSERT_EQ(out_p31, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags_p31));

  auto [out_p32, flags_p32] = AsmSqshl(value, 32);
  ASSERT_EQ(out_p32, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags_p32));

  auto [out_p33, flags_p33] = AsmSqshl(value, 33);
  ASSERT_EQ(out_p33, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags_p33));
}
7088 
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt64x1) {
  // UQSHL (immediate) on a scalar doubleword, shifting left by 28.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %d0, %d2, #28");

  // The shifted value fits in 64 bits: QC clear.
  __uint128_t input1 = MakeUInt128(0x0000000961573564ULL, 0x8883443185280853ULL);
  auto [shifted1, flags1] = AsmUqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x9615735640000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Overflow clamps to UINT64_MAX and sets QC.
  __uint128_t input2 = MakeUInt128(0x9759277344336553ULL, 0x8418834030351782ULL);
  auto [shifted2, flags2] = AsmUqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7102 
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt32x4) {
  // UQSHL (immediate) over four 32-bit lanes, shifting left by 12.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %0.4s, %2.4s, #12");

  // All lanes shift without overflow: QC clear.
  __uint128_t input1 = MakeUInt128(0x0000326300096218ULL, 0x0004565900066853ULL);
  auto [shifted1, flags1] = AsmUqshl(input1);
  ASSERT_EQ(shifted1, MakeUInt128(0x0326300096218000ULL, 0x4565900066853000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Two lanes clamp to UINT32_MAX; QC is set.
  __uint128_t input2 = MakeUInt128(0x0009911314010804ULL, 0x0009732335449090ULL);
  auto [shifted2, flags2] = AsmUqshl(input2);
  ASSERT_EQ(shifted2, MakeUInt128(0x99113000ffffffffULL, 0x97323000ffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7116 
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt32x1) {
  // UQSHL (register) on a scalar 32-bit element: register-supplied shift
  // count, negative counts shift right.  Sweep counts around +/-32.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %s0, %s2, %s3");

  __uint128_t value = MakeUInt128(0x9714978507414585ULL, 0x3085781339156270ULL);

  // Right shifts at or beyond the element width flush the result to zero.
  auto [out_m33, flags_m33] = AsmUqshl(value, -33);
  ASSERT_EQ(out_m33, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_m33));

  auto [out_m32, flags_m32] = AsmUqshl(value, -32);
  ASSERT_EQ(out_m32, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_m32));

  auto [out_m31, flags_m31] = AsmUqshl(value, -31);
  ASSERT_EQ(out_m31, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_m31));

  // Shift by -1 halves the low word (0x07414585 >> 1).
  auto [out_m1, flags_m1] = AsmUqshl(value, -1);
  ASSERT_EQ(out_m1, MakeUInt128(0x03a0a2c2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_m1));

  // Shift by zero is the identity on the low word.
  auto [out_0, flags_0] = AsmUqshl(value, 0);
  ASSERT_EQ(out_0, MakeUInt128(0x07414585ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_0));

  // Shift by 1 doubles the low word without overflowing.
  auto [out_p1, flags_p1] = AsmUqshl(value, 1);
  ASSERT_EQ(out_p1, MakeUInt128(0x0e828b0aULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags_p1));

  // Large left shifts saturate to UINT32_MAX and set QC.
  auto [out_p31, flags_p31] = AsmUqshl(value, 31);
  ASSERT_EQ(out_p31, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags_p31));

  auto [out_p32, flags_p32] = AsmUqshl(value, 32);
  ASSERT_EQ(out_p32, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags_p32));

  auto [out_p33, flags_p33] = AsmUqshl(value, 33);
  ASSERT_EQ(out_p33, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags_p33));
}
7160 
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt16x8) {
  // SQSHL (register) over eight 16-bit lanes with per-lane shift counts,
  // both negative (right shift) and positive (left shift).
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %0.8h, %2.8h, %3.8h");

  __uint128_t shifts = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);

  // Shifting zero by anything yields zero and leaves QC clear.
  __uint128_t zeros = 0U;
  auto [out1, flags1] = AsmSqshl(zeros, shifts);
  ASSERT_EQ(out1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes with large positive counts clamp to 0x7fff and raise QC.
  __uint128_t pattern = MakeUInt128(0x3333333333333333ULL, 0x3333333333333333ULL);
  auto [out2, flags2] = AsmSqshl(pattern, shifts);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000001999ULL, 0x66667fff7fff7fffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7175 
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt16x8) {
  // UQSHL (register) over eight 16-bit lanes with per-lane shift counts.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %0.8h, %2.8h, %3.8h");

  __uint128_t shifts = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);

  // Shifting zero by anything yields zero and leaves QC clear.
  __uint128_t zeros = 0U;
  auto [out1, flags1] = AsmUqshl(zeros, shifts);
  ASSERT_EQ(out1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes with large positive counts clamp to 0xffff and raise QC.
  __uint128_t pattern = MakeUInt128(0x7777777777777777ULL, 0x7777777777777777ULL);
  auto [out2, flags2] = AsmUqshl(pattern, shifts);
  ASSERT_EQ(out2, MakeUInt128(0x0000000000003bbbULL, 0xeeeeffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7190 
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  // SQXTN: narrow two signed doublewords to two signed words, saturating.
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %0.2s, %2.2d");

  // Both doublewords exceed the 32-bit signed range: clamp both ways, QC set.
  __uint128_t wide1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [narrow1, flags1] = AsmSqxtn(wide1);
  ASSERT_EQ(narrow1, MakeUInt128(0x800000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Both doublewords already fit in 32 bits: pass through, QC clear.
  __uint128_t wide2 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  auto [narrow2, flags2] = AsmSqxtn(wide2);
  ASSERT_EQ(narrow2, MakeUInt128(0x7ecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7204 
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  // SQXTN (scalar): narrow one signed doubleword to a signed word.
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %s0, %d2");

  // Value exceeds INT32_MAX: clamps to 0x7fffffff and sets QC.
  __uint128_t wide1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [narrow1, flags1] = AsmSqxtn(wide1);
  ASSERT_EQ(narrow1, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Value already fits in 32 bits: passes through, QC clear.
  __uint128_t wide2 = MakeUInt128(0x0000000012345678ULL, 0x0ULL);
  auto [narrow2, flags2] = AsmSqxtn(wide2);
  ASSERT_EQ(narrow2, MakeUInt128(0x0000000012345678ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7218 
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  // UQXTN: narrow two unsigned doublewords to two unsigned words, saturating.
  constexpr auto AsmUqstn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %0.2s, %2.2d");

  // Both doublewords exceed UINT32_MAX: clamp to 0xffffffff, QC set.
  __uint128_t wide1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [narrow1, flags1] = AsmUqstn(wide1);
  ASSERT_EQ(narrow1, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Both doublewords already fit in 32 bits: pass through, QC clear.
  __uint128_t wide2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [narrow2, flags2] = AsmUqstn(wide2);
  ASSERT_EQ(narrow2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7232 
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  // UQXTN (scalar): narrow one unsigned doubleword to an unsigned word.
  constexpr auto AsmUqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %s0, %d2");

  // Value exceeds UINT32_MAX: clamps to 0xffffffff and sets QC.
  __uint128_t wide1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [narrow1, flags1] = AsmUqxtn(wide1);
  ASSERT_EQ(narrow1, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Value already fits in 32 bits: passes through, QC clear.
  __uint128_t wide2 = MakeUInt128(0x0000000087654321ULL, 0x0ULL);
  auto [narrow2, flags2] = AsmUqxtn(wide2);
  ASSERT_EQ(narrow2, MakeUInt128(0x0000000087654321ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7246 
TEST(Arm64InsnTest, SignedSaturatingExtractNarrow2Int64x2ToInt32x2) {
  // SQXTN2: narrow two signed doublewords into the UPPER half of the
  // destination while the lower half (pre-loaded via the first operand) is
  // left untouched.
  constexpr auto AsmSqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtn2 %0.4s, %2.2d");

  __uint128_t preload = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);

  // Both source doublewords saturate; the low half of `preload` survives.
  __uint128_t wide1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [out1, flags1] = AsmSqxtn2(wide1, preload);
  ASSERT_EQ(out1, MakeUInt128(0x6121865619673378ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Both source doublewords fit: copied verbatim into the upper half.
  __uint128_t wide2 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  auto [out2, flags2] = AsmSqxtn2(wide2, preload);
  ASSERT_EQ(out2, MakeUInt128(0x6121865619673378ULL, 0x7ecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7262 
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrow2Int64x2ToInt32x4) {
  // UQXTN2: narrow two unsigned doublewords into the UPPER half of the
  // destination, preserving the pre-loaded lower half.
  constexpr auto AsmUqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqxtn2 %0.4s, %2.2d");

  __uint128_t preload = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);

  // Both source doublewords exceed UINT32_MAX: upper half saturates, QC set.
  __uint128_t wide1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [out1, flags1] = AsmUqxtn2(wide1, preload);
  ASSERT_EQ(out1, MakeUInt128(0x6121865619673378ULL, 0xffffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Both source doublewords fit: copied verbatim into the upper half.
  __uint128_t wide2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [out2, flags2] = AsmUqxtn2(wide2, preload);
  ASSERT_EQ(out2, MakeUInt128(0x6121865619673378ULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7278 
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x2ToInt32x2) {
  // SQXTUN: narrow two signed doublewords to two UNSIGNED words, clamping
  // to [0, UINT32_MAX].
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %0.2s, %2.2d");

  // The second doubleword exceeds UINT32_MAX: clamps to 0xffffffff, QC set.
  __uint128_t wide1 = MakeUInt128(0x0000000044332211ULL, 0x00000001aabbccddULL);
  auto [out1, flags1] = AsmSqxtun(wide1);
  ASSERT_EQ(out1, MakeUInt128(0xffffffff44332211ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Both doublewords fit in an unsigned word: pass through, QC clear.
  __uint128_t wide2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [out2, flags2] = AsmSqxtun(wide2);
  ASSERT_EQ(out2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7292 
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x1ToInt32x1) {
  // SQXTUN (scalar): narrow one signed doubleword to an unsigned word.
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %s0, %d2");

  // Value exceeds UINT32_MAX: clamps to 0xffffffff and sets QC.
  __uint128_t wide1 = MakeUInt128(0x00000001ff332211ULL, 0x0ULL);
  auto [out1, flags1] = AsmSqxtun(wide1);
  ASSERT_EQ(out1, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Value fits in an unsigned word: passes through, QC clear.
  __uint128_t wide2 = MakeUInt128(0x00000000ff332211ULL, 0x0ULL);
  auto [out2, flags2] = AsmSqxtun(wide2);
  ASSERT_EQ(out2, MakeUInt128(0x00000000ff332211ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7306 
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrow2Int64x2ToInt32x4) {
  // SQXTUN2: narrow two signed doublewords to unsigned words placed in the
  // UPPER half of the destination; the pre-loaded lower half is preserved.
  constexpr auto AsmSqxtun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtun2 %0.4s, %2.2d");

  __uint128_t preload = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);

  // Negative doubleword clamps to zero; the in-range one passes through.
  __uint128_t wide1 = MakeUInt128(0x0000000089abcdefULL, 0xfedcba9876543210ULL);
  auto [out1, flags1] = AsmSqxtun2(wide1, preload);
  ASSERT_EQ(out1, MakeUInt128(0x0123456789abcdefULL, 0x0000000089abcdefULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // Both doublewords fit in an unsigned word: copied verbatim, QC clear.
  __uint128_t wide2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [out2, flags2] = AsmSqxtun2(wide2, preload);
  ASSERT_EQ(out2, MakeUInt128(0x0123456789abcdefULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7322 
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x1) {
  // SUQADD (scalar, 32-bit): signed accumulator plus unsigned operand with
  // signed saturation; the first argument pre-loads the destination register.
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %s0, %s2");

  // Sum fits in a signed word: QC clear.
  __uint128_t acc1 = MakeUInt128(0x9392023115638719ULL, 0x5080502467972579ULL);
  __uint128_t add1 = MakeUInt128(0x2497605762625913ULL, 0x3285597263712112ULL);
  auto [sum1, flags1] = AsmSuqadd(acc1, add1);
  ASSERT_EQ(sum1, MakeUInt128(0x0000000077c5e02cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Sum exceeds INT32_MAX: clamps to 0x7fffffff and sets QC.
  __uint128_t acc2 = MakeUInt128(0x9099791776687477ULL, 0x4481882870632315ULL);
  __uint128_t add2 = MakeUInt128(0x5158650328981642ULL, 0x2828823274686610ULL);
  auto [sum2, flags2] = AsmSuqadd(acc2, add2);
  ASSERT_EQ(sum2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7338 
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x4) {
  // SUQADD over four 32-bit lanes: signed accumulator plus unsigned operand
  // with per-lane signed saturation.
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %0.4s, %2.4s");

  // No lane overflows: QC stays clear.
  __uint128_t acc1 = MakeUInt128(0x2590181000350989ULL, 0x2864120419516355ULL);
  __uint128_t add1 = MakeUInt128(0x1108763204267612ULL, 0x9798265294258829ULL);
  auto [sum1, flags1] = AsmSuqadd(acc1, add1);
  ASSERT_EQ(sum1, MakeUInt128(0x36988e42045b7f9bULL, 0xbffc3856ad76eb7eULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Two lanes clamp to INT32_MAX; QC is set.
  __uint128_t acc2 = MakeUInt128(0x9082888934938376ULL, 0x4393992569006040ULL);
  __uint128_t add2 = MakeUInt128(0x6731142209331219ULL, 0x5936202982972351ULL);
  auto [sum2, flags2] = AsmSuqadd(acc2, add2);
  ASSERT_EQ(sum2, MakeUInt128(0x7fffffff3dc6958fULL, 0x7fffffffeb978391ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7354 
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x1) {
  // USQADD (scalar, 32-bit): unsigned accumulator plus signed operand with
  // unsigned saturation at both ends of the range.
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %s0, %s2");

  // Sum stays within [0, UINT32_MAX]: QC clear.
  __uint128_t acc1 = MakeUInt128(0x9052523242348615ULL, 0x3152097693846104ULL);
  __uint128_t add1 = MakeUInt128(0x2582849714963475ULL, 0x3418375620030149ULL);
  auto [sum1, flags1] = AsmUsqadd(acc1, add1);
  ASSERT_EQ(sum1, MakeUInt128(0x0000000056caba8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Sum goes negative: clamps to zero and sets QC.
  __uint128_t acc2 = MakeUInt128(0x9887125387801719ULL, 0x6071816407812484ULL);
  __uint128_t add2 = MakeUInt128(0x7847257912407824ULL, 0x5443616823452395ULL);
  auto [sum2, flags2] = AsmUsqadd(acc2, add2);
  ASSERT_EQ(sum2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Sum exceeds UINT32_MAX: clamps to 0xffffffff and sets QC.
  __uint128_t acc3 = MakeUInt128(0x9708583970761645ULL, 0x8229630324424328ULL);
  __uint128_t add3 = MakeUInt128(0x2377374595170285ULL, 0x6069806788952176ULL);
  auto [sum3, flags3] = AsmUsqadd(acc3, add3);
  ASSERT_EQ(sum3, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7376 
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x4) {
  // USQADD over four 32-bit lanes: unsigned accumulator plus signed operand
  // with per-lane unsigned saturation.
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %0.4s, %2.4s");

  // No lane leaves the unsigned range: QC stays clear.
  __uint128_t acc1 = MakeUInt128(0x4129137074982305ULL, 0x7592909166293919ULL);
  __uint128_t add1 = MakeUInt128(0x5014721157586067ULL, 0x2700925477180257ULL);
  auto [sum1, flags1] = AsmUsqadd(acc1, add1);
  ASSERT_EQ(sum1, MakeUInt128(0x913d8581cbf0836cULL, 0x9c9322e5dd413b70ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes clamp at both ends (0xffffffff and 0x00000000); QC is set.
  __uint128_t acc2 = MakeUInt128(0x7816422828823274ULL, 0x6866106592732197ULL);
  __uint128_t add2 = MakeUInt128(0x9071623846421534ULL, 0x8985247621678905ULL);
  auto [sum2, flags2] = AsmUsqadd(acc2, add2);
  ASSERT_EQ(sum2, MakeUInt128(0xffffffff6ec447a8ULL, 0xf1eb34db00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7392 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt32x1) {
  // SQRSHL on a scalar 32-bit element: register-supplied shift count with
  // rounding on negative (right) shifts and signed saturation on left shifts.
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %s0, %s2, %s3");

  __uint128_t value = MakeUInt128(0x9736705435580445ULL, 0x8657202276378404ULL);

  // Right shifts at or beyond the element width round away to zero.
  auto [out_m33, flags_m33] = AsmSqrshl(value, -33);
  ASSERT_EQ(out_m33, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_m33));

  auto [out_m32, flags_m32] = AsmSqrshl(value, -32);
  ASSERT_EQ(out_m32, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_m32));

  auto [out_m31, flags_m31] = AsmSqrshl(value, -31);
  ASSERT_EQ(out_m31, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_m31));

  // Shift by -1 halves the low word and rounds up the dropped bit.
  auto [out_m1, flags_m1] = AsmSqrshl(value, -1);
  ASSERT_EQ(out_m1, MakeUInt128(0x000000001aac0223ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_m1));

  // Shift by zero passes the low word through untouched.
  auto [out_0, flags_0] = AsmSqrshl(value, 0);
  ASSERT_EQ(out_0, MakeUInt128(0x0000000035580445ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_0));

  // Shift by 1 doubles the low word without overflowing.
  auto [out_p1, flags_p1] = AsmSqrshl(value, 1);
  ASSERT_EQ(out_p1, MakeUInt128(0x000000006ab0088aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_p1));

  // Large left shifts saturate to INT32_MAX and set QC.
  auto [out_p31, flags_p31] = AsmSqrshl(value, 31);
  ASSERT_EQ(out_p31, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_p31));

  auto [out_p32, flags_p32] = AsmSqrshl(value, 32);
  ASSERT_EQ(out_p32, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_p32));

  auto [out_p33, flags_p33] = AsmSqrshl(value, 33);
  ASSERT_EQ(out_p33, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_p33));
}
7436 
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt16x8) {
  // SQRSHL over eight 16-bit lanes: per-lane shift counts, rounding right
  // shifts and saturating left shifts.
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %0.8h, %2.8h, %3.8h");

  __uint128_t shifts = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);

  // Small values with mixed counts: no lane saturates, QC clear.
  __uint128_t src1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  auto [out1, flags1] = AsmSqrshl(src1, shifts);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000132ULL, 0xcccd000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes with large left shifts clamp to 0x7fff and raise QC.
  __uint128_t src2 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [out2, flags2] = AsmSqrshl(src2, shifts);
  ASSERT_EQ(out2, MakeUInt128(0x7fff7fff7fff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7451 
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt32x1) {
  // UQRSHL on a scalar 32-bit element: register-supplied shift count with
  // rounding right shifts and unsigned saturation on left shifts.
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %s0, %s2, %s3");

  __uint128_t value = MakeUInt128(0x9984124848262367ULL, 0x3771467226061633ULL);

  // Shifts past the element width round to zero...
  auto [out_m33, flags_m33] = AsmUqrshl(value, -33);
  ASSERT_EQ(out_m33, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_m33));

  auto [out_m32, flags_m32] = AsmUqrshl(value, -32);
  ASSERT_EQ(out_m32, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_m32));

  // ...while -31 keeps one rounded bit.
  auto [out_m31, flags_m31] = AsmUqrshl(value, -31);
  ASSERT_EQ(out_m31, MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_m31));

  // Shift by -1 halves the low word with rounding.
  auto [out_m1, flags_m1] = AsmUqrshl(value, -1);
  ASSERT_EQ(out_m1, MakeUInt128(0x00000000241311b4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_m1));

  // Shift by zero is the identity on the low word.
  auto [out_0, flags_0] = AsmUqrshl(value, 0);
  ASSERT_EQ(out_0, MakeUInt128(0x0000000048262367ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_0));

  // Shift by 1 doubles the low word without overflowing.
  auto [out_p1, flags_p1] = AsmUqrshl(value, 1);
  ASSERT_EQ(out_p1, MakeUInt128(0x00000000904c46ceULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags_p1));

  // Large left shifts saturate to UINT32_MAX and set QC.
  auto [out_p31, flags_p31] = AsmUqrshl(value, 31);
  ASSERT_EQ(out_p31, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_p31));

  auto [out_p32, flags_p32] = AsmUqrshl(value, 32);
  ASSERT_EQ(out_p32, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_p32));

  auto [out_p33, flags_p33] = AsmUqrshl(value, 33);
  ASSERT_EQ(out_p33, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags_p33));
}
7495 
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt16x8) {
  // UQRSHL over eight 16-bit lanes: per-lane shift counts, rounding right
  // shifts and unsigned saturation on left shifts.
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %0.8h, %2.8h, %3.8h");

  __uint128_t shifts = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);

  // Mixed counts on small values: no lane saturates, QC clear.
  __uint128_t src1 = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  auto [out1, flags1] = AsmUqrshl(src1, shifts);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000132ULL, 0x4ccd000000010000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes with large left shifts clamp to 0xffff and raise QC.
  __uint128_t src2 = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  auto [out2, flags2] = AsmUqrshl(src2, shifts);
  ASSERT_EQ(out2, MakeUInt128(0xffffffffffff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7510 
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x1) {
  // SQSHRN (scalar): shift a signed halfword right by 4 and narrow to a
  // byte with signed saturation.
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %b0, %h2, #4");

  // Shifted value fits in a signed byte: QC clear.
  __uint128_t input1 = MakeUInt128(0x888786614762f943ULL, 0x4140104988899316ULL);
  auto [out1, flags1] = AsmSqshrn(input1);
  ASSERT_EQ(out1, MakeUInt128(0x94U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Shifted value overflows a signed byte: clamps to 0x7f and sets QC.
  __uint128_t input2 = MakeUInt128(0x0051207678103588ULL, 0x6116602029611936ULL);
  auto [out2, flags2] = AsmSqshrn(input2);
  ASSERT_EQ(out2, MakeUInt128(0x7fU, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7524 
// SQSHRN (vector): signed saturating shift right narrow, 8x16-bit -> 8x8-bit,
// result written to the lower half of the destination.
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8) {
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %0.8b, %2.8h, #4");

  // All lanes narrow without saturation; QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0625051604340253ULL, 0x0299028602670568ULL);
  auto [res1, fpsr1] = AsmSqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x2928265662514325ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0x7f/0x80 (signed max/min); QC is set.
  __uint128_t arg2 = MakeUInt128(0x2405806005642114ULL, 0x9386436864224724ULL);
  auto [res2, fpsr2] = AsmSqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x807f7f7f7f80567fULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7538 
// SQSHRN2: same narrowing as above but writes the upper 64 bits of the
// destination while preserving its lower 64 bits (arg2 acts as the
// pre-existing destination value).
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmSqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrn2 %0.16b, %2.8h, #4");

  // No saturation: lower half of the result equals arg2's lower half.
  __uint128_t arg1 = MakeUInt128(0x0367034704100536ULL, 0x0175064803000078ULL);
  __uint128_t arg2 = MakeUInt128(0x3494819262681110ULL, 0x7399482506073949ULL);
  auto [res1, fpsr1] = AsmSqshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x3494819262681110ULL, 0x1764300736344153ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturating lanes clamp to 0x7f in the upper half; QC is set.
  __uint128_t arg3 = MakeUInt128(0x4641074501673719ULL, 0x0483109676711344ULL);
  auto [res2, fpsr2] = AsmSqshrn2(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x3494819262681110ULL, 0x487f7f7f7f74167fULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7553 
// UQSHRN (scalar): unsigned saturating shift right narrow, 16-bit -> 8-bit.
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x1) {
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %b0, %h2, #4");

  // Shifted value fits in a byte: no saturation.
  __uint128_t arg1 = MakeUInt128(0x6797172898220360ULL, 0x7028806908776866ULL);
  auto [res1, fpsr1] = AsmUqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x36U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Shifted value exceeds UINT8_MAX: clamps to 0xff and sets QC.
  __uint128_t arg2 = MakeUInt128(0x0593252746378405ULL, 0x3976918480820410ULL);
  auto [res2, fpsr2] = AsmUqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7567 
// UQSHRN (vector): unsigned saturating shift right narrow, 8x16 -> 8x8 bits.
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8) {
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %0.8b, %2.8h, #4");

  // All lanes fit after the shift; QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0867067907600099ULL, 0x0693007509490515ULL);
  auto [res1, fpsr1] = AsmUqshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6907945186677609ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0xff; QC is set.
  __uint128_t arg2 = MakeUInt128(0x2736049811890413ULL, 0x0433116627747123ULL);
  auto [res2, fpsr2] = AsmUqshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x43ffffffff49ff41ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7581 
// UQSHRN2: unsigned saturating shift right narrow into the upper half of the
// destination; the lower half (arg2) is preserved.
// NOTE(review): "Unigned" in the test name is a typo for "Unsigned"; kept
// as-is because renaming would change the test's public identifier.
TEST(Arm64InsnTest, UnignedSaturatingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmUqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqshrn2 %0.16b, %2.8h, #4");

  // No saturation; lower 64 bits pass through from arg2.
  __uint128_t arg1 = MakeUInt128(0x0441018407410768ULL, 0x0981066307240048ULL);
  __uint128_t arg2 = MakeUInt128(0x2393582740194493ULL, 0x5665161088463125ULL);
  auto [res1, fpsr1] = AsmUqshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x2393582740194493ULL, 0x9866720444187476ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0xff in the upper half; QC is set.
  __uint128_t arg3 = MakeUInt128(0x0785297709734684ULL, 0x3030614624180358ULL);
  auto [res2, fpsr2] = AsmUqshrn2(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x2393582740194493ULL, 0xffffff3578ff97ffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7596 
// SQRSHRN (scalar): signed saturating *rounding* shift right narrow,
// 16-bit -> 8-bit. Covers the no-saturation, positive-clamp and
// negative-clamp cases.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x1) {
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %b0, %h2, #4");

  // Result fits in a signed byte after rounding: QC clear.
  __uint128_t arg1 = MakeUInt128(0x9610330799410534ULL, 0x7784574699992128ULL);
  auto [res1, fpsr1] = AsmSqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000053ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Positive overflow clamps to 0x7f; QC set.
  __uint128_t arg2 = MakeUInt128(0x5999993996122816ULL, 0x1521931488876938ULL);
  auto [res2, fpsr2] = AsmSqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Negative overflow clamps to 0x80 (INT8_MIN); QC set.
  __uint128_t arg3 = MakeUInt128(0x8022281083009986ULL, 0x0165494165426169ULL);
  auto [res3, fpsr3] = AsmSqrshrn(arg3);
  ASSERT_EQ(res3, MakeUInt128(0x0000000000000080ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7615 
// SQRSHRN (vector): signed saturating rounding shift right narrow,
// 8x16 -> 8x8 bits.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8) {
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %0.8b, %2.8h, #4");

  // No lane saturates; QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0666070401700260ULL, 0x0520059204930759ULL);
  auto [res1, fpsr1] = AsmSqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x5259497666701726ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Mixed positive/negative clamps (0x7f / 0x80); QC set.
  __uint128_t arg2 = MakeUInt128(0x4143408146852981ULL, 0x5053947178900451ULL);
  auto [res2, fpsr2] = AsmSqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7f807f457f7f7f7fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7629 
// SQRSHRN2: rounding narrow into the upper destination half, lower half
// (second argument) preserved.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmSqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrn2 %0.16b, %2.8h, #4");

  // No saturation; arg2's low 64 bits flow through unchanged.
  __uint128_t arg1 = MakeUInt128(0x0784017103960497ULL, 0x0707072501740336ULL);
  __uint128_t arg2 = MakeUInt128(0x5662725928440620ULL, 0x4302141137199227ULL);
  auto [res1, fpsr1] = AsmSqrshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x5662725928440620ULL, 0x7072173378173949ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Clamping lanes (0x7f / 0x80) in the upper half; QC set.
  __uint128_t arg3 = MakeUInt128(0x2066886512756882ULL, 0x6614973078865701ULL);
  __uint128_t arg4 = MakeUInt128(0x5685016918647488ULL, 0x5416791545965072ULL);
  auto [res2, fpsr2] = AsmSqrshrn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x5685016918647488ULL, 0x7f807f7f7f807f7fULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7645 
// UQRSHRN (scalar): unsigned saturating rounding shift right narrow,
// 16-bit -> 8-bit.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x1) {
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %b0, %h2, #4");

  // Fits after rounding; QC clear.
  __uint128_t arg1 = MakeUInt128(0x9614236585950920ULL, 0x9083073323356034ULL);
  auto [res1, fpsr1] = AsmUqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000092ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflow clamps to 0xff; QC set.
  __uint128_t arg2 = MakeUInt128(0x8465318730299026ULL, 0x6596450137183754ULL);
  auto [res2, fpsr2] = AsmUqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7659 
// UQRSHRN (vector): unsigned saturating rounding shift right narrow,
// 8x16 -> 8x8 bits.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8) {
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %0.8b, %2.8h, #4");

  // No lane saturates; QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0301067603860240ULL, 0x0011030402470073ULL);
  auto [res1, fpsr1] = AsmUqrshrn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0130240730673824ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0xff; QC set.
  __uint128_t arg2 = MakeUInt128(0x5085082872462713ULL, 0x4946368501815469ULL);
  auto [res2, fpsr2] = AsmUqrshrn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffff18ffff83ffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7673 
// UQRSHRN2: unsigned rounding narrow into the upper destination half, lower
// half (second argument) preserved.
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqrshrn2 %0.16b, %2.8h, #4");

  // No saturation; arg2's low 64 bits pass through.
  __uint128_t arg1 = MakeUInt128(0x0388099005730661ULL, 0x0237022304780112ULL);
  __uint128_t arg2 = MakeUInt128(0x0392269110277722ULL, 0x6102544149221576ULL);
  auto [res1, fpsr1] = AsmUqrshrn(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0392269110277722ULL, 0x2322481139995766ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing lanes clamp to 0xff in the upper half; QC set.
  __uint128_t arg3 = MakeUInt128(0x9254069617600504ULL, 0x7974928060721268ULL);
  __uint128_t arg4 = MakeUInt128(0x8414695726397884ULL, 0x2560084531214065ULL);
  auto [res2, fpsr2] = AsmUqrshrn(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x8414695726397884ULL, 0xffffffffff69ff50ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7689 
// SQSHRUN (scalar): signed input, shift right and narrow to an *unsigned*
// byte. Negative inputs clamp to 0, positive overflow clamps to 0xff.
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x1) {
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %b0, %h2, #4");

  // In-range: no saturation.
  __uint128_t arg1 = MakeUInt128(0x9143611439920063ULL, 0x8005083214098760ULL);
  auto [res1, fpsr1] = AsmSqshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x06U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative source lane clamps to 0x00; QC set.
  __uint128_t arg2 = MakeUInt128(0x3815174571259975ULL, 0x4953580239983146ULL);
  auto [res2, fpsr2] = AsmSqshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00U, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Positive overflow clamps to 0xff; QC set.
  __uint128_t arg3 = MakeUInt128(0x4599309324851025ULL, 0x1682944672606661ULL);
  auto [res3, fpsr3] = AsmSqshrun(arg3);
  ASSERT_EQ(res3, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7708 
// SQSHRUN (vector): signed -> unsigned saturating narrow, 8x16 -> 8x8 bits.
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8) {
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %0.8b, %2.8h, #4");

  // All lanes in unsigned byte range; QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0911066408340874ULL, 0x0800074107250670ULL);
  auto [res1, fpsr1] = AsmSqshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x8074726791668387ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes clamp to 0xff (positive overflow) or 0x00 (negative); QC set.
  __uint128_t arg2 = MakeUInt128(0x4792258319129415ULL, 0x7390809143831384ULL);
  auto [res2, fpsr2] = AsmSqshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xff00ffffffffff00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7722 
// SQSHRUN2: signed -> unsigned saturating narrow into the upper destination
// half; lower half (second argument) preserved.
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8Upper) {
  constexpr auto AsmSqshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrun2 %0.16b, %2.8h, #4");

  // No saturation; arg2's low 64 bits pass through.
  __uint128_t arg1 = MakeUInt128(0x0625082101740415ULL, 0x0233074903960353ULL);
  __uint128_t arg2 = MakeUInt128(0x0136178653673760ULL, 0x6421667781377399ULL);
  auto [res1, fpsr1] = AsmSqshrun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0136178653673760ULL, 0x2374393562821741ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Clamping lanes (0xff / 0x00) in the upper half; QC set.
  __uint128_t arg3 = MakeUInt128(0x4295810545651083ULL, 0x1046297282937584ULL);
  __uint128_t arg4 = MakeUInt128(0x1611625325625165ULL, 0x7249807849209989ULL);
  auto [res2, fpsr2] = AsmSqshrun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x1611625325625165ULL, 0xffff00ffff00ffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7738 
// SQRSHRUN (scalar): signed input, *rounding* shift right, narrow to an
// unsigned byte (negative clamps to 0, positive overflow clamps to 0xff).
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x1) {
  constexpr auto AsmSqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %b0, %h2, #4");

  // In-range: no saturation.
  __uint128_t arg1 = MakeUInt128(0x5760186946490886ULL, 0x8154528562134698ULL);
  auto [res1, fpsr1] = AsmSqrshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x88ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative source clamps to 0x00; QC set.
  __uint128_t arg2 = MakeUInt128(0x8355444560249556ULL, 0x6684366029221951ULL);
  auto [res2, fpsr2] = AsmSqrshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Positive overflow clamps to 0xff; QC set.
  __uint128_t arg3 = MakeUInt128(0x2483091060537720ULL, 0x1980218310103270ULL);
  auto [res3, fpsr3] = AsmSqrshrun(arg3);
  ASSERT_EQ(res3, MakeUInt128(0xffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7757 
// SQRSHRUN (vector): signed -> unsigned rounding saturating narrow,
// 8x16 -> 8x8 bits.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8) {
  constexpr auto AsmSqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %0.8b, %2.8h, #4");

  // No lane saturates; QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0150069001490702ULL, 0x0673033808340550ULL);
  auto [res1, fpsr1] = AsmSqrshrun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6734835515691570ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes clamp to 0xff / 0x00; QC set.
  __uint128_t arg2 = MakeUInt128(0x8363660178487710ULL, 0x6080980426924713ULL);
  auto [res2, fpsr2] = AsmSqrshrun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xff00ffff00ffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7771 
// SQRSHRUN2: signed -> unsigned rounding saturating narrow into the upper
// destination half; lower half (second argument) preserved.
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8Upper) {
  constexpr auto AsmSqrshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrun2 %0.16b, %2.8h, #4");

  // No saturation; arg2's low 64 bits pass through.
  __uint128_t arg1 = MakeUInt128(0x0733049502080757ULL, 0x0651018705990498ULL);
  __uint128_t arg2 = MakeUInt128(0x5693795623875551ULL, 0x6175754380917805ULL);
  auto [res1, fpsr1] = AsmSqrshrun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x5693795623875551ULL, 0x65185a4a73492175ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Clamping lanes in the upper half; QC set.
  __uint128_t arg3 = MakeUInt128(0x1444671298615527ULL, 0x5982014514102756ULL);
  __uint128_t arg4 = MakeUInt128(0x0068929750246304ULL, 0x0173514891945763ULL);
  auto [res2, fpsr2] = AsmSqrshrun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0068929750246304ULL, 0xff14ffffffff00ffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7787 
// SQSHLU (scalar): signed saturating shift left by immediate with unsigned
// result range. Negative inputs clamp to 0, overflow clamps to 0xffffffff.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x1) {
  constexpr auto AsmSqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %s0, %s2, #4");

  // In-range: plain left shift by 4, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9704033001862556ULL, 0x1473321177711744ULL);
  auto [res1, fpsr1] = AsmSqshlu(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x18625560ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative input clamps to 0; QC set.
  __uint128_t arg2 = MakeUInt128(0x3095760196946490ULL, 0x8868154528562134ULL);
  auto [res2, fpsr2] = AsmSqshlu(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x00000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Positive overflow clamps to 0xffffffff; QC set.
  __uint128_t arg3 = MakeUInt128(0x1335028160884035ULL, 0x1781452541964320ULL);
  auto [res3, fpsr3] = AsmSqshlu(arg3);
  ASSERT_EQ(res3, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7806 
// SQSHLU (vector): per-lane signed saturating shift left by immediate with
// unsigned result range, 4x32-bit lanes.
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x4) {
  constexpr auto AsmSqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %0.4s, %2.4s, #4");

  // All lanes shift cleanly; QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0865174507877133ULL, 0x0813875205980941ULL);
  auto [res1, fpsr1] = AsmSqshlu(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x8651745078771330ULL, 0x8138752059809410ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes clamp to 0xffffffff (overflow) or 0 (negative input); QC set.
  __uint128_t arg2 = MakeUInt128(0x2174227300352296ULL, 0x0080891797050682ULL);
  auto [res2, fpsr2] = AsmSqshlu(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffffffff03522960ULL, 0x0808917000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7820 
// SQDMULL (vector, lower halves): signed saturating doubling multiply long,
// 2x32-bit -> 2x64-bit. 0x80000000 * 0x80000000 doubled is the only case
// that saturates (to INT64_MAX).
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x2) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.2s");

  // Small operands: no saturation; upper source halves are ignored.
  __uint128_t arg1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000010ULL, 0x000000000000000cULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows: clamps to 0x7fff...f, QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0xfeed00040000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000010ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7836 
// SQDMULL (vector, lower halves): signed saturating doubling multiply long,
// 4x16-bit -> 4x32-bit.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong16x4) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.4h");

  // No saturation; upper 64 bits of the sources are ignored.
  __uint128_t arg1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000f00000000010ULL, 0x000000400000000cULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled clamps to INT32_MAX in that lane; QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0x8000000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000f00000000010ULL, 0x7fffffff0000000cULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7852 
// SQDMULL2: signed saturating doubling multiply long operating on the
// *upper* 2x32-bit lanes of each source.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x2) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.4s");

  // Upper-lane products stay in range; QC clear.
  __uint128_t arg1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000800000040ULL, 0xffddc4ed7f98e000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled in the upper lane clamps; QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000040ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7868 
// SQDMULL2: signed saturating doubling multiply long on the *upper*
// 4x16-bit lanes of each source.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper16x4) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.8h");

  // Upper-lane products stay in range; QC clear.
  __uint128_t arg1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x00000226ff6ae4b6ULL, 0x00b4e592fffd8eceULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled in an upper lane clamps to INT32_MAX; QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000040ULL, 0x7fffffff00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7884 
// SQDMULL (by element): multiplies each lower 32-bit lane of the first
// source by element [1] of the second source.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x2IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.s[1]");

  // Element s[1] == 2: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t arg2 = MakeUInt128(0x0000000200000000ULL, 0x000000000000000ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000000004488cd10ULL, 0x0000000000880088ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN lane times INT32_MIN element clamps to INT64_MAX; QC set.
  __uint128_t arg3 = MakeUInt128(0x0022002280000000ULL, 0x1122334400110011LL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000000ULL, 0x000000000000000ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xffddffde00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7900 
// SQDMULL (by element): multiplies each lower 16-bit lane of the first
// source by element h[4] of the second source.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x4IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.h[4]");

  // Element h[4] == 2: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t arg2 = MakeUInt128(0x000f000f000f000fULL, 0x000f000f000f0002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000044880000cd10ULL, 0x0000008800000088ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 lane times 0x8000 element clamps to INT32_MAX; QC set.
  __uint128_t arg3 = MakeUInt128(0x0022002280000000ULL, 0x1122334400118000ULL);
  __uint128_t arg4 = MakeUInt128(0x1111111122222222ULL, 0x1122334411228000ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00000000ULL, 0xffde0000ffde0000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7916 
// SQDMULL2 (by element): multiplies the *upper* 2x32-bit lanes of the first
// source by element s[3] of the second source.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper64x2IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.s[3]");

  // Element s[3] == 2: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x00000002ffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000440044ULL, 0x000000004488cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN upper lane times INT32_MIN element clamps; QC set.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x1122334480000000ULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x80000000ffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xeeddccbc00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7932 
// SQDMULL2 (by element): multiplies the *upper* 4x16-bit lanes of the first
// source by element h[7] of the second source.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x4IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.h[7]");

  // Element h[7] == 2: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x0002ffffffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000004400000044ULL, 0x000044880000cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 lane times 0x8000 element doubled clamps to INT32_MAX.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x112233448000ffffULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x8000ffffffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00010000ULL, 0xeede0000ccbc0000ULL));
  // Fix: this assertion was missing. The saturating case must set the QC
  // (cumulative saturation) bit — every sibling SQDMULL test checks it, and
  // the expected 0x7fffffff lane above shows saturation occurred.
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7947 
// SQDMULL (scalar): signed saturating doubling multiply long,
// 32-bit x 32-bit -> 64-bit.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %s3");
  // In-range product: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0222244440000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled clamps to INT64_MAX; QC set.
  __uint128_t arg3 = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff1180000000ULL, 0xffffffff11223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7962 
// SQDMULL (scalar): signed saturating doubling multiply long,
// 16-bit x 16-bit -> 32-bit.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %h3");
  // In-range product: no saturation.
  __uint128_t arg1 = MakeUInt128(0x1111111811112222ULL, 0xf000000700080006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510004444ULL, 0xf000000300080002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000012343210ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled clamps to INT32_MAX; QC set.
  __uint128_t arg3 = MakeUInt128(0xaabbccdd00008000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff1100008000ULL, 0xffffffff11223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7977 
// SQDMULL (scalar, by element): 16-bit scalar times element h[7] of the
// second source, widened to 32 bits.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %3.h[7]");
  // In-range product: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510000000ULL, 0x1111000300000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x00000000048d0c84ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled clamps to INT32_MAX; QC set.
  __uint128_t arg3 = MakeUInt128(0xaabbccddaabb8000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000aabb11223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7992 
// SQDMULL (scalar, by element): 32-bit scalar times element s[3] of the
// second source, widened to 64 bits.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1IndexedElem) {
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %3.s[3]");
  // In-range product: no saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000000006666ccccULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled clamps to INT64_MAX; QC set.
  __uint128_t arg3 = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000000011223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8007 
// SQDMLAL (vector, lower halves): signed saturating doubling multiply-add
// long, 2x32-bit products accumulated into 2x64-bit lanes of the third
// operand. Saturation can occur in the multiply or in the accumulate.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x2) {
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.2s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (INT32_MIN * INT32_MIN doubled).
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000080000000910ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator already at INT64_MAX).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8035 
// SQDMLAL (vector, lower halves): signed saturating doubling multiply-add
// long, 4x16-bit products accumulated into 4x32-bit lanes.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong16x4) {
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.4h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010001011100ULL, 0x03f0040004024600ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0369cba90369cba9ULL, 0x7fffffff0369cba9ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane already at INT32_MAX).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffff12345678ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffff12356678ULL, 0x00000a0000013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8063 
// SQDMLAL2: signed saturating doubling multiply-add long using the *upper*
// 2x32-bit lanes of each multiplicand.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x2) {
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.4s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (INT32_MIN * INT32_MIN in upper lanes).
  __uint128_t arg4 = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t arg5 = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x00024a0066000d00ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane already at INT64_MAX).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x7fffffffffffffffULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x13419a0a7d513f58ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8091 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper16x4) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.8h");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x020d03f81c24e9e0ULL, 0x050d06f81f24ece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; the lane clamps to 0x7fffffff
  // and QC is set.
  lhs = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  rhs = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x03b9fa8703b9fa87ULL, 0x7fffffff03b9fa87ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the accumulate step overflows instead; the lane clamps and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x1234567812345678ULL, 0x7fffffff0000b000ULL);
  auto [res_asat, fpsr_asat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_asat, MakeUInt128(0x134159702d593f58ULL, 0x7fffffff1b2598e0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_asat));
}
8119 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %s3");

  // Case 1: product and accumulation both fit; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x12345678000000ffULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x167ce349000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; the result clamps to
  // 0x7fffffffffffffff and QC is set.
  lhs = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  rhs = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  acc = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the accumulate step overflows instead; QC is set.
  lhs = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res_asat, fpsr_asat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_asat, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_asat));
}
8147 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %h3");

  // Case 1: product and accumulation both fit; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x0000000001011100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; the result clamps to 0x7fffffff
  // and QC is set.
  lhs = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  rhs = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  acc = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the accumulate step overflows instead; QC is set.
  lhs = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0xaabbccdd7fffffffULL, 0x00000a000000b000ULL);
  auto [res_asat, fpsr_asat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_asat, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_asat));
}
8175 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x2IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.s[1]");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; the lane clamps to
  // 0x7fffffffffffffff and QC is set.
  lhs = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  rhs = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  acc = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x000007fc00000900ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the accumulate step overflows instead; the lane clamps and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res_asat, fpsr_asat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_asat, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_asat));
}
8203 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x4IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.h[7]");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x012eb10b89bbca1fULL, 0xfedf0524765b0d28ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; the lane clamps to 0x7fffffff
  // and QC is set.
  lhs = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0xbbbc4567777f4567ULL, 0x7fffffff00004567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the accumulate step overflows instead; the lane clamps and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  acc = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res_asat, fpsr_asat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_asat, MakeUInt128(0x7fffffff004d4bffULL, 0x0026b00000275600ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_asat));
}
8231 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper64x2IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.s[3]");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; the lane clamps to
  // 0x7fffffffffffffff and QC is set.
  lhs = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  acc = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x7fffffffffffffffULL, 0xf1e0cfbf04040404ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the accumulate step overflows instead; the lane clamps and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res_asat, fpsr_asat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_asat, MakeUInt128(0x7fffffffffffffffULL, 0x010d4d926b1d98e0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_asat));
}
8259 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x4IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.h[7]");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x0230485f8a1d9e4fULL, 0xffe9bd9076c60270ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; the lane clamps to 0x7fffffff
  // and QC is set.
  lhs = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x023645677fffffffULL, 0x0236456702364567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the accumulate step overflows instead; the lane clamps and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  acc = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res_asat, fpsr_asat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_asat, MakeUInt128(0x7fffffff0071d05fULL, 0x010d0cf800728060ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_asat));
}
8287 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %3.s[3]");

  // Case 1: product and accumulation both fit; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x012eb3d4d07fc65fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; the result clamps to
  // 0x7fffffffffffffff and QC is set.
  lhs = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  rhs = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the accumulate step overflows instead; QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  acc = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res_asat, fpsr_asat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_asat, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_asat));
}
8315 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %3.h[7]");

  // Case 1: product and accumulation both fit; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x0000000089bbca1fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; the result clamps to 0x7fffffff
  // and QC is set.
  lhs = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the accumulate step overflows instead; QC is set.
  lhs = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  acc = MakeUInt128(0xaabbccdd7fffffffULL, 0x0011223344556677ULL);
  auto [res_asat, fpsr_asat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_asat, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_asat));
}
8343 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x2) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.2s");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x0000000080000001ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0000100000000001ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x00001003fffffff9ULL, 0x0400040004000400ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  rhs = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  acc = MakeUInt128(0x0000000000000900ULL, 0x00000a000000b000ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x00000000000008f0ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the lane clamps to
  // 0x8000000000000000 and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8371 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong16x4) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.4h");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x0100010000fef100ULL, 0x0410040003fdc200ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  rhs = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0xfedcbf25fedcbf25ULL, 0x81234568fedcbf25ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the lane clamps to
  // 0x80000000 and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x8000000012345678ULL, 0x00000a000000b000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x8000000012334678ULL, 0x00000a0000002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8399 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x2) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.4s");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  rhs = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  acc = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0xfffdc5ff9a000500ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the lane clamps to
  // 0x8000000000000000 and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x1234567812345678ULL, 0x8000000000000000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x112712e5a7176d98ULL, 0x8000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8427 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper16x4) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.8h");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0xfff2fe08e5db1820ULL, 0x02f30108e8db1b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  rhs = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0xfe8c9047fe8c9047ULL, 0x81234568fe8c9047ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the lane clamps to
  // 0x80000000 and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x1234567812345678ULL, 0x800000000000b000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x11275380f70f6d98ULL, 0x80000000e4dbc720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8455 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %s3");

  // Case 1: product and subtraction both fit; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x12345678000000ffULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x0debc9a7000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  rhs = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  acc = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x9122334411111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the result clamps to
  // 0x8000000000000000 and QC is set.
  lhs = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8483 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %h3");

  // Case 1: product and subtraction both fit; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x0000000000fef100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  rhs = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  acc = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x0000000091111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the result clamps to
  // 0x80000000 and QC is set.
  lhs = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0xaabbccdd80000000ULL, 0x00000a000000b000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8511 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x2IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.s[1]");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x010000fef0fef100ULL, 0x040003ff7bff7c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  rhs = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  acc = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x0000080400000900ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the lane clamps to
  // 0x8000000000000000 and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8539 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x4IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.h[7]");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x0117d9c3899bd1bfULL, 0xfeda700c764d56f8ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x468a45678ac74567ULL, 0x8123456802464567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the lane clamps to
  // 0x80000000 and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x80000000ffb2b400ULL, 0xffd96400ffda0a00ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8567 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper64x2IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.s[3]");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  acc = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x8101010102020203ULL, 0x1425364704040404ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the lane clamps to
  // 0x8000000000000000 and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x8000000000000000ULL, 0xfef2c66d94e3c720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8595 
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x4IndexedElem) {
  constexpr auto AsmOp = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.h[7]");

  // Case 1: no lane saturates; QC remains clear.
  __uint128_t lhs = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t rhs = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t acc = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res_ok, fpsr_ok] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ok, MakeUInt128(0x0016426f8939fd8fULL, 0xfdcfb7a075e261b0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr_ok));

  // Case 2: the doubling multiply overflows; QC is set.
  lhs = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  rhs = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  acc = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res_msat, fpsr_msat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_msat, MakeUInt128(0x0010456781234568ULL, 0x0010456700104567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_msat));

  // Case 3: the subtract step overflows instead; the lane clamps to
  // 0x80000000 and QC is set.
  lhs = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  rhs = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  acc = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res_ssat, fpsr_ssat] = AsmOp(lhs, rhs, acc);
  ASSERT_EQ(res_ssat, MakeUInt128(0x80000000ff8e2fa0ULL, 0xfef30708ff8edfa0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr_ssat));
}
8623 
// sqdmlsl (scalar, by element): the 32-bit source in %s2 is multiplied by
// lane 3 of %3, the product is doubled, and the result is subtracted from the
// 64-bit accumulator in %d0; FPSR.QC records any saturation.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0117d6fa42d7d57fULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x8123456701234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8651 
// sqdmlsl (scalar, by element): the 16-bit source in %h2 is multiplied by
// lane 7 of %3, the product is doubled, and the result is subtracted from the
// 32-bit accumulator in %s0; FPSR.QC records any saturation.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x00000000899bd1bfULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication.
  __uint128_t arg4 = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000000081234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction.
  __uint128_t arg7 = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd80000000ULL, 0x0011223344556677ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x0000000080000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8679 
// sqrdmulh (vector, 4x32): per lane, the doubled product is rounded and its
// high 32 bits kept, saturating to int32 range; FPSR.QC is set on saturation.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.4s");

  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed9UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is 0x80000000 * 0x80000000: the doubled product overflows int32 and
  // saturates to 0x7fffffff.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed2UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8695 
// sqrdmulh (vector, 2x32): 64-bit form; the upper half of the result register
// must be zeroed, so the 0xdeadc0de filler lanes verify they are ignored.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.2s");

  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x3, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x80000000 * 0x80000000 doubled overflows int32; saturates to 0x7fffffff.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8711 
// sqrdmulh (vector, 8x16): per lane, the doubled product is rounded and its
// high 16 bits kept, saturating to int16 range; FPSR.QC is set on saturation.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.8h");

  __uint128_t arg1 = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0002000040000000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane is 0x8000 * 0x8000: the doubled product overflows int16 and
  // saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000100020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8727 
// sqrdmulh (vector, 4x16): 64-bit form; 0xdeadc0de filler in the upper half
// verifies the unused lanes are ignored and the result's top half is zeroed.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.4h");

  __uint128_t arg1 = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0003000040000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane is 0x8000 * 0x8000: doubled product saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000100020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8743 
// sqrdmulh (vector, by element, 4x32): every lane of %2 is multiplied by the
// single broadcast lane %3.s[0]; rounding and saturation per lane as usual.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.s[0]");

  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeed, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 7 instead of 8.
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is 0x80000000 * 0x80000000: saturates to 0x7fffffff and sets QC.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8760 
// sqrdmulh (vector, by element, 2x32): both active lanes of %2 are multiplied
// by the broadcast lane %3.s[0]; upper 64 bits of the result must be zero.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.s[0]");

  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is 0x80000000 * 0x80000000: saturates to 0x7fffffff and sets QC.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8776 
// sqrdmulh (vector, by element, 8x16): all lanes of %2 multiplied by the
// broadcast lane %3.h[7]; rounding and int16 saturation per lane.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.h[7]");

  __uint128_t arg1 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0008fff800040000ULL, 0x0000000800020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Broadcast element is 0x8000; the 0x8000 lane of arg3 saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8792 
// sqrdmulh (vector, by element, 4x16): lower four lanes of %2 multiplied by
// the broadcast lane %3.h[7]; upper 64 bits of the result must be zero.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.h[7]");

  __uint128_t arg1 = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0004fffc00030000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Broadcast element is 0x8000; the 0x8000 lane of arg3 saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8808 
// sqrdmulh (scalar, 32-bit): operates on %s registers only; 0xfeedfeed filler
// in the other lanes verifies they are ignored and zeroed in the result.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %s3");

  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x80000000 * 0x80000000 doubled overflows int32; saturates to 0x7fffffff.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8825 
// sqrdmulh (scalar, 16-bit): operates on %h registers only; 0xfeed filler in
// the other lanes verifies they are ignored and zeroed in the result.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %h3");

  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled overflows int16; saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8841 
// sqrdmulh (scalar, by element, 32-bit): %s2 is multiplied by lane 2 of %3;
// rounding and int32 saturation as in the vector forms.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %3.s[2]");

  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x80000000 * 0x80000000 doubled overflows int32; saturates to 0x7fffffff.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8858 
// sqrdmulh (scalar, by element, 16-bit): %h2 is multiplied by lane 7 of %3;
// rounding and int16 saturation as in the vector forms.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %3.h[7]");

  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled overflows int16; saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8875 
// sqdmulh (vector, 4x32): like sqrdmulh but without rounding — compare lane 2
// of the first result (0x7ddc4ed8 here vs 0x7ddc4ed9 in the rounding test).
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.4s");

  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is 0x80000000 * 0x80000000: saturates to 0x7fffffff and sets QC.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed1UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8891 
// sqdmulh (vector, 2x32): non-rounding counterpart of the sqrdmulh 2s test;
// the truncated result here is 2 where the rounding form produced 3.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.2s");

  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x80000000 * 0x80000000 doubled overflows int32; saturates to 0x7fffffff.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffff, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8907 
// sqdmulh (vector, 8x16): non-rounding counterpart of the sqrdmulh 8h test;
// the 0x7fff*0x4000 lane truncates to 0x3fff instead of rounding to 0x4000.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.8h");

  __uint128_t arg1 = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000200003fff0000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane is 0x8000 * 0x8000: doubled product saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000000020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8923 
// sqdmulh (vector, 4x16): 64-bit non-rounding form; upper half of the result
// must be zero and the 0xdeadc0de filler lanes must be ignored.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.4h");

  __uint128_t arg1 = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000200003fff0000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane is 0x8000 * 0x8000: doubled product saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000000020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8939 
// sqdmulh (vector, by element, 4x32): every lane of %2 multiplied by the
// broadcast lane %3.s[0]; truncating (no rounding), saturating to int32.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.s[0]");

  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is 0x80000000 * 0x80000000: saturates to 0x7fffffff and sets QC.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8955 
// sqdmulh (vector, by element, 2x32): both active lanes of %2 multiplied by
// the broadcast lane %3.s[0]; upper 64 bits of the result must be zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.s[0]");

  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is 0x80000000 * 0x80000000: saturates to 0x7fffffff and sets QC.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8971 
// sqdmulh (vector, by element, 8x16): all lanes of %2 multiplied by the
// broadcast lane %3.h[7]; truncating, saturating to int16.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.h[7]");

  __uint128_t arg1 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0007fff800040000ULL, 0xffff000700020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Broadcast element is 0x8000; the 0x8000 lane of arg3 saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8987 
// sqdmulh (vector, by element, 4x16): lower four lanes of %2 multiplied by
// the broadcast lane %3.h[7]; upper 64 bits of the result must be zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.h[7]");

  __uint128_t arg1 = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0003fffc00020000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Broadcast element is 0x8000; the 0x8000 lane of arg3 saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9003 
// sqdmulh (scalar, 32-bit): non-rounding scalar form; the truncated result is
// 2 where the sqrdmulh scalar test produced 3 for the same inputs.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %s3");

  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x80000000 * 0x80000000 doubled overflows int32; saturates to 0x7fffffff.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9019 
// sqdmulh (scalar, 16-bit): non-rounding scalar form on %h registers; 0xfeed
// filler in the unused lanes must be ignored and zeroed in the result.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %h3");

  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled overflows int16; saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9035 
// sqdmulh (scalar, by element, 32-bit): %s2 multiplied by lane 2 of %3;
// truncating, saturating to int32.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %3.s[2]");

  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x80000000 * 0x80000000 doubled overflows int32; saturates to 0x7fffffff.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9051 
// sqdmulh (scalar, by element, 16-bit): %h2 multiplied by lane 7 of %3;
// truncating, saturating to int16.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %3.h[7]");

  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled overflows int16; saturates to 0x7fff.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9067 
// Parameterized fixture: each parameter is an FPCR bit pattern to round-trip.
class FpcrBitSupport : public testing::TestWithParam<uint64_t> {};
9069 
// Writes the parameter to FPCR then reads it back; the value must survive the
// round trip, demonstrating the bit is implemented (not read-as-zero).
TEST_P(FpcrBitSupport, SupportsBit) {
  uint64_t fpcr1;
  asm("msr fpcr, %x1\n\t"
      "mrs %x0, fpcr"
      : "=r"(fpcr1)
      : "r"(static_cast<uint64_t>(GetParam())));
  ASSERT_EQ(fpcr1, GetParam()) << "Should be able to set then get FPCR bit: " << GetParam();
};
9078 
// Note: The exception enablement flags (such as IOE) are not checked, because when tested on actual
// ARM64 device we find that the tests fail either because they cannot be written or are RAZ (read
// as zero).
// Covers the four rounding-mode encodings plus FZ and DN; 0 checks that a
// clean (all-bits-clear) write round-trips too.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpcrBitSupport,
                         testing::Values(kFpcrRModeTieEven,
                                         kFpcrRModeZero,
                                         kFpcrRModeNegInf,
                                         kFpcrRModePosInf,
                                         kFpcrFzBit,
                                         kFpcrDnBit,
                                         0));
9091 
// Parameterized fixture: each parameter is an FPSR bit pattern to round-trip.
class FpsrBitSupport : public testing::TestWithParam<uint64_t> {};
9093 
// Writes the parameter to FPSR then reads it back; the value must survive the
// round trip, demonstrating the bit is implemented (not read-as-zero).
TEST_P(FpsrBitSupport, SupportsBit) {
  uint64_t fpsr1;
  asm("msr fpsr, %1\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr1)
      : "r"(static_cast<uint64_t>(GetParam())));
  ASSERT_EQ(fpsr1, GetParam()) << "Should be able to set then get FPSR bit";
};
9102 
// Cumulative floating-point status flags (IOC/DZC/OFC/UFC/IXC/IDC) plus QC;
// each must be individually writable and readable.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpsrBitSupport,
                         testing::Values(kFpsrIocBit,
                                         kFpsrDzcBit,
                                         kFpsrOfcBit,
                                         kFpsrUfcBit,
                                         kFpsrIxcBit,
                                         kFpsrIdcBit,
                                         kFpsrQcBit));
9112 
// udiv: 64-bit unsigned division; division by zero yields zero (no trap).
TEST(Arm64InsnTest, UnsignedDivide64) {
  // Runs a single unsigned division through the udiv instruction.
  auto DoUdiv = [](uint64_t dividend, uint64_t divisor) {
    uint64_t quotient;
    asm("udiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };
  ASSERT_EQ(DoUdiv(0x8'0000'0000ULL, 2ULL), 0x4'0000'0000ULL) << "Division should be 64-bit.";
  ASSERT_EQ(DoUdiv(123ULL, 0ULL), 0ULL) << "Div by 0 should result in 0.";
}
9122 
// sdiv: 64-bit signed division; INT64_MIN / -1 wraps back to INT64_MIN
// instead of trapping on overflow.
TEST(Arm64InsnTest, SignedDivide64) {
  // Runs a single signed division through the sdiv instruction.
  auto DoSdiv = [](int64_t dividend, int64_t divisor) {
    int64_t quotient;
    asm("sdiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };
  ASSERT_EQ(DoSdiv(67802402LL, -1LL), -67802402LL)
      << "Division by -1 should flip sign if dividend is not numeric_limits::min.";
  ASSERT_EQ(DoSdiv(-531675317891LL, -1LL), 531675317891LL)
      << "Division by -1 should flip sign if dividend is not numeric_limits::min.";
  ASSERT_EQ(DoSdiv(std::numeric_limits<int64_t>::min(), -1LL), std::numeric_limits<int64_t>::min())
      << "Div of numeric_limits::min by -1 should result in numeric_limits::min.";
}
9136 
// aese performs one AES encryption round step on the tied input/output
// register, combining it with the round key in %2.
TEST(Arm64InsnTest, AesEncode) {
  __uint128_t arg = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t res;
  asm("aese %0.16b, %2.16b" : "=w"(res) : "0"(arg), "w"(key));
  ASSERT_EQ(res, MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL));
}
9144 
// aesmc applies the AES MixColumns transformation to the vector state.
TEST(Arm64InsnTest, AesMixColumns) {
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t mixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesmc %0.16b, %1.16b")(state);
  __uint128_t expected = MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL);
  ASSERT_EQ(mixed, expected);
}
9150 
// aesd performs one AES decryption round step; this test drives it with the
// AesEncode output (pre-XORed with the key) and expects the original input
// back after undoing the final XOR.
TEST(Arm64InsnTest, AesDecode) {
  // Check that it's opposite to AesEncode with extra XORs.
  __uint128_t arg = MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL);
  __uint128_t key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  arg ^= key;
  __uint128_t res;
  asm("aesd %0.16b, %2.16b" : "=w"(res) : "0"(arg), "w"(key));
  ASSERT_EQ(res ^ key, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
9160 
// aesimc applies the AES inverse MixColumns transformation; its expected
// output here is the input of the AesMixColumns test, i.e. it undoes aesmc.
TEST(Arm64InsnTest, AesInverseMixColumns) {
  __uint128_t state = MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL);
  __uint128_t unmixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesimc %0.16b, %1.16b")(state);
  __uint128_t expected = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  ASSERT_EQ(unmixed, expected);
}
9166 
9167 }  // namespace
9168