/*
 * Copyright (C) 2019 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#include "gtest/gtest.h"

#include <cstddef>
#include <cstdint>
#include <initializer_list>
#include <iterator>
#include <limits>

#include "utility.h"
24
25 namespace {
26
TEST(Arm64InsnTest, UnsignedBitfieldMoveNoShift) {
  // With immr=0 and imms=63, UBFM selects the entire 64-bit field, so the
  // instruction degenerates into a plain register copy.
  uint64_t input = 0x3952247371907021ULL;
  uint64_t output;

  asm("ubfm %0, %1, #0, #63" : "=r"(output) : "r"(input));

  ASSERT_EQ(output, 0x3952247371907021ULL);
}
35
TEST(Arm64InsnTest, BitfieldLeftInsertion) {
  // BFM with immr=40, imms=15 inserts the low 16 bits of the source at bit
  // position 24 of the destination, preserving all other destination bits.
  uint64_t src = 0x389522868478abcdULL;
  uint64_t dst = 0x1101044682325271ULL;

  asm("bfm %0, %1, #40, #15" : "=r"(dst) : "r"(src), "0"(dst));

  ASSERT_EQ(dst, 0x110104abcd325271ULL);
}
44
TEST(Arm64InsnTest, BitfieldRightInsertion) {
  // BFM with immr=4, imms=39 copies source bits [39:4] into destination bits
  // [35:0], preserving the upper destination bits.
  uint64_t src = 0x3276561809377344ULL;
  uint64_t dst = 0x1668039626579787ULL;

  asm("bfm %0, %1, #4, #39" : "=r"(dst) : "r"(src), "0"(dst));

  ASSERT_EQ(dst, 0x1668039180937734ULL);
}
53
TEST(Arm64InsnTest, MoveImmToFp32) {
  // FMOV (scalar, immediate) encodes its constant in an 8-bit field made of
  // four subfields:
  //
  //   bit 7    - sign
  //   bit 6    - upper exponent bit
  //   bits 5:4 - lower exponent bits
  //   bits 3:0 - mantisa
  //
  // VFPExpandImm (see the ARM Architecture Manual) rebuilds a 32-bit float by
  // custom-extending the upper exponent bit to 6 bits and padding the mantisa
  // with 19 zero bits.  For example, imm8 = 0b01001111 expands to:
  //
  //   0 011111 00 1111 0000000000000000000
  //
  // which is 1.1111 (binary) * 2^(124-127) = 0.2421875.
  //
  // The checks below enumerate every encoding of the shape
  // {0,1}{0,1}{00,11}{0000,1111}, driving each subfield through its all-zeros
  // and all-ones states to verify that the decoder splits imm8 correctly and
  // reconstructs the intended value.
  __uint128_t fp1 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.0e+00")();  // imm8 = 0b00000000
  ASSERT_EQ(fp1, MakeUInt128(0x40000000U, 0U));

  __uint128_t fp2 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.8750e+00")();  // imm8 = 0b00001111
  ASSERT_EQ(fp2, MakeUInt128(0x40780000U, 0U));

  __uint128_t fp3 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.60e+01")();  // imm8 = 0b00110000
  ASSERT_EQ(fp3, MakeUInt128(0x41800000U, 0U));

  __uint128_t fp4 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #3.10e+01")();  // imm8 = 0b00111111
  ASSERT_EQ(fp4, MakeUInt128(0x41f80000U, 0U));

  __uint128_t fp5 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.250e-01")();  // imm8 = 0b01000000
  ASSERT_EQ(fp5, MakeUInt128(0x3e000000U, 0U));

  __uint128_t fp6 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #2.4218750e-01")();  // imm8 = 0b01001111
  ASSERT_EQ(fp6, MakeUInt128(0x3e780000U, 0U));

  __uint128_t fp7 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.0e+00")();  // imm8 = 0b01110000
  ASSERT_EQ(fp7, MakeUInt128(0x3f800000U, 0U));

  __uint128_t fp8 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #1.93750e+00")();  // imm8 = 0b01111111
  ASSERT_EQ(fp8, MakeUInt128(0x3ff80000U, 0U));

  __uint128_t fp9 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.0e+00")();  // imm8 = 0b10000000
  ASSERT_EQ(fp9, MakeUInt128(0xc0000000U, 0U));

  __uint128_t fp10 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.8750e+00")();  // imm8 = 0b10001111
  ASSERT_EQ(fp10, MakeUInt128(0xc0780000U, 0U));

  __uint128_t fp11 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.60e+01")();  // imm8 = 0b10110000
  ASSERT_EQ(fp11, MakeUInt128(0xc1800000U, 0U));

  __uint128_t fp12 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-3.10e+01")();  // imm8 = 0b10111111
  ASSERT_EQ(fp12, MakeUInt128(0xc1f80000U, 0U));

  __uint128_t fp13 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.250e-01")();  // imm8 = 0b11000000
  ASSERT_EQ(fp13, MakeUInt128(0xbe000000U, 0U));

  __uint128_t fp14 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-2.4218750e-01")();  // imm8 = 0b11001111
  ASSERT_EQ(fp14, MakeUInt128(0xbe780000U, 0U));

  __uint128_t fp15 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.0e+00")();  // imm8 = 0b11110000
  ASSERT_EQ(fp15, MakeUInt128(0xbf800000U, 0U));

  __uint128_t fp16 = ASM_INSN_WRAP_FUNC_W_RES("fmov s0, #-1.93750e+00")();  // imm8 = 0b11111111
  ASSERT_EQ(fp16, MakeUInt128(0xbff80000U, 0U));
}
160
TEST(Arm64InsnTest, MoveImmToFp64) {
  // FMOV (scalar, immediate) encodes its constant in an 8-bit field made of
  // four subfields:
  //
  //   bit 7    - sign
  //   bit 6    - upper exponent bit
  //   bits 5:4 - lower exponent bits
  //   bits 3:0 - mantisa
  //
  // VFPExpandImm (see the ARM Architecture Manual) rebuilds a 64-bit double by
  // custom-extending the upper exponent bit to 9 bits and padding the mantisa
  // with 48 zero bits.  For example, imm8 = 0b01001111 expands to:
  //
  //   0 011111111 00 1111 000000000000000000000000000000000000000000000000
  //
  // which is 1.1111 (binary) * 2^(1020-1023) = 0.2421875.
  //
  // The checks below enumerate every encoding of the shape
  // {0,1}{0,1}{00,11}{0000,1111}, driving each subfield through its all-zeros
  // and all-ones states to verify that the decoder splits imm8 correctly and
  // reconstructs the intended value.
  __uint128_t dp1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.0e+00")();  // imm8 = 0b00000000
  ASSERT_EQ(dp1, MakeUInt128(0x4000000000000000ULL, 0U));

  __uint128_t dp2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.8750e+00")();  // imm8 = 0b00001111
  ASSERT_EQ(dp2, MakeUInt128(0x400f000000000000ULL, 0U));

  __uint128_t dp3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.60e+01")();  // imm8 = 0b00110000
  ASSERT_EQ(dp3, MakeUInt128(0x4030000000000000ULL, 0U));

  __uint128_t dp4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #3.10e+01")();  // imm8 = 0b00111111
  ASSERT_EQ(dp4, MakeUInt128(0x403f000000000000ULL, 0U));

  __uint128_t dp5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.250e-01")();  // imm8 = 0b01000000
  ASSERT_EQ(dp5, MakeUInt128(0x3fc0000000000000ULL, 0U));

  __uint128_t dp6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #2.4218750e-01")();  // imm8 = 0b01001111
  ASSERT_EQ(dp6, MakeUInt128(0x3fcf000000000000ULL, 0U));

  __uint128_t dp7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.0e+00")();  // imm8 = 0b01110000
  ASSERT_EQ(dp7, MakeUInt128(0x3ff0000000000000ULL, 0U));

  __uint128_t dp8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #1.93750e+00")();  // imm8 = 0b01111111
  ASSERT_EQ(dp8, MakeUInt128(0x3fff000000000000ULL, 0U));

  __uint128_t dp9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.0e+00")();  // imm8 = 0b10000000
  ASSERT_EQ(dp9, MakeUInt128(0xc000000000000000ULL, 0U));

  __uint128_t dp10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.8750e+00")();  // imm8 = 0b10001111
  ASSERT_EQ(dp10, MakeUInt128(0xc00f000000000000ULL, 0U));

  __uint128_t dp11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.60e+01")();  // imm8 = 0b10110000
  ASSERT_EQ(dp11, MakeUInt128(0xc030000000000000ULL, 0U));

  __uint128_t dp12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-3.10e+01")();  // imm8 = 0b10111111
  ASSERT_EQ(dp12, MakeUInt128(0xc03f000000000000ULL, 0U));

  __uint128_t dp13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.250e-01")();  // imm8 = 0b11000000
  ASSERT_EQ(dp13, MakeUInt128(0xbfc0000000000000ULL, 0U));

  __uint128_t dp14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-2.4218750e-01")();  // imm8 = 0b11001111
  ASSERT_EQ(dp14, MakeUInt128(0xbfcf000000000000ULL, 0U));

  __uint128_t dp15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.0e+00")();  // imm8 = 0b11110000
  ASSERT_EQ(dp15, MakeUInt128(0xbff0000000000000ULL, 0U));

  __uint128_t dp16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %d0, #-1.93750e+00")();  // imm8 = 0b11111111
  ASSERT_EQ(dp16, MakeUInt128(0xbfff000000000000ULL, 0U));
}
267
TEST(Arm64InsnTest, MoveImmToF32x4) {
  // FMOV (vector, immediate) uses the same 8-bit immediate as the scalar
  // form — sign (1 bit), upper exponent (1 bit), lower exponent (2 bits),
  // mantisa (4 bits) — and broadcasts the expanded 32-bit value to all four
  // lanes.  The checks below enumerate every encoding of the shape
  // {0,1}{0,1}{00,11}{0000,1111} to verify that the decoder splits imm8
  // correctly and reconstructs the intended value in each lane.
  __uint128_t v1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.0e+00")();  // imm8 = 0b00000000
  ASSERT_EQ(v1, MakeUInt128(0x4000000040000000ULL, 0x4000000040000000ULL));

  __uint128_t v2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.8750e+00")();  // imm8 = 0b00001111
  ASSERT_EQ(v2, MakeUInt128(0x4078000040780000ULL, 0x4078000040780000ULL));

  __uint128_t v3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.60e+01")();  // imm8 = 0b00110000
  ASSERT_EQ(v3, MakeUInt128(0x4180000041800000ULL, 0x4180000041800000ULL));

  __uint128_t v4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #3.10e+01")();  // imm8 = 0b00111111
  ASSERT_EQ(v4, MakeUInt128(0x41f8000041f80000ULL, 0x41f8000041f80000ULL));

  __uint128_t v5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.250e-01")();  // imm8 = 0b01000000
  ASSERT_EQ(v5, MakeUInt128(0x3e0000003e000000ULL, 0x3e0000003e000000ULL));

  __uint128_t v6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #2.4218750e-01")();  // imm8 = 0b01001111
  ASSERT_EQ(v6, MakeUInt128(0x3e7800003e780000ULL, 0x3e7800003e780000ULL));

  __uint128_t v7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.0e+00")();  // imm8 = 0b01110000
  ASSERT_EQ(v7, MakeUInt128(0x3f8000003f800000ULL, 0x3f8000003f800000ULL));

  __uint128_t v8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #1.93750e+00")();  // imm8 = 0b01111111
  ASSERT_EQ(v8, MakeUInt128(0x3ff800003ff80000ULL, 0x3ff800003ff80000ULL));

  __uint128_t v9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.0e+00")();  // imm8 = 0b10000000
  ASSERT_EQ(v9, MakeUInt128(0xc0000000c0000000ULL, 0xc0000000c0000000ULL));

  __uint128_t v10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.8750e+00")();  // imm8 = 0b10001111
  ASSERT_EQ(v10, MakeUInt128(0xc0780000c0780000ULL, 0xc0780000c0780000ULL));

  __uint128_t v11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.60e+01")();  // imm8 = 0b10110000
  ASSERT_EQ(v11, MakeUInt128(0xc1800000c1800000ULL, 0xc1800000c1800000ULL));

  __uint128_t v12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-3.10e+01")();  // imm8 = 0b10111111
  ASSERT_EQ(v12, MakeUInt128(0xc1f80000c1f80000ULL, 0xc1f80000c1f80000ULL));

  __uint128_t v13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.250e-01")();  // imm8 = 0b11000000
  ASSERT_EQ(v13, MakeUInt128(0xbe000000be000000ULL, 0xbe000000be000000ULL));

  __uint128_t v14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-2.4218750e-01")();  // imm8 = 0b11001111
  ASSERT_EQ(v14, MakeUInt128(0xbe780000be780000ULL, 0xbe780000be780000ULL));

  __uint128_t v15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.0e+00")();  // imm8 = 0b11110000
  ASSERT_EQ(v15, MakeUInt128(0xbf800000bf800000ULL, 0xbf800000bf800000ULL));

  __uint128_t v16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.4s, #-1.93750e+00")();  // imm8 = 0b11111111
  ASSERT_EQ(v16, MakeUInt128(0xbff80000bff80000ULL, 0xbff80000bff80000ULL));
}
349
TEST(Arm64InsnTest, MoveImmToF64x2) {
  // FMOV (vector, immediate) uses the same 8-bit immediate as the scalar
  // form — sign (1 bit), upper exponent (1 bit), lower exponent (2 bits),
  // mantisa (4 bits) — and broadcasts the expanded 64-bit value to both
  // lanes.  The checks below enumerate every encoding of the shape
  // {0,1}{0,1}{00,11}{0000,1111} to verify that the decoder splits imm8
  // correctly and reconstructs the intended value in each lane.
  __uint128_t v1 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.0e+00")();  // imm8 = 0b00000000
  ASSERT_EQ(v1, MakeUInt128(0x4000000000000000ULL, 0x4000000000000000ULL));

  __uint128_t v2 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.8750e+00")();  // imm8 = 0b00001111
  ASSERT_EQ(v2, MakeUInt128(0x400f000000000000ULL, 0x400f000000000000ULL));

  __uint128_t v3 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.60e+01")();  // imm8 = 0b00110000
  ASSERT_EQ(v3, MakeUInt128(0x4030000000000000ULL, 0x4030000000000000ULL));

  __uint128_t v4 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #3.10e+01")();  // imm8 = 0b00111111
  ASSERT_EQ(v4, MakeUInt128(0x403f000000000000ULL, 0x403f000000000000ULL));

  __uint128_t v5 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.250e-01")();  // imm8 = 0b01000000
  ASSERT_EQ(v5, MakeUInt128(0x3fc0000000000000ULL, 0x3fc0000000000000ULL));

  __uint128_t v6 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #2.4218750e-01")();  // imm8 = 0b01001111
  ASSERT_EQ(v6, MakeUInt128(0x3fcf000000000000ULL, 0x3fcf000000000000ULL));

  __uint128_t v7 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.0e+00")();  // imm8 = 0b01110000
  ASSERT_EQ(v7, MakeUInt128(0x3ff0000000000000ULL, 0x3ff0000000000000ULL));

  __uint128_t v8 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #1.93750e+00")();  // imm8 = 0b01111111
  ASSERT_EQ(v8, MakeUInt128(0x3fff000000000000ULL, 0x3fff000000000000ULL));

  __uint128_t v9 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.0e+00")();  // imm8 = 0b10000000
  ASSERT_EQ(v9, MakeUInt128(0xc000000000000000ULL, 0xc000000000000000ULL));

  __uint128_t v10 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.8750e+00")();  // imm8 = 0b10001111
  ASSERT_EQ(v10, MakeUInt128(0xc00f000000000000ULL, 0xc00f000000000000ULL));

  __uint128_t v11 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.60e+01")();  // imm8 = 0b10110000
  ASSERT_EQ(v11, MakeUInt128(0xc030000000000000ULL, 0xc030000000000000ULL));

  __uint128_t v12 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-3.10e+01")();  // imm8 = 0b10111111
  ASSERT_EQ(v12, MakeUInt128(0xc03f000000000000ULL, 0xc03f000000000000ULL));

  __uint128_t v13 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.250e-01")();  // imm8 = 0b11000000
  ASSERT_EQ(v13, MakeUInt128(0xbfc0000000000000ULL, 0xbfc0000000000000ULL));

  __uint128_t v14 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-2.4218750e-01")();  // imm8 = 0b11001111
  ASSERT_EQ(v14, MakeUInt128(0xbfcf000000000000ULL, 0xbfcf000000000000ULL));

  __uint128_t v15 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.0e+00")();  // imm8 = 0b11110000
  ASSERT_EQ(v15, MakeUInt128(0xbff0000000000000ULL, 0xbff0000000000000ULL));

  __uint128_t v16 = ASM_INSN_WRAP_FUNC_W_RES("fmov %0.2d, #-1.93750e+00")();  // imm8 = 0b11111111
  ASSERT_EQ(v16, MakeUInt128(0xbfff000000000000ULL, 0xbfff000000000000ULL));
}
431
TEST(Arm64InsnTest, MoveFpRegToReg) {
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);
  uint64_t gpr = 0xffffeeeeddddccccULL;

  // FMOV from the upper 64-bit element.
  asm("fmov %0, %1.d[1]" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x3333cccc4444ddddULL);

  // FMOV from the lower 64-bit element.
  asm("fmov %0, %d1" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x1111aaaa2222bbbbULL);

  // FMOV from the lowest 32-bit element; the upper result bits come out zero.
  asm("fmov %w0, %s1" : "=r"(gpr) : "w"(vec));
  ASSERT_EQ(gpr, 0x2222bbbbULL);
}
448
TEST(Arm64InsnTest, MoveRegToFpReg) {
  uint64_t gpr = 0xffffeeeeddddccccULL;
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // FMOV into the upper 64-bit element preserves the lower one.
  asm("fmov %0.d[1], %1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaa2222bbbbULL, 0xffffeeeeddddccccULL));

  // FMOV into the low double clears the rest of the vector register.
  asm("fmov %d0, %1" : "=w"(vec) : "r"(gpr));
  ASSERT_EQ(vec, MakeUInt128(0xffffeeeeddddccccULL, 0x0));

  // FMOV into the low single likewise clears everything above it.
  asm("fmov %s0, %w1" : "=w"(vec) : "r"(gpr));
  ASSERT_EQ(vec, MakeUInt128(0xddddccccULL, 0x0));
}
465
TEST(Arm64InsnTest, MoveFpRegToFpReg) {
  __uint128_t out;

  // Scalar FMOV copies the low double and zeroes the upper half.
  __uint128_t double_bits =
      MakeUInt128(0x402e9eb851eb851fULL, 0xdeadbeefaabbccddULL);  // 15.31 in double
  asm("fmov %d0, %d1" : "=w"(out) : "w"(double_bits));
  ASSERT_EQ(out, MakeUInt128(0x402e9eb851eb851fULL, 0ULL));

  // Scalar FMOV copies the low single and zeroes everything above it.
  __uint128_t single_bits =
      MakeUInt128(0xaabbccdd40e51eb8ULL, 0x0011223344556677ULL);  // 7.16 in float
  asm("fmov %s0, %s1" : "=w"(out) : "w"(single_bits));
  ASSERT_EQ(out, MakeUInt128(0x40e51eb8ULL, 0ULL));
}
479
TEST(Arm64InsnTest, InsertRegPartIntoSimd128) {
  uint64_t gpr = 0xffffeeeeddddccccULL;
  __uint128_t vec = MakeUInt128(0x1111aaaa2222bbbbULL, 0x3333cccc4444ddddULL);

  // MOV (from general) into byte lane 3; all other lanes are untouched.
  asm("mov %0.b[3], %w1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaacc22bbbbULL, 0x3333cccc4444ddddULL));

  // MOV (from general) into doubleword lane 1.
  asm("mov %0.d[1], %1" : "=w"(vec) : "r"(gpr), "0"(vec));
  ASSERT_EQ(vec, MakeUInt128(0x1111aaaacc22bbbbULL, 0xffffeeeeddddccccULL));
}
492
TEST(Arm64InsnTest, DuplicateRegIntoSimd128) {
  // DUP broadcasts the low byte of the general register to all 16 lanes.
  constexpr auto AsmDup = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("dup %0.16b, %w1");
  ASSERT_EQ(AsmDup(0xabU), MakeUInt128(0xababababababababULL, 0xababababababababULL));
}
497
TEST(Arm64InsnTest, MoveSimd128ElemToRegSigned) {
  // SMOV sign-extends the selected vector lane into the destination register.
  uint64_t out = 0;
  __uint128_t vec = MakeUInt128(0x9796959493929190ULL, 0x9f9e9d9c9b9a99ULL);

  // Word lanes into a 64-bit destination.
  asm("smov %0, %1.s[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xffffffff93929190ULL);

  asm("smov %0, %1.s[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xffffffff9c9b9a99ULL);

  // Halfword lanes into a 32-bit destination (upper 32 bits stay clear).
  asm("smov %w0, %1.h[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x00000000ffff9190ULL);

  asm("smov %w0, %1.h[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x00000000ffff9594ULL);

  // Byte lanes into a 32-bit destination.
  asm("smov %w0, %1.b[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x00000000ffffff90ULL);

  asm("smov %w0, %1.b[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x00000000ffffff92ULL);
}
523
TEST(Arm64InsnTest, MoveSimd128ElemToRegUnsigned) {
  // UMOV zero-extends the selected vector lane into the destination register.
  uint64_t out = 0;
  __uint128_t vec = MakeUInt128(0xaaaabbbbcccceeeeULL, 0xffff000011112222ULL);

  // Doubleword lanes.
  asm("umov %0, %1.d[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xaaaabbbbcccceeeeULL);

  asm("umov %0, %1.d[1]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xffff000011112222ULL);

  // Word lanes.
  asm("umov %w0, %1.s[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xcccceeeeULL);

  asm("umov %w0, %1.s[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0x11112222ULL);

  // Halfword lanes.
  asm("umov %w0, %1.h[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xeeeeULL);

  asm("umov %w0, %1.h[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xbbbbULL);

  // Byte lanes.
  asm("umov %w0, %1.b[0]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xeeULL);

  asm("umov %w0, %1.b[2]" : "=r"(out) : "w"(vec));
  ASSERT_EQ(out, 0xccULL);
}
556
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4) {
  // SMLAL (by element): widen the low four s16 lanes of the first source,
  // multiply each by lane 1 of the second source, and accumulate into four
  // s32 lanes (the third value seeds the destination register).
  __uint128_t lhs = MakeUInt128(0x9463229563989898ULL, 0x9358211674562701ULL);
  __uint128_t rhs = MakeUInt128(0x0218356462201349ULL, 0x6715188190973038ULL);
  __uint128_t acc = MakeUInt128(0x1198004973407239ULL, 0x6103685406643193ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.h[1]")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x37c4a3494b9db539ULL, 0x37c3dab413a58e33ULL));
}
565
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper) {
  // SMLAL2 (by element): like SMLAL, but widening the upper four s16 lanes
  // of the first source; lane 1 of the second source is the multiplier.
  __uint128_t lhs = MakeUInt128(0x9478221818528624ULL, 0x0851400666044332ULL);
  __uint128_t rhs = MakeUInt128(0x5888569867054315ULL, 0x4706965747458550ULL);
  __uint128_t acc = MakeUInt128(0x3323233421073015ULL, 0x4594051655379068ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[1]")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x5c30bd483c119e0fULL, 0x48ecc5ab6efb3a86ULL));
}
574
TEST(Arm64InsnTest, SignedMultiplyAddLongElemI16x4Upper2) {
  // Same as above but with the highest possible lane index (h[7]) to check
  // the full element-index encoding.
  __uint128_t lhs = MakeUInt128(0x9968262824727064ULL, 0x1336222178923903ULL);
  __uint128_t rhs = MakeUInt128(0x1760854289437339ULL, 0x3561889165125042ULL);
  __uint128_t acc = MakeUInt128(0x4404008952719837ULL, 0x8738648058472689ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.4s, %1.8h, %2.h[7]")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x5d27e9db5e54d15aULL, 0x8b39d9f65f64ea0aULL));
}
583
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4) {
  // SMLSL (by element): widening multiply of the low four s16 lanes by lane 1
  // of the second source, subtracted from the four s32 accumulator lanes.
  __uint128_t lhs = MakeUInt128(0x9143447886360410ULL, 0x3182350736502778ULL);
  __uint128_t rhs = MakeUInt128(0x5908975782727313ULL, 0x0504889398900992ULL);
  __uint128_t acc = MakeUInt128(0x3913503373250855ULL, 0x9826558670892426ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.4s, %1.4h, %2.h[1]")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0xfd58202775231935ULL, 0x61d69fb0921db6b6ULL));
}
592
TEST(Arm64InsnTest, SignedMultiplySubtractLongElemI16x4Upper) {
  // SMLSL2 (by element): the upper-half variant of the test above.
  __uint128_t lhs = MakeUInt128(0x9320199199688285ULL, 0x1718395366913452ULL);
  __uint128_t rhs = MakeUInt128(0x2244470804592396ULL, 0x6028171565515656ULL);
  __uint128_t acc = MakeUInt128(0x6611135982311225ULL, 0x0628905854914509ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.4s, %1.8h, %2.h[1]")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x645326f0814d99a3ULL, 0x05c4290053980b2eULL));
}
601
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4) {
  // UMLAL (by element): the unsigned counterpart of SMLAL — u16 lanes widened
  // to u32, multiplied by lane 1 of the second source, then accumulated.
  __uint128_t lhs = MakeUInt128(0x9027601834840306ULL, 0x8113818551059797ULL);
  __uint128_t rhs = MakeUInt128(0x0566400750942608ULL, 0x7885735796037324ULL);
  __uint128_t acc = MakeUInt128(0x5141467867036880ULL, 0x9880609716425849ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.4s, %1.4h, %2.h[1]")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x61c8e2c867f707f8ULL, 0xc5dfe72334816629ULL));
}
610
TEST(Arm64InsnTest, UnsignedMultiplyAddLongElemI16x4Upper) {
  // UMLAL2 (by element): the upper-half variant of the test above.
  __uint128_t lhs = MakeUInt128(0x9454236828860613ULL, 0x4084148637767009ULL);
  __uint128_t rhs = MakeUInt128(0x6120715124914043ULL, 0x0272538607648236ULL);
  __uint128_t acc = MakeUInt128(0x3414334623518975ULL, 0x7664521641376796ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.4s, %1.8h, %2.h[1]")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x3c00351c3352428eULL, 0x7f9b6cda4425df7cULL));
}
619
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4) {
  // UMLSL (by element): widening unsigned multiply of the low four u16 lanes
  // by lane 1 of the second source, subtracted from the accumulator lanes.
  __uint128_t lhs = MakeUInt128(0x9128009282525619ULL, 0x0205263016391147ULL);
  __uint128_t rhs = MakeUInt128(0x7247331485739107ULL, 0x7758744253876117ULL);
  __uint128_t acc = MakeUInt128(0x4657867116941477ULL, 0x6421441111263583ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.4s, %1.4h, %2.h[1]")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x0268619be9b26a3cULL, 0x1876471910da19edULL));
}
628
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongElemI16x4Upper) {
  // UMLSL2 (by element): the upper-half variant of the test above.
  __uint128_t lhs = MakeUInt128(0x9420757136275167ULL, 0x4573189189456283ULL);
  __uint128_t rhs = MakeUInt128(0x5257044133543758ULL, 0x5753426986994725ULL);
  __uint128_t acc = MakeUInt128(0x4703165661399199ULL, 0x9682628247270641ULL);
  __uint128_t actual =
      ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.4s, %1.8h, %2.h[1]")(lhs, rhs, acc);
  ASSERT_EQ(actual, MakeUInt128(0x2b7d4cb24d79259dULL, 0x8895afc6423a13adULL));
}
637
TEST(Arm64InsnTest, AsmConvertI32F32) {
  // SCVTF: signed 32-bit integer to single-precision; 21 -> 0x41a80000.
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1")(21), MakeUInt128(0x41a80000U, 0U));
}
642
TEST(Arm64InsnTest, AsmConvertU32F32) {
  // UCVTF: unsigned 32-bit integer to single-precision.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1");

  ASSERT_EQ(AsmUcvtf(29), MakeUInt128(0x41e80000U, 0U));

  // 2^31 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(AsmUcvtf(1U << 31), MakeUInt128(0x4f000000U, 0U));
}
651
TEST(Arm64InsnTest, AsmConvertU32F32FromSimdReg) {
  // UCVTF (scalar SIMD source): unsigned 32-bit integer to single-precision.
  constexpr auto AsmConvert = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1");

  ASSERT_EQ(AsmConvert(28), MakeUInt128(0x41e00000U, 0U));

  // 2^31 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(AsmConvert(1U << 31), MakeUInt128(0x4f000000U, 0U));
}
660
TEST(Arm64InsnTest, AsmConvertI32F64) {
  // SCVTF: signed 32-bit integer to double-precision.
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1")(21),
            MakeUInt128(0x4035000000000000ULL, 0U));
}
665
TEST(Arm64InsnTest, AsmConvertU32F64) {
  // UCVTF: unsigned 32-bit integer to double-precision.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1");

  ASSERT_EQ(AsmUcvtf(18), MakeUInt128(0x4032000000000000ULL, 0U));

  // 2^31 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(AsmUcvtf(1U << 31), MakeUInt128(0x41e0000000000000ULL, 0U));
}
674
TEST(Arm64InsnTest, AsmConvertI64F32) {
  // SCVTF: signed 64-bit integer to single-precision.
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1")(11), MakeUInt128(0x41300000U, 0U));
}
679
TEST(Arm64InsnTest, AsmConvertU64F32) {
  // UCVTF: unsigned 64-bit integer to single-precision.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1");

  ASSERT_EQ(AsmUcvtf(3), MakeUInt128(0x40400000U, 0U));

  // 2^63 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x5f000000U, 0U));
}
688
TEST(Arm64InsnTest, AsmConvertI64F64) {
  // SCVTF: signed 64-bit integer to double-precision.
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1")(137),
            MakeUInt128(0x4061200000000000ULL, 0U));
}
693
TEST(Arm64InsnTest, AsmConvertI32F32FromSimdReg) {
  // SCVTF (scalar SIMD source): signed 32-bit integer to single-precision.
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1")(1109),
            MakeUInt128(0x448aa000ULL, 0U));
}
698
TEST(Arm64InsnTest, AsmConvertI64F64FromSimdReg) {
  // SCVTF (scalar SIMD source): signed 64-bit integer to double-precision.
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1")(123),
            MakeUInt128(0x405ec00000000000ULL, 0U));
}
703
TEST(Arm64InsnTest, AsmConvertI32x4F32x4) {
  // SCVTF (vector): each s32 lane converted to a single-precision lane.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s");
  __uint128_t src = MakeUInt128(0x0000003500000014ULL, 0x0000005400000009ULL);
  ASSERT_EQ(AsmScvtf(src), MakeUInt128(0x4254000041a00000ULL, 0x42a8000041100000ULL));
}
709
TEST(Arm64InsnTest, AsmConvertI64x2F64x2) {
  // SCVTF (vector): each s64 lane converted to a double-precision lane; the
  // low lane holds a negative value to exercise the sign path.
  constexpr auto AsmScvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d");
  __uint128_t src = MakeUInt128(static_cast<int64_t>(-9), 17U);
  ASSERT_EQ(AsmScvtf(src), MakeUInt128(0xc022000000000000ULL, 0x4031000000000000ULL));
}
715
TEST(Arm64InsnTest, AsmConvertU32x4F32x4) {
  // UCVTF (vector): each u32 lane converted to a single-precision lane; one
  // lane has the top bit set to check it is not treated as a sign.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s");
  __uint128_t src = MakeUInt128(0x8000000000000019ULL, 0x0000005800000010ULL);
  ASSERT_EQ(AsmUcvtf(src), MakeUInt128(0x4f00000041c80000ULL, 0x42b0000041800000ULL));
}
721
TEST(Arm64InsnTest, AsmConvertU64x2F64x2) {
  // UCVTF (vector): each u64 lane converted to a double-precision lane; the
  // low lane is 2^63 to check the top bit is not treated as a sign.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d");
  __uint128_t src = MakeUInt128(1ULL << 63, 29U);
  ASSERT_EQ(AsmUcvtf(src), MakeUInt128(0x43e0000000000000ULL, 0x403d000000000000ULL));
}
727
TEST(Arm64InsnTest, AsmConvertU64F64) {
  // UCVTF: unsigned 64-bit integer to double-precision.
  constexpr auto AsmUcvtf = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1");

  ASSERT_EQ(AsmUcvtf(49), MakeUInt128(0x4048800000000000ULL, 0U));

  // 2^63 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(AsmUcvtf(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));
}
736
TEST(Arm64InsnTest, AsmConvertU64F64FromSimdReg) {
  // UCVTF (scalar SIMD source): unsigned 64-bit integer to double-precision.
  constexpr auto AsmConvert = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1");

  ASSERT_EQ(AsmConvert(47), MakeUInt128(0x4047800000000000ULL, 0U));

  // 2^63 must convert as a large positive value, not as a negative one.
  ASSERT_EQ(AsmConvert(1ULL << 63), MakeUInt128(0x43e0000000000000ULL, 0U));
}
745
TEST(Arm64InsnTest, AsmConvertLiterals) {
  // The float-to-integer conversion tests below rely on exact encodings of
  // these literals; verify the compiler produced the expected bit patterns.
  struct Float32Case {
    float value;
    uint32_t bits;
  };
  constexpr Float32Case kFloat32Cases[] = {
      {-7.50f, 0xc0f00000U},
      {-6.75f, 0xc0d80000U},
      {-6.50f, 0xc0d00000U},
      {-6.25f, 0xc0c80000U},
      {6.25f, 0x40c80000U},
      {6.50f, 0x40d00000U},
      {6.75f, 0x40d80000U},
      {7.50f, 0x40f00000U},
  };
  for (const auto& test_case : kFloat32Cases) {
    ASSERT_EQ(bit_cast<uint32_t>(test_case.value), test_case.bits);
  }

  struct Float64Case {
    double value;
    uint64_t bits;
  };
  constexpr Float64Case kFloat64Cases[] = {
      {-7.50, 0xc01e000000000000ULL},
      {-6.75, 0xc01b000000000000ULL},
      {-6.50, 0xc01a000000000000ULL},
      {-6.25, 0xc019000000000000ULL},
      {6.25, 0x4019000000000000ULL},
      {6.50, 0x401a000000000000ULL},
      {6.75, 0x401b000000000000ULL},
      {7.50, 0x401e000000000000ULL},
  };
  for (const auto& test_case : kFloat64Cases) {
    ASSERT_EQ(bit_cast<uint64_t>(test_case.value), test_case.bits);
  }
}
767
768 template <typename IntType, typename FuncType>
TestConvertF32ToInt(FuncType AsmFunc,std::initializer_list<int> expected)769 void TestConvertF32ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
770 // Note that bit_cast isn't a constexpr.
771 static const uint32_t kConvertF32ToIntInputs[] = {
772 bit_cast<uint32_t>(-7.50f),
773 bit_cast<uint32_t>(-6.75f),
774 bit_cast<uint32_t>(-6.50f),
775 bit_cast<uint32_t>(-6.25f),
776 bit_cast<uint32_t>(6.25f),
777 bit_cast<uint32_t>(6.50f),
778 bit_cast<uint32_t>(6.75f),
779 bit_cast<uint32_t>(7.50f),
780 };
781
782 const size_t kConvertF32ToIntInputsSize = sizeof(kConvertF32ToIntInputs) / sizeof(uint32_t);
783 ASSERT_EQ(kConvertF32ToIntInputsSize, expected.size());
784
785 auto expected_it = expected.begin();
786 for (size_t input_it = 0; input_it < kConvertF32ToIntInputsSize; input_it++) {
787 ASSERT_EQ(AsmFunc(kConvertF32ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
788 }
789 }
790
// Scalar float32 -> integer conversions (FPR source, GPR destination).
// All tests share the input set +/-{7.50, 6.75, 6.50, 6.25} supplied by
// TestConvertF32ToInt; the expected lists demonstrate each rounding mode:
//   fcvtas/fcvtau - round to nearest, ties away from zero
//   fcvtms/fcvtmu - round toward minus infinity (floor)
//   fcvtns/fcvtnu - round to nearest, ties to even
//   fcvtps/fcvtpu - round toward plus infinity (ceiling)
//   fcvtzs/fcvtzu - round toward zero (truncate)
// The unsigned variants (fcvt*u) map all negative inputs to 0, as the first
// four expected values show. The %w forms produce 32-bit results, the %x
// forms (second half) 64-bit results.

TEST(Arm64InsnTest, AsmConvertF32I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1");
  TestConvertF32ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
890
891 template <typename IntType, typename FuncType>
TestConvertF64ToInt(FuncType AsmFunc,std::initializer_list<int> expected)892 void TestConvertF64ToInt(FuncType AsmFunc, std::initializer_list<int> expected) {
893 // Note that bit_cast isn't a constexpr.
894 static const uint64_t kConvertF64ToIntInputs[] = {
895 bit_cast<uint64_t>(-7.50),
896 bit_cast<uint64_t>(-6.75),
897 bit_cast<uint64_t>(-6.50),
898 bit_cast<uint64_t>(-6.25),
899 bit_cast<uint64_t>(6.25),
900 bit_cast<uint64_t>(6.50),
901 bit_cast<uint64_t>(6.75),
902 bit_cast<uint64_t>(7.50),
903 };
904
905 const size_t kConvertF64ToIntInputsSize = sizeof(kConvertF64ToIntInputs) / sizeof(uint64_t);
906 ASSERT_EQ(kConvertF64ToIntInputsSize, expected.size());
907
908 auto expected_it = expected.begin();
909 for (size_t input_it = 0; input_it < kConvertF64ToIntInputsSize; input_it++) {
910 ASSERT_EQ(AsmFunc(kConvertF64ToIntInputs[input_it]), static_cast<IntType>(*expected_it++));
911 }
912 }
913
// Scalar float64 -> integer conversions (FPR source %d1, GPR destination).
// Same structure as the float32 tests above: shared inputs come from
// TestConvertF64ToInt, and the expected lists encode the rounding mode of
// each instruction (as = ties away, ms = floor, ns = ties to even,
// ps = ceiling, zs = truncate; the *u variants map negatives to 0).

TEST(Arm64InsnTest, AsmConvertF64I32TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U32NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I32TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U32PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I32Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U32Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1");
  TestConvertF64ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtas %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtau %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtms %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtmu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtns %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtnu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtps %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtpu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1013
// SIMD-scalar destination forms of the same conversions: the result is
// written back into an FPR (%s0 / %d0) rather than a GPR, hence the
// W_RES wrappers. Inputs and rounding-mode expectations are identical to
// the GPR-destination tests above.

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF32I32ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF32U32ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %s0, %s1");
  TestConvertF32ToInt<uint32_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtas, {-8, -7, -7, -6, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtau, {0U, 0U, 0U, 0U, 6U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarNegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtms, {-8, -7, -7, -7, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarNegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtmu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtns, {-8, -7, -6, -6, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtnu, {0U, 0U, 0U, 0U, 6U, 6U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarPosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtps, {-7, -6, -6, -6, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarPosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtpu, {0U, 0U, 0U, 0U, 7U, 7U, 7U, 8U});
}

TEST(Arm64InsnTest, AsmConvertF64I64ScalarTruncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzs, {-7, -6, -6, -6, 6U, 6U, 6U, 7U});
}

TEST(Arm64InsnTest, AsmConvertF64U64ScalarTruncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %d0, %d1");
  TestConvertF64ToInt<uint64_t>(AsmFcvtzu, {0U, 0U, 0U, 0U, 6U, 6U, 6U, 7U});
}
1113
// Vector (.4s) forms: four float32 lanes converted per instruction.
// arg1 holds the four negative inputs, arg2 the four positive ones.
// Expected words are the per-lane results, e.g. 0xfffffff8 = -8,
// 0xfffffff9 = -7, 0xfffffffa = -6; unsigned variants produce 0 for
// every negative lane.

TEST(Arm64InsnTest, AsmConvertF32I32x4TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000700000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffff9fffffff9ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffff9fffffff8ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000700000007ULL, 0x0000000800000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF32I32x4Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffafffffff9ULL, 0xfffffffafffffffaULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}

TEST(Arm64InsnTest, AsmConvertF32U32x4Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000600000006ULL, 0x0000000700000006ULL));
}
1193
// Vector (.2d) forms: two float64 lanes converted per instruction, so the
// eight shared inputs are fed through four two-lane arguments.
// NOTE(review): the test names say "x4" but the .2d arrangement has two
// lanes — presumably these should read "x2" (cf. AsmConvertF32I32x4* above).
// Left unchanged here because renaming would alter gtest filter names;
// confirm and rename in a dedicated change.

TEST(Arm64InsnTest, AsmConvertF64I64x4TieAway) {
  constexpr auto AsmFcvtas = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtas %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtas(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtas(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtas(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtas(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4TieAway) {
  constexpr auto AsmFcvtau = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtau %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtau(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtau(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtau(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtau(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4NegInf) {
  constexpr auto AsmFcvtms = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtms %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtms(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtms(arg2), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtms(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtms(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4NegInf) {
  constexpr auto AsmFcvtmu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtmu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtmu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtmu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtmu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtmu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4TieEven) {
  constexpr auto AsmFcvtns = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtns %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtns(arg1), MakeUInt128(0xfffffffffffffff8ULL, 0xfffffffffffffff9ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtns(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtns(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtns(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4TieEven) {
  constexpr auto AsmFcvtnu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtnu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtnu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtnu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtnu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtnu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4PosInf) {
  constexpr auto AsmFcvtps = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtps %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtps(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtps(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtps(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtps(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4PosInf) {
  constexpr auto AsmFcvtpu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtpu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtpu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtpu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtpu(arg3), MakeUInt128(0x0000000000000007ULL, 0x0000000000000007ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtpu(arg4), MakeUInt128(0x0000000000000007ULL, 0x0000000000000008ULL));
}

TEST(Arm64InsnTest, AsmConvertF64I64x4Truncate) {
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0xfffffffffffffff9ULL, 0xfffffffffffffffaULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffffffffaULL, 0xfffffffffffffffaULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzs(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzs(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}

TEST(Arm64InsnTest, AsmConvertF64U64x4Truncate) {
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtzu(arg3), MakeUInt128(0x0000000000000006ULL, 0x0000000000000006ULL));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtzu(arg4), MakeUInt128(0x0000000000000006ULL, 0x0000000000000007ULL));
}
1313
TEST(Arm64InsnTest, AsmConvertX32F32Scalar) {
  // SCVTF (scalar, fixed-point): float result = signed(w1) / 2^7.
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %w1, #7");

  // 0x610 / 128 = 12.125 -> 0x41420000.
  ASSERT_EQ(AsmConvertX32F32(0x610), MakeUInt128(0x41420000ULL, 0U));

  // INT32_MIN / 128 = -2^24 -> 0xcb800000.
  ASSERT_EQ(AsmConvertX32F32(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1321
TEST(Arm64InsnTest, AsmConvertX32F64Scalar) {
  // SCVTF (scalar, fixed-point): double result = signed(w1) / 2^8.
  constexpr auto AsmConvertX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %w1, #8");

  // 0x487 / 256 = 4.52734375 -> 0x40121c0000000000.
  ASSERT_EQ(AsmConvertX32F64(0x487), MakeUInt128(0x40121c0000000000ULL, 0U));

  // Use 1U << 31 (1 << 31 overflows a signed int) to form the INT32_MIN bit
  // pattern; only the low 32 bits reach %w1.  -2^31 / 2^8 = -2^23.
  ASSERT_EQ(AsmConvertX32F64(1U << 31), MakeUInt128(0xc160000000000000ULL, 0U));
}
1329
TEST(Arm64InsnTest, AsmConvertX32F32) {
  // SCVTF (vector form on a scalar S register): float = signed(s1) / 2^7.
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %s0, %s1, #7");

  // 0x123 / 128 = 2.2734375 -> 0x40118000.
  ASSERT_EQ(AsmConvertX32F32(0x123), MakeUInt128(0x40118000ULL, 0U));

  // INT32_MIN / 128 = -2^24 -> 0xcb800000.
  ASSERT_EQ(AsmConvertX32F32(1U << 31), MakeUInt128(0xcb800000ULL, 0U));
}
1337
TEST(Arm64InsnTest, AsmConvertX32x4F32x4) {
  // SCVTF (vector, fixed-point): each signed 32-bit lane / 2^11 -> float32.
  constexpr auto AsmConvertX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.4s, %1.4s, #11");
  // Lanes include INT32_MIN and a negative value to cover sign handling.
  __uint128_t arg = MakeUInt128(0x80000000ffff9852ULL, 0x0000110200001254ULL);
  ASSERT_EQ(AsmConvertX32F32(arg), MakeUInt128(0xc9800000c14f5c00ULL, 0x400810004012a000ULL));
}
1343
TEST(Arm64InsnTest, AsmConvertUX32F32Scalar) {
  // UCVTF (scalar, fixed-point): float result = unsigned(w1) / 2^7.
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %w1, #7");

  // 0x857 / 128 = 16.6796875 -> 0x41857000.
  ASSERT_EQ(AsmConvertUX32F32(0x857), MakeUInt128(0x41857000ULL, 0U));

  // 2^31 / 128 = 2^24 (positive, since the source is unsigned) -> 0x4b800000.
  ASSERT_EQ(AsmConvertUX32F32(1U << 31), MakeUInt128(0x4b800000ULL, 0U));

  // Test the default rounding behavior (FPRounding_TIEEVEN).
  ASSERT_EQ(AsmConvertUX32F32(0x80000080), MakeUInt128(0x4b800000ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x800000c0), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x80000140), MakeUInt128(0x4b800001ULL, 0U));
  ASSERT_EQ(AsmConvertUX32F32(0x80000180), MakeUInt128(0x4b800002ULL, 0U));
}
1357
TEST(Arm64InsnTest, AsmConvertUX32F64Scalar) {
  // UCVTF (scalar, fixed-point): double result = unsigned(w1) / 2^8.
  constexpr auto AsmConvertUX32F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %w1, #8");

  // 0x361 / 256 = 3.37890625 -> 0x400b080000000000.
  ASSERT_EQ(AsmConvertUX32F64(0x361), MakeUInt128(0x400b080000000000ULL, 0U));

  // 2^31 / 256 = 2^23 (positive, unsigned source) -> 0x4160000000000000.
  ASSERT_EQ(AsmConvertUX32F64(1U << 31), MakeUInt128(0x4160000000000000ULL, 0U));
}
1365
TEST(Arm64InsnTest, AsmConvertUX32F32) {
  // UCVTF (vector form on a scalar S register): float = unsigned(s1) / 2^7.
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %s0, %s1, #7");

  // 0x456 / 128 = 8.671875 -> 0x410ac000.
  ASSERT_EQ(AsmConvertUX32F32(0x456), MakeUInt128(0x410ac000ULL, 0U));

  // 2^31 / 128 = 2^24 -> 0x4b800000.
  ASSERT_EQ(AsmConvertUX32F32(1U << 31), MakeUInt128(0x4b800000ULL, 0U));
}
1373
TEST(Arm64InsnTest, AsmConvertUX32x4F32x4) {
  // UCVTF (vector, fixed-point): each unsigned 32-bit lane / 2^11 -> float32.
  constexpr auto AsmConvertUX32F32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.4s, %1.4s, #11");
  // Includes a lane with the top bit set to verify it is treated as unsigned.
  __uint128_t arg = MakeUInt128(0x8000000000008023ULL, 0x0000201800001956ULL);
  ASSERT_EQ(AsmConvertUX32F32(arg), MakeUInt128(0x4980000041802300ULL, 0x40806000404ab000ULL));
}
1379
TEST(Arm64InsnTest, AsmConvertX64F32Scalar) {
  // SCVTF (scalar, fixed-point, 64-bit source): float = signed(x1) / 2^10.
  constexpr auto AsmConvertX64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %s0, %x1, #10");

  // 0x2234 / 1024 = 8.55078125 -> 0x4108d000.
  ASSERT_EQ(AsmConvertX64F32(0x2234), MakeUInt128(0x4108d000ULL, 0U));
}
1385
TEST(Arm64InsnTest, AsmConvertX64F64Scalar) {
  // SCVTF (scalar, fixed-point, 64-bit source): double = signed(x1) / 2^10.
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("scvtf %d0, %x1, #10");

  // 0x1324 / 1024 = 4.78515625 -> 0x4013240000000000.
  ASSERT_EQ(AsmConvertX64F64(0x1324), MakeUInt128(0x4013240000000000ULL, 0U));
}
1391
TEST(Arm64InsnTest, AsmConvertUX64F32Scalar) {
  // UCVTF (scalar, fixed-point, 64-bit source): float = unsigned(x1) / 2^10.
  constexpr auto AsmConvertUX64F32 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %s0, %x1, #10");

  // 0x5763 / 1024 = 21.8466796875 -> 0x41aec600.
  ASSERT_EQ(AsmConvertUX64F32(0x5763), MakeUInt128(0x41aec600ULL, 0U));
}
1397
TEST(Arm64InsnTest, AsmConvertUX64F64Scalar) {
  // UCVTF (scalar, fixed-point, 64-bit source): double = unsigned(x1) / 2^10.
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_R_ARG("ucvtf %d0, %x1, #10");

  // 0x2217 / 1024 = 8.5224609375 -> 0x40210b8000000000.
  ASSERT_EQ(AsmConvertUX64F64(0x2217), MakeUInt128(0x40210b8000000000ULL, 0U));
}
1403
TEST(Arm64InsnTest, AsmConvertX64F64) {
  // SCVTF (vector form on a scalar D register): double = signed(d1) / 2^12.
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %d0, %d1, #12");

  // 0x723 / 4096 = 0.446044921875 -> 0x3fdc8c0000000000.
  ASSERT_EQ(AsmConvertX64F64(0x723), MakeUInt128(0x3fdc8c0000000000ULL, 0U));

  // INT64_MIN / 2^12 = -2^51 -> 0xc320000000000000.
  ASSERT_EQ(AsmConvertX64F64(1ULL << 63), MakeUInt128(0xc320000000000000ULL, 0U));
}
1411
TEST(Arm64InsnTest, AsmConvertUX64F64) {
  // UCVTF (vector form on a scalar D register): double = unsigned(d1) / 2^12.
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #12");

  // 0x416 / 4096 = 0.25537109375 -> 0x3fd0580000000000.
  ASSERT_EQ(AsmConvertUX64F64(0x416), MakeUInt128(0x3fd0580000000000ULL, 0U));

  // 2^63 / 2^12 = 2^51 (positive, unsigned source) -> 0x4320000000000000.
  ASSERT_EQ(AsmConvertUX64F64(1ULL << 63), MakeUInt128(0x4320000000000000ULL, 0U));
}
1419
TEST(Arm64InsnTest, AsmConvertUX64F64With64BitFraction) {
  // UCVTF with the maximum fbits value (#64): the whole 64-bit source is
  // fraction, so 2^63 / 2^64 = 0.5.
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %d0, %d1, #64");

  ASSERT_EQ(AsmConvertUX64F64(1ULL << 63), MakeUInt128(0x3fe0'0000'0000'0000ULL, 0U));
}
1425
TEST(Arm64InsnTest, AsmConvertX64x2F64x2) {
  // SCVTF (vector, fixed-point): each signed 64-bit lane / 2^12 -> float64.
  constexpr auto AsmConvertX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("scvtf %0.2d, %1.2d, #12");
  // Low lane is INT64_MIN to exercise the sign; high lane is a small positive.
  __uint128_t arg = MakeUInt128(1ULL << 63, 0x8086U);
  ASSERT_EQ(AsmConvertX64F64(arg), MakeUInt128(0xc320000000000000ULL, 0x402010c000000000ULL));
}
1431
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2) {
  // UCVTF (vector, fixed-point): each unsigned 64-bit lane / 2^12 -> float64.
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #12");
  // Low lane has the top bit set to verify unsigned interpretation.
  __uint128_t arg = MakeUInt128(1ULL << 63, 0x6809U);
  ASSERT_EQ(AsmConvertUX64F64(arg), MakeUInt128(0x4320000000000000ULL, 0x401a024000000000ULL));
}
1437
TEST(Arm64InsnTest, AsmConvertUX64x2F64x2With64BitFraction) {
  // Vector UCVTF with the maximum fbits value (#64): every result is in [0, 1).
  constexpr auto AsmConvertUX64F64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ucvtf %0.2d, %1.2d, #64");
  __uint128_t arg = MakeUInt128(0x7874'211c'b7aa'f597ULL, 0x2c0f'5504'd25e'f673ULL);
  ASSERT_EQ(AsmConvertUX64F64(arg),
            MakeUInt128(0x3fde'1d08'472d'eabdULL, 0x3fc6'07aa'8269'2f7bULL));
}
1444
TEST(Arm64InsnTest, AsmConvertF32X32Scalar) {
  // FCVTZS (scalar, fixed-point): signed 32-bit result = trunc(float * 2^16).
  constexpr auto AsmConvertF32X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %s1, #16");
  uint32_t arg1 = 0x4091eb85U;  // 4.56 in float
  ASSERT_EQ(AsmConvertF32X32(arg1), MakeUInt128(0x00048f5cU, 0U));

  uint32_t arg2 = 0xc0d80000U;  // -6.75 in float
  ASSERT_EQ(AsmConvertF32X32(arg2), MakeUInt128(0xfff94000U, 0U));

  // The default NaN converts to zero.
  ASSERT_EQ(AsmConvertF32X32(kDefaultNaN32AsInteger), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1455
TEST(Arm64InsnTest, AsmConvertF32UX32Scalar) {
  // FCVTZU (scalar, fixed-point): unsigned 32-bit result = trunc(float * 2^16).
  // Matches the unsigned ("UX32") test name and the F64 counterpart below;
  // negative inputs saturate to zero for an unsigned destination.
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1, #16");
  uint32_t arg1 = 0x41223d71U;  // 10.14 in float
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x000a23d7U, 0U));

  uint32_t arg2 = 0xc1540000U;  // -13.25 in float
  ASSERT_EQ(AsmConvertF32UX32(arg2), MakeUInt128(0U, 0U));

  // The default NaN converts to zero.
  ASSERT_EQ(AsmConvertF32UX32(kDefaultNaN32AsInteger), MakeUInt128(bit_cast<uint32_t>(0.0f), 0U));
}
1466
TEST(Arm64InsnTest, AsmConvertF32UX32With31FractionalBits) {
  // FCVTZU with 31 fractional bits, matching the unsigned ("UX32") test name.
  // 0.25 * 2^31 = 2^29 = 0x20000000.
  constexpr auto AsmConvertF32UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %s1, #31");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX32(arg1), MakeUInt128(0x20000000U, 0U));
}
1472
TEST(Arm64InsnTest, AsmConvertF64X32Scalar) {
  // FCVTZS (scalar, fixed-point): signed 32-bit result = trunc(double * 2^16).
  constexpr auto AsmConvertF64X32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %w0, %d1, #16");
  uint64_t arg1 = 0x401e8f5c28f5c28fULL;  // 7.64 in double (0x7a3d7 = 500695 = 7.64 * 2^16)
  ASSERT_EQ(AsmConvertF64X32(arg1), MakeUInt128(0x0007a3d7U, 0U));

  uint64_t arg2 = 0xc040200000000000ULL;  // -32.25 in double
  ASSERT_EQ(AsmConvertF64X32(arg2), MakeUInt128(0xffdfc000U, 0U));
}
1481
TEST(Arm64InsnTest, AsmConvertF32X64Scalar) {
  // FCVTZS (scalar, fixed-point): signed 64-bit result = trunc(float * 2^16).
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %s1, #16");
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  // -6.5 * 2^16 = -425984, sign-extended to 64 bits.
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1490
TEST(Arm64InsnTest, AsmConvertF32UX64With63FractionalBits) {
  // FCVTZU with 63 fractional bits, matching the unsigned ("UX64") test name.
  // 0.25 * 2^63 = 2^61 = 0x2000000000000000.
  constexpr auto AsmConvertF32UX64 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1, #63");
  uint32_t arg1 = bit_cast<uint32_t>(0.25f);
  ASSERT_EQ(AsmConvertF32UX64(arg1), MakeUInt128(0x20000000'00000000ULL, 0U));
}
1496
TEST(Arm64InsnTest, AsmConvertF64X64Scalar) {
  // FCVTZS (scalar, fixed-point): signed 64-bit result = trunc(double * 2^16).
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzs %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  ASSERT_EQ(AsmFcvtzs(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  // -6.5 * 2^16 = -425984, sign-extended to 64 bits.
  ASSERT_EQ(AsmFcvtzs(arg2), MakeUInt128(0xfffffffffff98000ULL, 0ULL));
}
1505
TEST(Arm64InsnTest, AsmConvertF32X32x4) {
  // FCVTZS (vector, fixed-point): each float32 lane * 2^2, truncated, signed.
  constexpr auto AsmFcvtzs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzs %0.4s, %1.4s, #2");
  // -5.5 * 4 = -22 (0xffffffea); +/-0 -> 0; 6.5 * 4 = 26 (0x1a).
  __uint128_t res = AsmFcvtzs(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f));
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffeaULL, 0x0000001a00000000ULL));
}
1511
TEST(Arm64InsnTest, AsmConvertF64UX32Scalar) {
  // FCVTZU (scalar, fixed-point): unsigned 32-bit result = trunc(double * 2^16);
  // negative inputs saturate to zero.
  constexpr auto AsmConvertF64UX32 = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %w0, %d1, #16");
  uint64_t arg1 = 0x4020947ae147ae14ULL;  // 8.29 in double
  ASSERT_EQ(AsmConvertF64UX32(arg1), MakeUInt128(0x00084a3dU, 0U));

  uint64_t arg2 = 0xc023666666666666ULL;  // -9.70 in double
  ASSERT_EQ(AsmConvertF64UX32(arg2), MakeUInt128(0U, 0U));
}
1520
TEST(Arm64InsnTest, AsmConvertF32UX64Scalar) {
  // FCVTZU (scalar, fixed-point): unsigned 64-bit result = trunc(float * 2^16);
  // negative inputs saturate to zero.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %s1, #16");
  uint64_t arg1 = bit_cast<uint32_t>(7.50f);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));
  uint64_t arg2 = bit_cast<uint32_t>(-6.50f);
  ASSERT_EQ(AsmFcvtzu(arg2), 0ULL);
}
1528
TEST(Arm64InsnTest, AsmConvertF64UX64Scalar) {
  // FCVTZU (scalar, fixed-point): unsigned 64-bit result = trunc(double * 2^16);
  // negative inputs saturate to zero.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #16");
  uint64_t arg1 = bit_cast<uint64_t>(7.50);
  ASSERT_EQ(AsmFcvtzu(arg1), MakeUInt128(0x0000000000078000ULL, 0ULL));

  uint64_t arg2 = bit_cast<uint64_t>(-6.50);
  ASSERT_EQ(AsmFcvtzu(arg2), MakeUInt128(0ULL, 0ULL));
}
1537
TEST(Arm64InsnTest, AsmConvertF64UX64ScalarWith64BitFraction) {
  // FCVTZU with the maximum fbits value (#64): 0.625 * 2^64 = 0xa000...0.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_R_RES_W_ARG("fcvtzu %x0, %d1, #64");
  uint64_t arg = bit_cast<uint64_t>(0.625);
  ASSERT_EQ(AsmFcvtzu(arg), MakeUInt128(0xa000'0000'0000'0000ULL, 0ULL));
}
1543
TEST(Arm64InsnTest, AsmConvertF32UX32x4) {
  // FCVTZU (vector, fixed-point): each float32 lane * 2^2, truncated, unsigned.
  // Local renamed AsmFcvtzs -> AsmFcvtzu to match the wrapped instruction.
  constexpr auto AsmFcvtzu = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtzu %0.4s, %1.4s, #2");
  // Negative lanes saturate to 0; 6.5 * 4 = 26 (0x1a).
  __uint128_t res = AsmFcvtzu(MakeF32x4(-5.5f, -0.0f, 0.0f, 6.5f));
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000001a00000000ULL));
}
1549
TEST(Arm64InsnTest, Fp32ConditionalSelect) {
  // FCSEL (single precision): picks the first or second FP source based on
  // the condition flags set by the preceding CMP (here 3 vs 7, i.e. "ne").
  uint64_t int_arg1 = 3;
  uint64_t int_arg2 = 7;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  // eq is false, so the second operand's low 32 bits are selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x89abcdefULL, 0U));

  // ne is true, so the first operand's low 32 bits are selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %s0, %s3, %s4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x76543210ULL, 0U));
}
1569
TEST(Arm64InsnTest, Fp64ConditionalSelect) {
  // FCSEL (double precision): picks the first or second FP source based on
  // the condition flags set by the preceding CMP (here 8 vs 3, i.e. "ne").
  uint64_t int_arg1 = 8;
  uint64_t int_arg2 = 3;
  uint64_t fp_arg1 = 0xfedcba9876543210ULL;
  uint64_t fp_arg2 = 0x0123456789abcdefULL;
  __uint128_t res;

  // eq is false, so the second operand is selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, eq"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0x0123456789abcdefULL, 0U));

  // ne is true, so the first operand is selected.
  asm("cmp %x1,%x2\n\t"
      "fcsel %d0, %d3, %d4, ne"
      : "=w"(res)
      : "r"(int_arg1), "r"(int_arg2), "w"(fp_arg1), "w"(fp_arg2));
  ASSERT_EQ(res, MakeUInt128(0xfedcba9876543210ULL, 0U));
}
1589
TEST(Arm64InsnTest, RoundUpFp32) {
  // FRINTP (round toward +infinity), single precision.
  // The lower 32-bit represents 2.7182817 in float.
  // The upper half is garbage (0xdeadbeef) to verify it is ignored.
  uint64_t fp_arg = 0xdeadbeef402df854ULL;
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %s0, %s1")(fp_arg);
  ASSERT_EQ(res, MakeUInt128(0x40400000ULL, 0U));  // 3.0 in float
}
1596
TEST(Arm64InsnTest, RoundUpFp64) {
  // FRINTP (round toward +infinity), double precision.
  // 2.7182817 in double.
  uint64_t fp_arg = 0x4005BF0A8B145769ULL;
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %d0, %d1")(fp_arg);
  ASSERT_EQ(res, MakeUInt128(0x4008000000000000ULL, 0U));  // 3.0 in double
}
1603
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp64) {
  // FRINTA (round to nearest, ties away from zero), double precision.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %d0, %d1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01E000000000000ULL), MakeUInt128(0xc020000000000000ULL, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc01B000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc01A000000000000ULL), MakeUInt128(0xc01c000000000000ULL, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc019000000000000ULL), MakeUInt128(0xc018000000000000ULL, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x4019000000000000ULL), MakeUInt128(0x4018000000000000ULL, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401A000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x401B000000000000ULL), MakeUInt128(0x401c000000000000ULL, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x401E000000000000ULL), MakeUInt128(0x4020000000000000ULL, 0U));

  // -0.49999999999999994 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xBFDFFFFFFFFFFFFF), MakeUInt128(0x8000000000000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(0.5 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(0.5 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-0.5 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-0.5 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(0.75 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(0.75 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-0.75 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-0.75 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(1.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-1.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(2.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(2.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-2.0 / std::numeric_limits<double>::epsilon())),
            MakeUInt128(bit_cast<uint64_t>(-2.0 / std::numeric_limits<double>::epsilon()), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(1.0e100)), MakeUInt128(bit_cast<uint64_t>(1.0e100), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint64_t>(-1.0e100)), MakeUInt128(bit_cast<uint64_t>(-1.0e100), 0U));
}
1654
TEST(Arm64InsnTest, RoundToIntNearestTiesAwayFp32) {
  // FRINTA (round to nearest, ties away from zero), single precision.
  constexpr auto AsmFrinta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %s0, %s1");

  // -7.50 -> -8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0f00000U), MakeUInt128(0xc1000000U, 0U));

  // -6.75 -> -7.00
  ASSERT_EQ(AsmFrinta(0xc0d80000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.50 -> -7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0xc0d00000U), MakeUInt128(0xc0e00000U, 0U));

  // -6.25 -> -6.00
  ASSERT_EQ(AsmFrinta(0xc0c80000U), MakeUInt128(0xc0c00000U, 0U));

  // 6.25 -> 6.00
  ASSERT_EQ(AsmFrinta(0x40c80000U), MakeUInt128(0x40c00000U, 0U));

  // 6.50 -> 7.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40d00000U), MakeUInt128(0x40e00000U, 0U));

  // 6.75 -> 7.00
  ASSERT_EQ(AsmFrinta(0x40d80000U), MakeUInt128(0x40e00000U, 0U));

  // 7.50 -> 8.00 (ties away from zero as opposed to even)
  ASSERT_EQ(AsmFrinta(0x40f00000U), MakeUInt128(0x41000000U, 0U));

  // -0.49999997019767761 -> -0.0 (should not "tie away" since -0.4999... != -0.5)
  ASSERT_EQ(AsmFrinta(0xbeffffff), MakeUInt128(0x80000000U, 0U));

  // A number too large to have fractional precision, should not change upon rounding with tie-away
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{0.5 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{0.5 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-0.5 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-0.5 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{0.75 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{0.75 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-0.75 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-0.75 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{1.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{1.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-1.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-1.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{2.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{2.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(
      AsmFrinta(bit_cast<uint32_t>(float{-2.0 / std::numeric_limits<float>::epsilon()})),
      MakeUInt128(bit_cast<uint32_t>(float{-2.0 / std::numeric_limits<float>::epsilon()}), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint32_t>(1.0e38f)), MakeUInt128(bit_cast<uint32_t>(1.0e38f), 0U));
  ASSERT_EQ(AsmFrinta(bit_cast<uint32_t>(-1.0e38f)), MakeUInt128(bit_cast<uint32_t>(-1.0e38f), 0U));
}
1713
TEST(Arm64InsnTest, RoundToIntDownwardFp64) {
  // FRINTM (round toward -infinity), double precision.
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01c666666666666ULL), MakeUInt128(0xc020000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc01f99999999999aULL), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintm(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1735
TEST(Arm64InsnTest, RoundToIntDownwardFp32) {
  // FRINTM (round toward -infinity), single precision.
  constexpr auto AsmFrintm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintm(0x40e33333), 0x40e00000);

  // -7.10 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0e33333), 0xc1000000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintm(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintm(0x00000000), 0x00000000);

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintm(0x80000000), 0x80000000);
}
1757
TEST(Arm64InsnTest, RoundToIntNearestFp64) {
  // FRINTN (round to nearest, ties to even), double precision.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %d0, %d1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x401e000000000000ULL), MakeUInt128(0x4020000000000000, 0U));

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x4021000000000000), MakeUInt128(0x4020000000000000, 0U));

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x401c666666666666), MakeUInt128(0x401c000000000000, 0U));

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x401f99999999999a), MakeUInt128(0x4020000000000000, 0U));

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc01e000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc021000000000000), MakeUInt128(0xc020000000000000, 0U));

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc01c666666666666), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc01f99999999999a), MakeUInt128(0xc020000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintn(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1791
TEST(Arm64InsnTest, RoundToIntToNearestFp32) {
  // FRINTN (round to nearest, ties to even), single precision.
  constexpr auto AsmFrintn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %s0, %s1");

  // 7.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x40f00000), 0x41000000);

  // 8.5 -> 8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0x41080000), 0x41000000);

  // 7.10 -> 7.00
  ASSERT_EQ(AsmFrintn(0x40e33333), 0x40e00000);

  // 7.90 -> 8.00
  ASSERT_EQ(AsmFrintn(0x40fccccd), 0x41000000);

  // -7.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc0f00000), 0xc1000000);

  // -8.5 -> -8.00 (ties to even)
  ASSERT_EQ(AsmFrintn(0xc1080000), 0xc1000000);

  // -7.10 -> -7.00
  ASSERT_EQ(AsmFrintn(0xc0e33333), 0xc0e00000);

  // -7.90 -> -8.00
  ASSERT_EQ(AsmFrintn(0xc0fccccd), 0xc1000000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintn(0x00000000), 0x00000000);

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintn(0x80000000), 0x80000000);
}
1825
TEST(Arm64InsnTest, RoundToIntTowardZeroFp64) {
  // FRINTZ (round toward zero / truncate), double precision.
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %d0, %d1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401ecccccccccccdULL), MakeUInt128(0x401c000000000000, 0U));

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x401c666666666666ULL), MakeUInt128(0x401c000000000000, 0U));

  // -7.10 -> -7.00 (truncation moves negatives toward zero)
  ASSERT_EQ(AsmFrintz(0xc01c666666666666ULL), MakeUInt128(0xc01c000000000000, 0U));

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc01f99999999999aULL), MakeUInt128(0xc01c000000000000, 0U));

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x0000000000000000ULL), MakeUInt128(0x0000000000000000, 0U));

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintz(0x8000000000000000ULL), MakeUInt128(0x8000000000000000, 0U));
}
1847
TEST(Arm64InsnTest, RoundToIntTowardZeroFp32) {
  // FRINTZ (round toward zero / truncate), single precision.
  constexpr auto AsmFrintz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %s0, %s1");

  // 7.7 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40f66666), 0x40e00000);

  // 7.1 -> 7.00
  ASSERT_EQ(AsmFrintz(0x40e33333), 0x40e00000);

  // -7.10 -> -7.00 (truncation moves negatives toward zero)
  ASSERT_EQ(AsmFrintz(0xc0e33333), 0xc0e00000);

  // -7.90 -> -7.00
  ASSERT_EQ(AsmFrintz(0xc0fccccd), 0xc0e00000);

  // 0 -> 0
  ASSERT_EQ(AsmFrintz(0x00000000), 0x00000000);

  // -0 -> -0 (the sign of zero is preserved)
  ASSERT_EQ(AsmFrintz(0x80000000), 0x80000000);
}
1869
TEST(Arm64InsnTest, AsmConvertF32x4TieAway) {
  // FRINTA (vector): round to nearest, ties away from zero, four float32 lanes.
  constexpr auto AsmFcvta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvta(arg1), MakeF32x4(-8.00f, -7.00f, -7.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvta(arg2), MakeF32x4(6.00f, 7.00f, 7.00f, 8.00f));
}
1877
TEST(Arm64InsnTest, AsmConvertF32x4NegInf) {
  // FRINTM (vector): round toward -infinity, four float32 lanes.
  constexpr auto AsmFcvtm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtm(arg1), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtm(arg2), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1885
TEST(Arm64InsnTest, AsmConvertF32x4TieEven) {
  // FRINTN (vector): round to nearest, ties to even, four float32 lanes.
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtn(arg1), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtn(arg2), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
}
1893
TEST(Arm64InsnTest, AsmConvertF32x4PosInf) {
  // FRINTP (vector): round toward +infinity, four float32 lanes.
  constexpr auto AsmFcvtp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtp(arg1), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtp(arg2), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
}
1901
TEST(Arm64InsnTest, AsmConvertF32x4Truncate) {
  // FRINTZ (vector): round toward zero, four float32 lanes.
  constexpr auto AsmFcvtz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.4s, %1.4s");
  __uint128_t arg1 = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  ASSERT_EQ(AsmFcvtz(arg1), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  __uint128_t arg2 = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFcvtz(arg2), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
1909
TEST(Arm64InsnTest, AsmConvertF64x4TieAway) {
  // FRINTA (vector): round to nearest, ties away from zero, two float64 lanes.
  constexpr auto AsmFcvta = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frinta %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvta(arg1), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvta(arg2), MakeF64x2(-7.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvta(arg3), MakeF64x2(6.00, 7.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvta(arg4), MakeF64x2(7.00, 8.00));
}
1921
TEST(Arm64InsnTest, AsmConvertF64x4NegInf) {
  // FRINTM (vector): round toward -infinity, two float64 lanes.
  constexpr auto AsmFcvtm = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintm %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtm(arg1), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtm(arg2), MakeF64x2(-7.00, -7.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtm(arg3), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtm(arg4), MakeF64x2(6.00, 7.00));
}
1933
TEST(Arm64InsnTest, AsmConvertF64x4TieEven) {
  // FRINTN (vector): round to nearest, ties to even, two float64 lanes.
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintn %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtn(arg1), MakeF64x2(-8.00, -7.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtn(arg2), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtn(arg3), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtn(arg4), MakeF64x2(7.00, 8.00));
}
1945
TEST(Arm64InsnTest, AsmConvertF64x4PosInf) {
  // FRINTP (vector): round toward +infinity, two float64 lanes.
  constexpr auto AsmFcvtp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintp %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtp(arg1), MakeF64x2(-7.00, -6.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtp(arg2), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtp(arg3), MakeF64x2(7.00, 7.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtp(arg4), MakeF64x2(7.00, 8.00));
}
1957
TEST(Arm64InsnTest, AsmConvertF64x4Truncate) {
  // FRINTZ (vector): round toward zero, two float64 lanes.
  constexpr auto AsmFcvtz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frintz %0.2d, %1.2d");
  __uint128_t arg1 = MakeF64x2(-7.50, -6.75);
  ASSERT_EQ(AsmFcvtz(arg1), MakeF64x2(-7.00, -6.00));
  __uint128_t arg2 = MakeF64x2(-6.50, -6.25);
  ASSERT_EQ(AsmFcvtz(arg2), MakeF64x2(-6.00, -6.00));
  __uint128_t arg3 = MakeF64x2(6.25, 6.50);
  ASSERT_EQ(AsmFcvtz(arg3), MakeF64x2(6.00, 6.00));
  __uint128_t arg4 = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFcvtz(arg4), MakeF64x2(6.00, 7.00));
}
1969
TEST(Arm64InsnTest, AsmRoundCurrentModeF32) {
  // frinti rounds using the rounding mode currently selected in FPCR, which
  // the WC-arg wrapper installs before executing the instruction.
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %s0, %s1");
  // Each case: input value, FPCR rounding mode, expected rounded value.
  struct RoundCase {
    float input;
    decltype(kFpcrRModeTieEven) rmode;
    float expected;
  };
  const RoundCase kCases[] = {
      {-7.50f, kFpcrRModeTieEven, -8.00f},
      {-6.75f, kFpcrRModeTieEven, -7.00f},
      {-6.50f, kFpcrRModeTieEven, -6.00f},
      {-6.25f, kFpcrRModeTieEven, -6.00f},
      {6.25f, kFpcrRModeTieEven, 6.00f},
      {6.50f, kFpcrRModeTieEven, 6.00f},
      {6.75f, kFpcrRModeTieEven, 7.00f},
      {7.50f, kFpcrRModeTieEven, 8.00f},
      {-7.50f, kFpcrRModeNegInf, -8.00f},
      {-6.75f, kFpcrRModeNegInf, -7.00f},
      {-6.50f, kFpcrRModeNegInf, -7.00f},
      {-6.25f, kFpcrRModeNegInf, -7.00f},
      {6.25f, kFpcrRModeNegInf, 6.00f},
      {6.50f, kFpcrRModeNegInf, 6.00f},
      {6.75f, kFpcrRModeNegInf, 6.00f},
      {7.50f, kFpcrRModeNegInf, 7.00f},
      {-7.50f, kFpcrRModePosInf, -7.00f},
      {-6.75f, kFpcrRModePosInf, -6.00f},
      {-6.50f, kFpcrRModePosInf, -6.00f},
      {-6.25f, kFpcrRModePosInf, -6.00f},
      {6.25f, kFpcrRModePosInf, 7.00f},
      {6.50f, kFpcrRModePosInf, 7.00f},
      {6.75f, kFpcrRModePosInf, 7.00f},
      {7.50f, kFpcrRModePosInf, 8.00f},
      {-7.50f, kFpcrRModeZero, -7.00f},
      {-6.75f, kFpcrRModeZero, -6.00f},
      {-6.50f, kFpcrRModeZero, -6.00f},
      {-6.25f, kFpcrRModeZero, -6.00f},
      {6.25f, kFpcrRModeZero, 6.00f},
      {6.50f, kFpcrRModeZero, 6.00f},
      {6.75f, kFpcrRModeZero, 6.00f},
      {7.50f, kFpcrRModeZero, 7.00f},
  };
  for (const RoundCase& c : kCases) {
    ASSERT_EQ(AsmFrinti(bit_cast<uint32_t>(c.input), c.rmode), bit_cast<uint32_t>(c.expected));
  }
}
2005
TEST(Arm64InsnTest, AsmRoundCurrentModeF64) {
  // frinti rounds using the rounding mode currently selected in FPCR, which
  // the WC-arg wrapper installs before executing the instruction.
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %d0, %d1");
  // Each case: input value, FPCR rounding mode, expected rounded value.
  struct RoundCase {
    double input;
    decltype(kFpcrRModeTieEven) rmode;
    double expected;
  };
  const RoundCase kCases[] = {
      {-7.50, kFpcrRModeTieEven, -8.00},
      {-6.75, kFpcrRModeTieEven, -7.00},
      {-6.50, kFpcrRModeTieEven, -6.00},
      {-6.25, kFpcrRModeTieEven, -6.00},
      {6.25, kFpcrRModeTieEven, 6.00},
      {6.50, kFpcrRModeTieEven, 6.00},
      {6.75, kFpcrRModeTieEven, 7.00},
      {7.50, kFpcrRModeTieEven, 8.00},
      {-7.50, kFpcrRModeNegInf, -8.00},
      {-6.75, kFpcrRModeNegInf, -7.00},
      {-6.50, kFpcrRModeNegInf, -7.00},
      {-6.25, kFpcrRModeNegInf, -7.00},
      {6.25, kFpcrRModeNegInf, 6.00},
      {6.50, kFpcrRModeNegInf, 6.00},
      {6.75, kFpcrRModeNegInf, 6.00},
      {7.50, kFpcrRModeNegInf, 7.00},
      {-7.50, kFpcrRModePosInf, -7.00},
      {-6.75, kFpcrRModePosInf, -6.00},
      {-6.50, kFpcrRModePosInf, -6.00},
      {-6.25, kFpcrRModePosInf, -6.00},
      {6.25, kFpcrRModePosInf, 7.00},
      {6.50, kFpcrRModePosInf, 7.00},
      {6.75, kFpcrRModePosInf, 7.00},
      {7.50, kFpcrRModePosInf, 8.00},
      {-7.50, kFpcrRModeZero, -7.00},
      {-6.75, kFpcrRModeZero, -6.00},
      {-6.50, kFpcrRModeZero, -6.00},
      {-6.25, kFpcrRModeZero, -6.00},
      {6.25, kFpcrRModeZero, 6.00},
      {6.50, kFpcrRModeZero, 6.00},
      {6.75, kFpcrRModeZero, 6.00},
      {7.50, kFpcrRModeZero, 7.00},
  };
  for (const RoundCase& c : kCases) {
    ASSERT_EQ(AsmFrinti(bit_cast<uint64_t>(c.input), c.rmode), bit_cast<uint64_t>(c.expected));
  }
}
2041
TEST(Arm64InsnTest, AsmRoundCurrentModeF32x4) {
  // Vector frinti rounds all four F32 lanes using the FPCR rounding mode.
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.4s, %1.4s");
  // The same two input vectors are rounded under every rounding mode.
  const __uint128_t kNegatives = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  const __uint128_t kPositives = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrinti(kNegatives, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(kPositives, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrinti(kNegatives, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(AsmFrinti(kPositives, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  ASSERT_EQ(AsmFrinti(kNegatives, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(kPositives, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrinti(kNegatives, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrinti(kPositives, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2061
TEST(Arm64InsnTest, AsmRoundCurrentModeF64x2) {
  // Vector frinti rounds both F64 lanes using the FPCR rounding mode.
  constexpr auto AsmFrinti = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frinti %0.2d, %1.2d");
  // The same four input vectors are rounded under every rounding mode.
  const __uint128_t kNegLow = MakeF64x2(-7.50, -6.75);
  const __uint128_t kNegHigh = MakeF64x2(-6.50, -6.25);
  const __uint128_t kPosLow = MakeF64x2(6.25, 6.50);
  const __uint128_t kPosHigh = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrinti(kNegLow, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrinti(kNegHigh, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(kPosLow, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(kPosHigh, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  ASSERT_EQ(AsmFrinti(kNegLow, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrinti(kNegHigh, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrinti(kPosLow, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(kPosHigh, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  ASSERT_EQ(AsmFrinti(kNegLow, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrinti(kNegHigh, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(kPosLow, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrinti(kPosHigh, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  ASSERT_EQ(AsmFrinti(kNegLow, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrinti(kNegHigh, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrinti(kPosLow, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrinti(kPosHigh, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2097
TEST(Arm64InsnTest, AsmRoundExactF32) {
  // frintx behaves like frinti for these exactly-representable inputs; it
  // additionally signals Inexact, which this test does not inspect.
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %s0, %s1");
  // Each case: input value, FPCR rounding mode, expected rounded value.
  struct RoundCase {
    float input;
    decltype(kFpcrRModeTieEven) rmode;
    float expected;
  };
  const RoundCase kCases[] = {
      {-7.50f, kFpcrRModeTieEven, -8.00f},
      {-6.75f, kFpcrRModeTieEven, -7.00f},
      {-6.50f, kFpcrRModeTieEven, -6.00f},
      {-6.25f, kFpcrRModeTieEven, -6.00f},
      {6.25f, kFpcrRModeTieEven, 6.00f},
      {6.50f, kFpcrRModeTieEven, 6.00f},
      {6.75f, kFpcrRModeTieEven, 7.00f},
      {7.50f, kFpcrRModeTieEven, 8.00f},
      {-7.50f, kFpcrRModeNegInf, -8.00f},
      {-6.75f, kFpcrRModeNegInf, -7.00f},
      {-6.50f, kFpcrRModeNegInf, -7.00f},
      {-6.25f, kFpcrRModeNegInf, -7.00f},
      {6.25f, kFpcrRModeNegInf, 6.00f},
      {6.50f, kFpcrRModeNegInf, 6.00f},
      {6.75f, kFpcrRModeNegInf, 6.00f},
      {7.50f, kFpcrRModeNegInf, 7.00f},
      {-7.50f, kFpcrRModePosInf, -7.00f},
      {-6.75f, kFpcrRModePosInf, -6.00f},
      {-6.50f, kFpcrRModePosInf, -6.00f},
      {-6.25f, kFpcrRModePosInf, -6.00f},
      {6.25f, kFpcrRModePosInf, 7.00f},
      {6.50f, kFpcrRModePosInf, 7.00f},
      {6.75f, kFpcrRModePosInf, 7.00f},
      {7.50f, kFpcrRModePosInf, 8.00f},
      {-7.50f, kFpcrRModeZero, -7.00f},
      {-6.75f, kFpcrRModeZero, -6.00f},
      {-6.50f, kFpcrRModeZero, -6.00f},
      {-6.25f, kFpcrRModeZero, -6.00f},
      {6.25f, kFpcrRModeZero, 6.00f},
      {6.50f, kFpcrRModeZero, 6.00f},
      {6.75f, kFpcrRModeZero, 6.00f},
      {7.50f, kFpcrRModeZero, 7.00f},
  };
  for (const RoundCase& c : kCases) {
    ASSERT_EQ(AsmFrintx(bit_cast<uint32_t>(c.input), c.rmode), bit_cast<uint32_t>(c.expected));
  }
}
2133
TEST(Arm64InsnTest, AsmRoundExactF64) {
  // frintx behaves like frinti for these exactly-representable inputs; it
  // additionally signals Inexact, which this test does not inspect.
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %d0, %d1");
  // Each case: input value, FPCR rounding mode, expected rounded value.
  struct RoundCase {
    double input;
    decltype(kFpcrRModeTieEven) rmode;
    double expected;
  };
  const RoundCase kCases[] = {
      {-7.50, kFpcrRModeTieEven, -8.00},
      {-6.75, kFpcrRModeTieEven, -7.00},
      {-6.50, kFpcrRModeTieEven, -6.00},
      {-6.25, kFpcrRModeTieEven, -6.00},
      {6.25, kFpcrRModeTieEven, 6.00},
      {6.50, kFpcrRModeTieEven, 6.00},
      {6.75, kFpcrRModeTieEven, 7.00},
      {7.50, kFpcrRModeTieEven, 8.00},
      {-7.50, kFpcrRModeNegInf, -8.00},
      {-6.75, kFpcrRModeNegInf, -7.00},
      {-6.50, kFpcrRModeNegInf, -7.00},
      {-6.25, kFpcrRModeNegInf, -7.00},
      {6.25, kFpcrRModeNegInf, 6.00},
      {6.50, kFpcrRModeNegInf, 6.00},
      {6.75, kFpcrRModeNegInf, 6.00},
      {7.50, kFpcrRModeNegInf, 7.00},
      {-7.50, kFpcrRModePosInf, -7.00},
      {-6.75, kFpcrRModePosInf, -6.00},
      {-6.50, kFpcrRModePosInf, -6.00},
      {-6.25, kFpcrRModePosInf, -6.00},
      {6.25, kFpcrRModePosInf, 7.00},
      {6.50, kFpcrRModePosInf, 7.00},
      {6.75, kFpcrRModePosInf, 7.00},
      {7.50, kFpcrRModePosInf, 8.00},
      {-7.50, kFpcrRModeZero, -7.00},
      {-6.75, kFpcrRModeZero, -6.00},
      {-6.50, kFpcrRModeZero, -6.00},
      {-6.25, kFpcrRModeZero, -6.00},
      {6.25, kFpcrRModeZero, 6.00},
      {6.50, kFpcrRModeZero, 6.00},
      {6.75, kFpcrRModeZero, 6.00},
      {7.50, kFpcrRModeZero, 7.00},
  };
  for (const RoundCase& c : kCases) {
    ASSERT_EQ(AsmFrintx(bit_cast<uint64_t>(c.input), c.rmode), bit_cast<uint64_t>(c.expected));
  }
}
2169
TEST(Arm64InsnTest, AsmRoundExactF32x4) {
  // Vector frintx rounds all four F32 lanes using the FPCR rounding mode.
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.4s, %1.4s");
  // The same two input vectors are rounded under every rounding mode.
  const __uint128_t kNegatives = MakeF32x4(-7.50f, -6.75f, -6.50f, -6.25f);
  const __uint128_t kPositives = MakeF32x4(6.25f, 6.50f, 6.75f, 7.50f);
  ASSERT_EQ(AsmFrintx(kNegatives, kFpcrRModeTieEven), MakeF32x4(-8.00f, -7.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(kPositives, kFpcrRModeTieEven), MakeF32x4(6.00f, 6.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrintx(kNegatives, kFpcrRModeNegInf), MakeF32x4(-8.00f, -7.00f, -7.00f, -7.00f));
  ASSERT_EQ(AsmFrintx(kPositives, kFpcrRModeNegInf), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
  ASSERT_EQ(AsmFrintx(kNegatives, kFpcrRModePosInf), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(kPositives, kFpcrRModePosInf), MakeF32x4(7.00f, 7.00f, 7.00f, 8.00f));
  ASSERT_EQ(AsmFrintx(kNegatives, kFpcrRModeZero), MakeF32x4(-7.00f, -6.00f, -6.00f, -6.00f));
  ASSERT_EQ(AsmFrintx(kPositives, kFpcrRModeZero), MakeF32x4(6.00f, 6.00f, 6.00f, 7.00f));
}
2189
TEST(Arm64InsnTest, AsmRoundExactF64x2) {
  // Vector frintx rounds both F64 lanes using the FPCR rounding mode.
  constexpr auto AsmFrintx = ASM_INSN_WRAP_FUNC_W_RES_WC_ARG("frintx %0.2d, %1.2d");
  // The same four input vectors are rounded under every rounding mode.
  const __uint128_t kNegLow = MakeF64x2(-7.50, -6.75);
  const __uint128_t kNegHigh = MakeF64x2(-6.50, -6.25);
  const __uint128_t kPosLow = MakeF64x2(6.25, 6.50);
  const __uint128_t kPosHigh = MakeF64x2(6.75, 7.50);
  ASSERT_EQ(AsmFrintx(kNegLow, kFpcrRModeTieEven), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintx(kNegHigh, kFpcrRModeTieEven), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(kPosLow, kFpcrRModeTieEven), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(kPosHigh, kFpcrRModeTieEven), MakeF64x2(7.00, 8.00));
  ASSERT_EQ(AsmFrintx(kNegLow, kFpcrRModeNegInf), MakeF64x2(-8.00, -7.00));
  ASSERT_EQ(AsmFrintx(kNegHigh, kFpcrRModeNegInf), MakeF64x2(-7.00, -7.00));
  ASSERT_EQ(AsmFrintx(kPosLow, kFpcrRModeNegInf), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(kPosHigh, kFpcrRModeNegInf), MakeF64x2(6.00, 7.00));
  ASSERT_EQ(AsmFrintx(kNegLow, kFpcrRModePosInf), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintx(kNegHigh, kFpcrRModePosInf), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(kPosLow, kFpcrRModePosInf), MakeF64x2(7.00, 7.00));
  ASSERT_EQ(AsmFrintx(kPosHigh, kFpcrRModePosInf), MakeF64x2(7.00, 8.00));
  ASSERT_EQ(AsmFrintx(kNegLow, kFpcrRModeZero), MakeF64x2(-7.00, -6.00));
  ASSERT_EQ(AsmFrintx(kNegHigh, kFpcrRModeZero), MakeF64x2(-6.00, -6.00));
  ASSERT_EQ(AsmFrintx(kPosLow, kFpcrRModeZero), MakeF64x2(6.00, 6.00));
  ASSERT_EQ(AsmFrintx(kPosHigh, kFpcrRModeZero), MakeF64x2(6.00, 7.00));
}
2225
// Compares two single-precision values (passed as raw bit patterns) with
// fcmp and returns the resulting PSTATE flags, read back via NZCV.
// The flags occupy bits 31:28 of the returned value (see MakeNZCV).
uint64_t Fp32Compare(uint64_t arg1, uint64_t arg2) {
  uint64_t res;
  asm("fcmp %s1, %s2\n\t"
      "mrs %x0, nzcv"
      : "=r"(res)
      : "w"(arg1), "w"(arg2)
      // fcmp overwrites the condition flags; without the "cc" clobber the
      // compiler may assume flags are preserved across the asm statement.
      : "cc");
  return res;
}
2234
// Compares two double-precision values (passed as raw bit patterns) with
// fcmp and returns the resulting PSTATE flags, read back via NZCV.
// The flags occupy bits 31:28 of the returned value (see MakeNZCV).
uint64_t Fp64Compare(uint64_t arg1, uint64_t arg2) {
  uint64_t res;
  asm("fcmp %d1, %d2\n\t"
      "mrs %x0, nzcv"
      : "=r"(res)
      : "w"(arg1), "w"(arg2)
      // fcmp overwrites the condition flags; without the "cc" clobber the
      // compiler may assume flags are preserved across the asm statement.
      : "cc");
  return res;
}
2243
// Positions a 4-bit N/Z/C/V flag pattern in bits 31:28, where the NZCV
// system register keeps the condition flags.
constexpr uint64_t MakeNZCV(uint64_t nzcv) {
  constexpr uint64_t kNzcvShift = 28;
  return nzcv << kNzcvShift;
}
2247
TEST(Arm64InsnTest, Fp32Compare) {
  // Inputs are raw F32 bit patterns; expected flags are N/Z/C/V.
  const uint64_t kQuietNanF32 = 0x7fc00000ULL;
  ASSERT_EQ(Fp32Compare(kQuietNanF32, 0x3fea3d71ULL), MakeNZCV(0b0011));   // NaN vs 1.83: unordered
  ASSERT_EQ(Fp32Compare(0x40c9eb85ULL, 0x40c9eb85ULL), MakeNZCV(0b0110));  // 6.31 == 6.31
  ASSERT_EQ(Fp32Compare(0x3f9d70a4ULL, 0x4015c28fULL), MakeNZCV(0b1000));  // 1.23 < 2.34
  ASSERT_EQ(Fp32Compare(0x40a80000ULL, 0x403c28f6ULL), MakeNZCV(0b0010));  // 5.25 > 2.94
}
2261
TEST(Arm64InsnTest, Fp32CompareZero) {
  // fcmp's "#0.0" immediate form compares a register against zero.
  constexpr auto AsmFcmpZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %s1, #0.0\n\t"
      "mrs %x0, nzcv");
  ASSERT_EQ(AsmFcmpZero(0x7fa00000ULL), MakeNZCV(0b0011));  // NaN vs 0.00: unordered
  ASSERT_EQ(AsmFcmpZero(0x00000000ULL), MakeNZCV(0b0110));  // 0.00 == 0.00
  ASSERT_EQ(AsmFcmpZero(0xc02ae148ULL), MakeNZCV(0b1000));  // -2.67 < 0.00
  ASSERT_EQ(AsmFcmpZero(0x3fc7ae14ULL), MakeNZCV(0b0010));  // 1.56 > 0.00
}
2279
TEST(Arm64InsnTest, Fp64Compare) {
  // Inputs are raw F64 bit patterns; expected flags are N/Z/C/V.
  const uint64_t kQuietNanF64 = 0x7ff8000000000000ULL;
  ASSERT_EQ(Fp64Compare(kQuietNanF64, 0x3ff30a3d70a3d70aULL), MakeNZCV(0b0011));  // NaN vs 1.19
  ASSERT_EQ(Fp64Compare(0x4020d70a3d70a3d7ULL, 0x4020d70a3d70a3d7ULL),
            MakeNZCV(0b0110));  // 8.42 == 8.42
  ASSERT_EQ(Fp64Compare(0x3fe0000000000000ULL, 0x3ff0000000000000ULL),
            MakeNZCV(0b1000));  // 0.50 < 1.00
  ASSERT_EQ(Fp64Compare(0x401d851eb851eb85ULL, 0x3ff8a3d70a3d70a4ULL),
            MakeNZCV(0b0010));  // 7.38 > 1.54
}
2293
TEST(Arm64InsnTest, Fp64CompareZero) {
  // fcmp's "#0.0" immediate form compares a register against zero.
  constexpr auto AsmFcmpZero = ASM_INSN_WRAP_FUNC_R_RES_W_ARG(
      "fcmp %d1, #0.0\n\t"
      "mrs %x0, nzcv");
  ASSERT_EQ(AsmFcmpZero(0x7ff4000000000000ULL), MakeNZCV(0b0011));  // NaN vs 0.00: unordered
  ASSERT_EQ(AsmFcmpZero(0x0000000000000000ULL), MakeNZCV(0b0110));  // 0.00 == 0.00
  ASSERT_EQ(AsmFcmpZero(0xc01ceb851eb851ecULL), MakeNZCV(0b1000));  // -7.23 < 0.00
  ASSERT_EQ(AsmFcmpZero(0x40158f5c28f5c28fULL), MakeNZCV(0b0010));  // 5.39 > 0.00
}
2311
// Seeds NZCV with the given value, then executes fccmp: when the "eq"
// condition holds, arg1 and arg2 are compared and NZCV reflects that
// comparison; otherwise NZCV is set to the literal #15 (0b1111).
// Returns the resulting NZCV (flags in bits 31:28).
uint64_t Fp32CompareIfEqualOrSetAllFlags(float arg1, float arg2, uint64_t nzcv) {
  asm("msr nzcv, %x0\n\t"
      "fccmp %s2, %s3, #15, eq\n\t"
      "mrs %x0, nzcv\n\t"
      : "=r"(nzcv)
      : "0"(nzcv), "w"(arg1), "w"(arg2)
      // Both msr nzcv and fccmp overwrite the condition flags; declare the
      // "cc" clobber so the compiler does not cache flags across the asm.
      : "cc");
  return nzcv;
}
2320
TEST(Arm64InsnTest, Fp32ConditionalCompare) {
  constexpr float kNan = std::numeric_limits<float>::quiet_NaN();
  // With Z set, the "eq" condition holds and the fccmp comparison runs.
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kEqual), MakeNZCV(0b0011));
  // With Z clear, "eq" fails and fccmp installs the literal flags (#15).
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  constexpr uint64_t kAllFlags = MakeNZCV(0b1111);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 1.0f, kNotEqual), kAllFlags);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(1.0f, 2.0f, kNotEqual), kAllFlags);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(2.0f, 1.0f, kNotEqual), kAllFlags);
  ASSERT_EQ(Fp32CompareIfEqualOrSetAllFlags(kNan, 1.0f, kNotEqual), kAllFlags);
}
2336
// Seeds NZCV with the given value, then executes fccmp: when the "eq"
// condition holds, arg1 and arg2 are compared and NZCV reflects that
// comparison; otherwise NZCV is set to the literal #15 (0b1111).
// Returns the resulting NZCV (flags in bits 31:28).
uint64_t Fp64CompareIfEqualOrSetAllFlags(double arg1, double arg2, uint64_t nzcv) {
  asm("msr nzcv, %x0\n\t"
      "fccmp %d2, %d3, #15, eq\n\t"
      "mrs %x0, nzcv\n\t"
      : "=r"(nzcv)
      : "0"(nzcv), "w"(arg1), "w"(arg2)
      // Both msr nzcv and fccmp overwrite the condition flags; declare the
      // "cc" clobber so the compiler does not cache flags across the asm.
      : "cc");
  return nzcv;
}
2345
TEST(Arm64InsnTest, Fp64ConditionalCompare) {
  // With Z set, the "eq" condition holds and the fccmp comparison runs.
  constexpr uint64_t kEqual = MakeNZCV(0b0100);
  constexpr double kNan = std::numeric_limits<double>::quiet_NaN();
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kEqual), MakeNZCV(0b0110));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kEqual), MakeNZCV(0b1000));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kEqual), MakeNZCV(0b0010));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kEqual), MakeNZCV(0b0011));
  // With Z clear, "eq" fails; fccmp installs the literal flags (#15 == 0b1111).
  constexpr uint64_t kNotEqual = MakeNZCV(0b0000);
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(1.0, 2.0, kNotEqual), MakeNZCV(0b1111));
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(2.0, 1.0, kNotEqual), MakeNZCV(0b1111));
  // Fixed: previously passed the float literal 1.0f in this double-precision
  // test; use a double literal for consistency with the sibling asserts.
  ASSERT_EQ(Fp64CompareIfEqualOrSetAllFlags(kNan, 1.0, kNotEqual), MakeNZCV(0b1111));
}
2361
TEST(Arm64InsnTest, ConvertFp32ToFp64) {
  // fcvt widens single precision to double precision.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %s1");
  // 6.42f (0x40cd70a4) widens exactly; the F64 mantissa is zero-extended.
  ASSERT_EQ(AsmFcvt(0x40cd70a4ULL), MakeUInt128(0x4019ae1480000000ULL, 0U));
}
2367
TEST(Arm64InsnTest, ConvertFp64ToFp32) {
  // fcvt narrows double precision to single precision.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %d1");
  // 6.51 (0x401a0a3d70a3d70a) narrows to the nearest F32, 0x40d051ec.
  ASSERT_EQ(AsmFcvt(0x401a0a3d70a3d70aULL), MakeUInt128(0x40d051ecULL, 0U));
}
2373
TEST(Arm64InsnTest, ConvertFp32ToFp16) {
  // fcvt narrows single precision to half precision.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %s1");
  // Pairs of (F32 input, expected F16 bit pattern); all convert exactly.
  const struct {
    float input;
    uint64_t half;
  } kCases[] = {{2.5f, 0x4100U}, {4.5f, 0x4480U}, {8.5f, 0x4840U}, {16.5f, 0x4c20U}};
  for (const auto& c : kCases) {
    EXPECT_EQ(AsmFcvt(bit_cast<uint32_t>(c.input)), MakeUInt128(c.half, 0U));
  }
}
2381
TEST(Arm64InsnTest, ConvertFp16ToFp32) {
  // fcvt widens half precision to single precision; 0x4100 is 2.5 in F16.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %s0, %h1");
  ASSERT_EQ(AsmFcvt(uint64_t{0x4100U}), bit_cast<uint32_t>(2.5f));
}
2387
TEST(Arm64InsnTest, ConvertFp64ToFp16) {
  // fcvt narrows double precision to half precision; 2.5 is 0x4100 in F16.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %h0, %d1");
  ASSERT_EQ(AsmFcvt(bit_cast<uint64_t>(2.5)), MakeUInt128(0x4100U, 0U));
}
2393
TEST(Arm64InsnTest, ConvertFp16ToFp64) {
  // fcvt widens half precision to double precision; 0x4100 is 2.5 in F16.
  constexpr auto AsmFcvt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvt %d0, %h1");
  ASSERT_EQ(AsmFcvt(uint64_t{0x4100U}), bit_cast<uint64_t>(2.5));
}
2399
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2) {
  // fcvtn narrows two F64 lanes into the low half; the top lanes are zeroed.
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.2s, %1.2d");
  ASSERT_EQ(AsmFcvtn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
  // Values too large for F32 (and infinity itself) narrow to +/-inf.
  const double kMaxF64 = std::numeric_limits<double>::max();
  const double kInfF64 = std::numeric_limits<double>::infinity();
  const float kInfF32 = std::numeric_limits<float>::infinity();
  ASSERT_EQ(AsmFcvtn(MakeF64x2(kMaxF64, kInfF64)), MakeF32x4(kInfF32, kInfF32, 0.0f, 0.0f));
  ASSERT_EQ(AsmFcvtn(MakeF64x2(-kMaxF64, -kInfF64)), MakeF32x4(-kInfF32, -kInfF32, 0.0f, 0.0f));
}
2419
TEST(Arm64InsnTest, ConvertToNarrowF64F32x2Upper) {
  // fcvtn2 writes the narrowed lanes to the upper half, keeping the lower half.
  constexpr auto AsmFcvtn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.4s, %1.2d");
  ASSERT_EQ(AsmFcvtn2(MakeF64x2(2.0, 3.0), MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f)),
            MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2426
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32) {
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %s0, %d1");
  using DblLimits = std::numeric_limits<double>;
  using FltLimits = std::numeric_limits<float>;
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(2.0)), bit_cast<uint32_t>(2.0f));
  // Finite out-of-range values saturate to the largest finite F32...
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(DblLimits::max())), bit_cast<uint32_t>(FltLimits::max()));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(DblLimits::lowest())),
            bit_cast<uint32_t>(FltLimits::lowest()));
  // ...while +/-infinity stays infinite.
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(DblLimits::infinity())),
            bit_cast<uint32_t>(FltLimits::infinity()));
  ASSERT_EQ(AsmFcvtxn(bit_cast<uint64_t>(-DblLimits::infinity())),
            bit_cast<uint32_t>(-FltLimits::infinity()));
}
2442
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2) {
  // Vector fcvtxn narrows into the low half; the top lanes are zeroed.
  constexpr auto AsmFcvtxn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtxn %0.2s, %1.2d");
  ASSERT_EQ(AsmFcvtxn(MakeF64x2(2.0, 3.0)), MakeF32x4(2.0f, 3.0f, 0.0f, 0.0f));
}
2448
TEST(Arm64InsnTest, ConvertToNarrowRoundToOddF64F32x2Upper) {
  // fcvtxn2 writes the narrowed lanes to the upper half, keeping the lower half.
  constexpr auto AsmFcvtxn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtxn2 %0.4s, %1.2d");
  ASSERT_EQ(AsmFcvtxn2(MakeF64x2(2.0, 3.0), MakeF32x4(4.0f, 5.0f, 6.0f, 7.0f)),
            MakeF32x4(4.0f, 5.0f, 2.0f, 3.0f));
}
2455
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Lower) {
  // fcvtl widens the two low F32 lanes to F64; the high lanes are ignored.
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.2d, %1.2s");
  ASSERT_EQ(AsmFcvtl(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(2.0, 3.0));
}
2461
TEST(Arm64InsnTest, ConvertToWiderF32F64x2Upper) {
  // fcvtl2 widens the two high F32 lanes to F64; the low lanes are ignored.
  constexpr auto AsmFcvtl2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.2d, %1.4s");
  ASSERT_EQ(AsmFcvtl2(MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f)), MakeF64x2(4.0, 5.0));
}
2467
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Lower) {
  constexpr auto AsmFcvtl = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl %0.4s, %1.4h");
  // The F16 values 2.5, 4.5, 8.5, 16.5 packed into the low 64 bits widen to
  // the corresponding F32 lanes.
  ASSERT_EQ(AsmFcvtl(MakeUInt128(0x4c20'4840'4480'4100ULL, 0)),
            MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2474
TEST(Arm64InsnTest, ConvertToWiderF16F32x4Upper) {
  constexpr auto AsmFcvtl2 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtl2 %0.4s, %1.8h");
  // The F16 values 2.5, 4.5, 8.5, 16.5 packed into the high 64 bits widen to
  // the corresponding F32 lanes.
  ASSERT_EQ(AsmFcvtl2(MakeUInt128(0, 0x4c20'4840'4480'4100ULL)),
            MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f));
}
2481
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Lower) {
  constexpr auto AsmFcvtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcvtn %0.4h, %1.4s");
  // Four F32 lanes narrow to four F16 values packed into the low 64 bits.
  ASSERT_EQ(AsmFcvtn(MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f)),
            MakeUInt128(0x4c20'4840'4480'4100ULL, 0));
}
2488
TEST(Arm64InsnTest, ConvertToNarrowF32F16x4Upper) {
  constexpr auto AsmFcvtn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("fcvtn2 %0.8h, %1.4s");
  __uint128_t to_narrow = MakeF32x4(2.5f, 4.5f, 8.5f, 16.5f);
  __uint128_t dst = MakeF32x4(3.0f, 5.0f, 7.0f, 11.0f);
  // The narrowed F16 lanes land in the high 64 bits; the low half of the
  // destination register is preserved.
  ASSERT_EQ(AsmFcvtn2(to_narrow, dst),
            MakeUInt128(static_cast<uint64_t>(dst), 0x4c20'4840'4480'4100ULL));
}
2496
TEST(Arm64InsnTest, AbsF32) {
  uint32_t negative_input = 0xc1273333U;  // -10.45 in float
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %s0, %s1")(negative_input),
            MakeUInt128(0x41273333ULL, 0U));  // 10.45 in float
}
2502
TEST(Arm64InsnTest, AbsF64) {
  uint64_t negative_input = 0xc03de8f5c28f5c29ULL;  // -29.91 in double
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %d0, %d1")(negative_input),
            MakeUInt128(0x403de8f5c28f5c29ULL, 0U));  // 29.91 in double
}
2508
TEST(Arm64InsnTest, AbsF32x4) {
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.4s, %1.4s");
  // Covers signed zero as well as positive and negative finite lanes.
  ASSERT_EQ(AsmFabs(MakeF32x4(-0.0f, 0.0f, 3.0f, -7.0f)),
            MakeF32x4(0.0f, 0.0f, 3.0f, 7.0f));
}
2514
TEST(Arm64InsnTest, AbsF64x2) {
  constexpr auto AsmFabs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fabs %0.2d, %1.2d");
  // Negative zero must come out as positive zero.
  ASSERT_EQ(AsmFabs(MakeF64x2(-0.0, 3.0)), MakeF64x2(0.0, 3.0));
}
2520
TEST(Arm64InsnTest, AbdF32) {
  uint32_t minuend = 0x4181851fU;     // 16.19 in float
  uint32_t subtrahend = 0x41211eb8U;  // 10.06 in float
  __uint128_t actual = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %s0, %s1, %s2")(minuend, subtrahend);
  ASSERT_EQ(actual, MakeUInt128(0x40c3d70cULL, 0U));  // 6.12 in float
}
2527
TEST(Arm64InsnTest, AbdF64) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %d0, %d1, %d2");
  uint64_t minuend = 0x403828f5c28f5c29U;     // 24.16 in double
  uint64_t subtrahend = 0x4027d70a3d70a3d7U;  // 11.92 in double
  ASSERT_EQ(AsmFabd(minuend, subtrahend),
            MakeUInt128(0x40287ae147ae147bULL, 0U));  // 12.24 in double
}
2535
TEST(Arm64InsnTest, AbdF32x4) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.4s, %1.4s, %2.4s");
  // |lhs - rhs| lane by lane, with mixed signs on both sides.
  __uint128_t lhs = MakeF32x4(1.0f, 5.0f, -3.0f, -2.0f);
  __uint128_t rhs = MakeF32x4(-1.0f, 2.0f, -5.0f, 3.0f);
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeF32x4(2.0f, 3.0f, 2.0f, 5.0f));
}
2543
TEST(Arm64InsnTest, AbdF64x2) {
  constexpr auto AsmFabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fabd %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(5.0, -2.0);
  __uint128_t rhs = MakeF64x2(4.0, 3.0);
  ASSERT_EQ(AsmFabd(lhs, rhs), MakeF64x2(1.0, 5.0));
}
2551
TEST(Arm64InsnTest, NegF32) {
  uint32_t positive_input = 0x40eeb852U;  // 7.46 in float
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %s0, %s1")(positive_input),
            MakeUInt128(0xc0eeb852ULL, 0U));  // -7.46 in float
}
2557
TEST(Arm64InsnTest, NegF64) {
  uint64_t positive_input = 0x4054b28f5c28f5c3ULL;  // 82.79 in double
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %d0, %d1")(positive_input),
            MakeUInt128(0xc054b28f5c28f5c3ULL, 0U));  // -82.79 in double
}
2563
TEST(Arm64InsnTest, NegF32x4) {
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.4s, %1.4s");
  // FNEG flips the sign bit, including on zeros.
  ASSERT_EQ(AsmFneg(MakeF32x4(-0.0f, 0.0f, 1.0f, -3.0f)),
            MakeF32x4(0.0f, -0.0f, -1.0f, 3.0f));
}
2569
TEST(Arm64InsnTest, NegF64x2) {
  constexpr auto AsmFneg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fneg %0.2d, %1.2d");
  // Positive zero becomes negative zero.
  ASSERT_EQ(AsmFneg(MakeF64x2(0.0, 3.0)), MakeF64x2(-0.0, -3.0));
}
2575
TEST(Arm64InsnTest, SqrtF32) {
  uint32_t radicand = 0x41f3cac1U;  // 30.474 in float
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %s0, %s1")(radicand),
            MakeUInt128(0x40b0a683ULL, 0U));  // 5.5203261 in float
}
2581
TEST(Arm64InsnTest, SqrtF64) {
  uint64_t radicand = 0x403d466666666666ULL;  // 29.275 in double
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %d0, %d1")(radicand),
            MakeUInt128(0x4015a47e3392efb8ULL, 0U));  // 5.41... in double
}
2587
TEST(Arm64InsnTest, SqrtF32x4) {
  constexpr auto AsmSqrt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fsqrt %0.4s, %1.4s");
  // Perfect squares, so the results are exact.
  ASSERT_EQ(AsmSqrt(MakeF32x4(0.0f, 1.0f, 4.0f, 9.0f)),
            MakeF32x4(0.0f, 1.0f, 2.0f, 3.0f));
}
2593
TEST(Arm64InsnTest, RecipEstimateF32) {
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %s0, %s1");
  // The expected values are the limited-precision estimates FRECPE produces,
  // not exact reciprocals.
  auto ExpectEstimate = [&AsmFrecpe](float input, float expected) {
    ASSERT_EQ(AsmFrecpe(bit_cast<uint32_t>(input)), bit_cast<uint32_t>(expected));
  };
  ExpectEstimate(0.25f, 3.9921875f);
  ExpectEstimate(0.50f, 1.99609375f);
  ExpectEstimate(2.00f, 0.4990234375f);
  ExpectEstimate(4.00f, 0.24951171875f);
}
2601
TEST(Arm64InsnTest, RecipEstimateF32x4) {
  constexpr auto AsmFrecpe = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frecpe %0.4s, %1.4s");
  __uint128_t input = MakeF32x4(0.25f, 0.50f, 2.00f, 4.00f);
  ASSERT_EQ(AsmFrecpe(input),
            MakeF32x4(3.9921875f, 1.99609375f, 0.4990234375f, 0.24951171875f));
}
2607
TEST(Arm64InsnTest, RecipStepF32) {
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %s0, %s1, %s2");
  // FRECPS computes 2.0 - lhs * rhs (one Newton-Raphson step for 1/x).
  auto ExpectStep = [&AsmFrecps](float lhs, float rhs, float expected) {
    ASSERT_EQ(AsmFrecps(bit_cast<uint32_t>(lhs), bit_cast<uint32_t>(rhs)),
              bit_cast<uint32_t>(expected));
  };
  ExpectStep(1.50f, 0.50f, 1.25f);
  ExpectStep(2.00f, 0.50f, 1.00f);
  ExpectStep(3.00f, 0.25f, 1.25f);
  ExpectStep(3.00f, 0.50f, 0.50f);
}
2619
TEST(Arm64InsnTest, RecipStepF64) {
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %d0, %d1, %d2");
  // Same cases as RecipStepF32, in double precision.
  auto ExpectStep = [&AsmFrecps](double lhs, double rhs, double expected) {
    ASSERT_EQ(AsmFrecps(bit_cast<uint64_t>(lhs), bit_cast<uint64_t>(rhs)),
              bit_cast<uint64_t>(expected));
  };
  ExpectStep(1.50, 0.50, 1.25);
  ExpectStep(2.00, 0.50, 1.00);
  ExpectStep(3.00, 0.25, 1.25);
  ExpectStep(3.00, 0.50, 0.50);
}
2631
TEST(Arm64InsnTest, RecipStepF32x4) {
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f);
  __uint128_t rhs = MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f);
  ASSERT_EQ(AsmFrecps(lhs, rhs), MakeF32x4(1.25f, 1.00f, 1.25f, 0.50f));
}
2639
TEST(Arm64InsnTest, RecipStepF64x2) {
  constexpr auto AsmFrecps = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frecps %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFrecps(MakeF64x2(1.50, 2.00), MakeF64x2(0.50, 0.50)),
            MakeF64x2(1.25, 1.00));
  ASSERT_EQ(AsmFrecps(MakeF64x2(3.00, 3.00), MakeF64x2(0.25, 0.50)),
            MakeF64x2(1.25, 0.50));
}
2649
TEST(Arm64InsnTest, RecipSqrtEstimateF32) {
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %s0, %s1");
  // Limited-precision 1/sqrt(x) estimates.
  auto ExpectEstimate = [&AsmFrsqrte](float input, float expected) {
    ASSERT_EQ(AsmFrsqrte(bit_cast<uint32_t>(input)), bit_cast<uint32_t>(expected));
  };
  ExpectEstimate(2.0f, 0.705078125f);
  ExpectEstimate(3.0f, 0.576171875f);
  ExpectEstimate(4.0f, 0.4990234375f);
  ExpectEstimate(5.0f, 0.4462890625f);
}
2657
TEST(Arm64InsnTest, RecipSqrtEstimateF32x2) {
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.2s, %1.2s");
  // The 64-bit form only processes the two lower lanes.
  ASSERT_EQ(AsmFrsqrte(MakeF32x4(2.0f, 3.0f, 0, 0)),
            MakeF32x4(0.705078125f, 0.576171875f, 0, 0));
}
2664
TEST(Arm64InsnTest, RecipSqrtEstimateF32x4) {
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.4s, %1.4s");
  __uint128_t input = MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f);
  ASSERT_EQ(AsmFrsqrte(input),
            MakeF32x4(0.705078125f, 0.576171875f, 0.4990234375f, 0.4462890625f));
}
2671
TEST(Arm64InsnTest, RecipSqrtEstimateF64) {
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %d0, %d1");
  auto ExpectEstimate = [&AsmFrsqrte](double input, double expected) {
    ASSERT_EQ(AsmFrsqrte(bit_cast<uint64_t>(input)), bit_cast<uint64_t>(expected));
  };
  ExpectEstimate(2.0, 0.705078125);
  ExpectEstimate(3.0, 0.576171875);
  ExpectEstimate(4.0, 0.4990234375);
  ExpectEstimate(5.0, 0.4462890625);
}
2679
TEST(Arm64InsnTest, RecipSqrtEstimateF64x2) {
  constexpr auto AsmFrsqrte = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("frsqrte %0.2d, %1.2d");
  ASSERT_EQ(AsmFrsqrte(MakeF64x2(2.0, 3.0)),
            MakeUInt128(bit_cast<uint64_t>(0.705078125), bit_cast<uint64_t>(0.576171875)));
}
2686
TEST(Arm64InsnTest, RecipSqrtStepF32) {
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %s0, %s1, %s2");
  // FRSQRTS computes (3.0 - lhs * rhs) / 2.0 (one Newton-Raphson step for
  // 1/sqrt(x)).
  auto ExpectStep = [&AsmFrsqrts](float lhs, float rhs, float expected) {
    ASSERT_EQ(AsmFrsqrts(bit_cast<uint32_t>(lhs), bit_cast<uint32_t>(rhs)),
              bit_cast<uint32_t>(expected));
  };
  ExpectStep(1.50f, 0.50f, 1.125f);
  ExpectStep(2.00f, 0.50f, 1.000f);
  ExpectStep(3.00f, 0.25f, 1.125f);
  ExpectStep(3.00f, 0.50f, 0.750f);
}
2698
TEST(Arm64InsnTest, RecipSqrtStepF64) {
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %d0, %d1, %d2");
  // Same cases as RecipSqrtStepF32, in double precision.
  auto ExpectStep = [&AsmFrsqrts](double lhs, double rhs, double expected) {
    ASSERT_EQ(AsmFrsqrts(bit_cast<uint64_t>(lhs), bit_cast<uint64_t>(rhs)),
              bit_cast<uint64_t>(expected));
  };
  ExpectStep(1.50, 0.50, 1.125);
  ExpectStep(2.00, 0.50, 1.000);
  ExpectStep(3.00, 0.25, 1.125);
  ExpectStep(3.00, 0.50, 0.750);
}
2710
TEST(Arm64InsnTest, RecipSqrtStepF32x4) {
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.50f, 2.00f, 3.00f, 3.00f);
  __uint128_t rhs = MakeF32x4(0.50f, 0.50f, 0.25f, 0.50f);
  ASSERT_EQ(AsmFrsqrts(lhs, rhs), MakeF32x4(1.125f, 1.000f, 1.125f, 0.750f));
}
2718
TEST(Arm64InsnTest, RecipSqrtStepF64x2) {
  constexpr auto AsmFrsqrts = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("frsqrts %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFrsqrts(MakeF64x2(1.50, 2.00), MakeF64x2(0.50, 0.50)),
            MakeF64x2(1.125, 1.000));
  ASSERT_EQ(AsmFrsqrts(MakeF64x2(3.00, 3.00), MakeF64x2(0.25, 0.50)),
            MakeF64x2(1.125, 0.750));
}
2728
TEST(Arm64InsnTest, AddFp32) {
  // Use uint32_t for 32-bit float bit patterns (was uint64_t), consistent with
  // the other scalar F32 tests such as AbdF32.
  uint32_t fp_arg1 = 0x40d5c28fU;  // 6.68 in float
  uint32_t fp_arg2 = 0x409f5c29U;  // 4.98 in float
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %s0, %s1, %s2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(0x413a8f5cULL, 0U));  // 11.66 in float
}
2735
TEST(Arm64InsnTest, AddFp64) {
  uint64_t augend = 0x402099999999999aULL;  // 8.30 in double
  uint64_t addend = 0x4010ae147ae147aeULL;  // 4.17 in double
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %d0, %d1, %d2")(augend, addend),
            MakeUInt128(0x4028f0a3d70a3d71ULL, 0U));  // 12.47 in double
}
2742
TEST(Arm64InsnTest, AddF32x4) {
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFadd(lhs, rhs), MakeF32x4(3.0f, 3.0f, -1.0f, 5.0f));
}
2749
TEST(Arm64InsnTest, AddF64x2) {
  constexpr auto AsmFadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fadd %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFadd(MakeF64x2(3.0, 5.0), MakeF64x2(-4.0, 2.0)), MakeF64x2(-1.0, 7.0));
}
2756
TEST(Arm64InsnTest, AddPairwiseF32x2) {
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("faddp %s0, %1.2s");
  // Scalar FADDP sums only the two lowest lanes; lanes 2-3 are ignored.
  ASSERT_EQ(AsmFaddp(MakeF32x4(1.0f, 2.0f, 4.0f, 8.0f)), bit_cast<uint32_t>(3.0f));
}
2762
TEST(Arm64InsnTest, AddPairwiseF32x4) {
  constexpr auto AsmFaddp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("faddp %0.4s, %1.4s, %2.4s");
  // Result lanes: adjacent-pair sums of lhs followed by adjacent-pair sums of rhs.
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFaddp(lhs, rhs), MakeF32x4(-1.0f, 7.0f, 7.0f, -3.0f));
}
2769
TEST(Arm64InsnTest, SubFp32) {
  // Use uint32_t for 32-bit float bit patterns (was uint64_t), consistent with
  // the other scalar F32 tests such as AbdF32.
  uint32_t fp_arg1 = 0x411f5c29U;  // 9.96 in float
  uint32_t fp_arg2 = 0x404851ecU;  // 3.13 in float
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %s0, %s1, %s2")(fp_arg1, fp_arg2);
  ASSERT_EQ(rd, MakeUInt128(0x40da8f5cULL, 0U));  // 6.83 in float
}
2776
TEST(Arm64InsnTest, SubFp64) {
  uint64_t minuend = 0x401ee147ae147ae1ULL;     // 7.72 in double
  uint64_t subtrahend = 0x4015666666666666ULL;  // 5.35 in double
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %d0, %d1, %d2")(minuend, subtrahend),
            MakeUInt128(0x4002f5c28f5c28f6ULL, 0U));  // 2.37 in double
}
2783
TEST(Arm64InsnTest, SubF32x4) {
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.4s, %1.4s, %2.4s");
  __uint128_t minuend = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t subtrahend = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFsub(minuend, subtrahend), MakeF32x4(-9.0f, 1.0f, 15.0f, -5.0f));
}
2790
TEST(Arm64InsnTest, SubF64x2) {
  constexpr auto AsmFsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fsub %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFsub(MakeF64x2(3.0, 5.0), MakeF64x2(-4.0, 2.0)), MakeF64x2(7.0, 3.0));
}
2797
TEST(Arm64InsnTest, MaxFp32) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %s0, %s1, %s2");
  const uint32_t two = bit_cast<uint32_t>(2.0f);
  const uint32_t three = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmax(two, three), MakeU32x4(three, 0, 0, 0));
  // FMAX propagates a NaN from either operand.
  ASSERT_EQ(AsmFmax(kDefaultNaN32AsInteger, three), kDefaultNaN32AsInteger);
  ASSERT_EQ(AsmFmax(three, kDefaultNaN32AsInteger), kDefaultNaN32AsInteger);
}
2807
TEST(Arm64InsnTest, MaxFp64) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t three = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmax(two, three), MakeUInt128(three, 0U));
  // FMAX propagates a NaN from either operand.
  ASSERT_EQ(AsmFmax(kDefaultNaN64AsInteger, three), kDefaultNaN64AsInteger);
  ASSERT_EQ(AsmFmax(three, kDefaultNaN64AsInteger), kDefaultNaN64AsInteger);
}
2817
TEST(Arm64InsnTest, MaxF32x4) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.4s, %1.4s, %2.4s");
  // +0.0 is treated as greater than -0.0.
  __uint128_t lhs = MakeF32x4(-0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t rhs = MakeF32x4(0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmax(lhs, rhs), MakeF32x4(0.0f, 2.0f, 3.0f, -3.0f));

  // A default NaN in either operand makes that lane NaN.
  const float nan32 = bit_cast<float>(kDefaultNaN32AsInteger);
  __uint128_t nan_lhs = MakeF32x4(-0.0f, nan32, 3.0f, -4.0f);
  __uint128_t nan_rhs = MakeF32x4(0.0f, 1.0f, -3.0f, nan32);
  ASSERT_EQ(AsmFmax(nan_lhs, nan_rhs), MakeF32x4(0.0f, nan32, 3.0f, nan32));
}
2832
TEST(Arm64InsnTest, MaxF64x2) {
  constexpr auto AsmFmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmax %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFmax(MakeF64x2(-0.0, 3.0), MakeF64x2(0.0, -3.0)), MakeF64x2(0.0, 3.0));

  // A default NaN in either operand makes that lane NaN.
  const double nan64 = bit_cast<double>(kDefaultNaN64AsInteger);
  __uint128_t nan_lhs = MakeF64x2(nan64, 3.0);
  __uint128_t nan_rhs = MakeF64x2(1.0, nan64);
  ASSERT_EQ(AsmFmax(nan_lhs, nan_rhs), MakeF64x2(nan64, nan64));
}
2845
TEST(Arm64InsnTest, MaxNumberFp32) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %s0, %s1, %s2");
  uint32_t fp_arg_two = bit_cast<uint32_t>(2.0f);
  uint32_t fp_arg_three = bit_cast<uint32_t>(3.0f);
  // Fixed: this was uint64_t with bit_cast<uint64_t>(-2.0). The low 32 bits of
  // the double -2.0 are zero, so the %s-register instruction actually saw +0.0f
  // and the minus-two assertions below passed vacuously (comparing 0 against 0).
  uint32_t fp_arg_minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFmaxnm(fp_arg_two, fp_arg_three), MakeU32x4(fp_arg_three, 0, 0, 0));

  // FMAXNM returns the numeric operand when the other one is a quiet NaN.
  ASSERT_EQ(AsmFmaxnm(fp_arg_two, kQuietNaN32AsInteger), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(fp_arg_minus_two, kQuietNaN32AsInteger),
            MakeU32x4(fp_arg_minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32AsInteger, fp_arg_two), MakeU32x4(fp_arg_two, 0, 0, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN32AsInteger, fp_arg_minus_two),
            MakeU32x4(fp_arg_minus_two, 0, 0, 0));
}
2861
TEST(Arm64InsnTest, MaxNumberFp64) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t three = bit_cast<uint64_t>(3.0);
  const uint64_t minus_two = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFmaxnm(two, three), MakeUInt128(three, 0U));

  // FMAXNM returns the numeric operand when the other one is a quiet NaN.
  ASSERT_EQ(AsmFmaxnm(two, kQuietNaN64AsInteger), MakeUInt128(two, 0U));
  ASSERT_EQ(AsmFmaxnm(minus_two, kQuietNaN64AsInteger), MakeUInt128(minus_two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64AsInteger, two), MakeUInt128(two, 0));
  ASSERT_EQ(AsmFmaxnm(kQuietNaN64AsInteger, minus_two), MakeUInt128(minus_two, 0));
}
2875
TEST(Arm64InsnTest, MinNumberFp32) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %s0, %s1, %s2");
  const uint32_t two = bit_cast<uint32_t>(2.0f);
  const uint32_t three = bit_cast<uint32_t>(3.0f);
  const uint32_t minus_two = bit_cast<uint32_t>(-2.0f);

  ASSERT_EQ(AsmFminnm(two, three), MakeU32x4(two, 0, 0, 0));

  // FMINNM returns the numeric operand when the other one is a quiet NaN.
  ASSERT_EQ(AsmFminnm(two, kQuietNaN32AsInteger), MakeU32x4(two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(minus_two, kQuietNaN32AsInteger), MakeU32x4(minus_two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32AsInteger, two), MakeU32x4(two, 0, 0, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN32AsInteger, minus_two), MakeU32x4(minus_two, 0, 0, 0));
}
2891
TEST(Arm64InsnTest, MinNumberFp64) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t three = bit_cast<uint64_t>(3.0);
  const uint64_t minus_two = bit_cast<uint64_t>(-2.0);

  ASSERT_EQ(AsmFminnm(two, three), MakeUInt128(two, 0U));

  // FMINNM returns the numeric operand when the other one is a quiet NaN.
  ASSERT_EQ(AsmFminnm(two, kQuietNaN64AsInteger), MakeUInt128(two, 0U));
  ASSERT_EQ(AsmFminnm(minus_two, kQuietNaN64AsInteger), MakeUInt128(minus_two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64AsInteger, two), MakeUInt128(two, 0));
  ASSERT_EQ(AsmFminnm(kQuietNaN64AsInteger, minus_two), MakeUInt128(minus_two, 0));
}
2905
TEST(Arm64InsnTest, MaxNumberF32x4) {
  // FMAXNM (vector): lane-wise "max number". The cases below pin down, lane by
  // lane, how the instruction treats numbers, quiet NaNs, signaling NaNs, and
  // signed zeros.
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.4s, %1.4s, %2.4s");
  // Plain numeric lanes: ordinary maximum.
  __uint128_t arg1 = MakeF32x4(-1.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(2.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmaxnm(arg1, arg2), MakeF32x4(2.0f, 2.0f, 3.0f, -3.0f));

  // A quiet NaN (of either sign, on either side) paired with a number: the
  // number wins.
  __uint128_t arg3 = MakeU32x4(bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f),
                               kNegativeQuietNaN32AsInteger,
                               kQuietNaN32AsInteger);
  __uint128_t arg4 = MakeU32x4(kNegativeQuietNaN32AsInteger,
                               kQuietNaN32AsInteger,
                               bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFmaxnm(arg3, arg4), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));

  // A signaling NaN paired with a number yields the default NaN (lanes 0 and 2);
  // a quiet NaN paired with a number still yields the number (lanes 1 and 3).
  __uint128_t arg5 = MakeU32x4(bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f),
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger);
  __uint128_t arg6 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFmaxnm(arg5, arg6),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      -1.0f,
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      -1.0f));

  // NaN paired with NaN — any combination of signaling/quiet — yields the
  // default NaN.
  __uint128_t arg7 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               kQuietNaN32AsInteger);
  __uint128_t arg8 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger);
  ASSERT_EQ(AsmFmaxnm(arg7, arg8),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger)));

  // Signed zeros: +0.0 is considered greater than -0.0.
  __uint128_t arg9 = MakeF32x4(-0.0f, -0.0f, 0.0f, 0.0f);
  __uint128_t arg10 = MakeF32x4(-0.0f, 0.0f, -0.0f, 0.0f);
  ASSERT_EQ(AsmFmaxnm(arg9, arg10), MakeF32x4(-0.0f, 0.0f, 0.0f, 0.0f));
}
2954
TEST(Arm64InsnTest, MaxNumberF64x2) {
  constexpr auto AsmFmaxnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnm %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFmaxnm(MakeF64x2(-1.0, -4.0), MakeF64x2(2.0, -3.0)), MakeF64x2(2.0, -3.0));

  // A quiet NaN paired with a number yields the number, whichever side it is on.
  __uint128_t nan_lhs = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64AsInteger);
  __uint128_t nan_rhs = MakeUInt128(kQuietNaN64AsInteger, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFmaxnm(nan_lhs, nan_rhs), MakeF64x2(1.0, -1.0));
}
2965
TEST(Arm64InsnTest, MinNumberF32x4) {
  // FMINNM (vector): lane-wise "min number". Mirrors MaxNumberF32x4, covering
  // numbers, quiet NaNs, signaling NaNs, and signed zeros.
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.4s, %1.4s, %2.4s");
  // Plain numeric lanes: ordinary minimum.
  __uint128_t arg1 = MakeF32x4(-1.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t arg2 = MakeF32x4(2.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFminnm(arg1, arg2), MakeF32x4(-1.0f, 1.0f, -3.0f, -4.0f));

  // A quiet NaN (of either sign, on either side) paired with a number: the
  // number wins.
  __uint128_t arg3 = MakeU32x4(bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f),
                               kNegativeQuietNaN32AsInteger,
                               kQuietNaN32AsInteger);
  __uint128_t arg4 = MakeU32x4(kNegativeQuietNaN32AsInteger,
                               kQuietNaN32AsInteger,
                               bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFminnm(arg3, arg4), MakeF32x4(1.0f, -1.0f, 1.0f, -1.0f));

  // A signaling NaN paired with a number yields the default NaN (lanes 0 and 2);
  // a quiet NaN paired with a number still yields the number (lanes 1 and 3).
  __uint128_t arg5 = MakeU32x4(bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f),
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger);
  __uint128_t arg6 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               bit_cast<uint32_t>(1.0f),
                               bit_cast<uint32_t>(-1.0f));
  ASSERT_EQ(AsmFminnm(arg5, arg6),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      -1.0f,
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      -1.0f));

  // NaN paired with NaN — any combination of signaling/quiet — yields the
  // default NaN.
  __uint128_t arg7 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               kQuietNaN32AsInteger);
  __uint128_t arg8 = MakeU32x4(kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger,
                               kSignalingNaN32AsInteger_1,
                               kQuietNaN32AsInteger);
  ASSERT_EQ(AsmFminnm(arg7, arg8),
            MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger),
                      bit_cast<float>(kDefaultNaN32AsInteger)));

  // Signed zeros: -0.0 is considered less than +0.0.
  __uint128_t arg9 = MakeF32x4(-0.0f, -0.0f, 0.0f, 0.0f);
  __uint128_t arg10 = MakeF32x4(-0.0f, 0.0f, -0.0f, 0.0f);
  ASSERT_EQ(AsmFminnm(arg9, arg10), MakeF32x4(-0.0f, -0.0f, -0.0f, 0.0f));
}
3014
TEST(Arm64InsnTest, MinNumberF64x2) {
  constexpr auto AsmFminnm = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnm %0.2d, %1.2d, %2.2d");
  // -0.0 is considered less than +0.0.
  ASSERT_EQ(AsmFminnm(MakeF64x2(0.0, 3.0), MakeF64x2(-0.0, -3.0)), MakeF64x2(-0.0, -3.0));

  // A quiet NaN paired with a number yields the number, whichever side it is on.
  __uint128_t nan_lhs = MakeUInt128(bit_cast<uint64_t>(1.0), kQuietNaN64AsInteger);
  __uint128_t nan_rhs = MakeUInt128(kQuietNaN64AsInteger, bit_cast<uint64_t>(-1.0));
  ASSERT_EQ(AsmFminnm(nan_lhs, nan_rhs), MakeF64x2(1.0, -1.0));
}
3026
TEST(Arm64InsnTest, MinFp32) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %s0, %s1, %s2");
  const uint32_t two = bit_cast<uint32_t>(2.0f);
  const uint32_t three = bit_cast<uint32_t>(3.0f);

  ASSERT_EQ(AsmFmin(two, three), MakeU32x4(two, 0, 0, 0));
  // FMIN propagates a NaN from either operand.
  ASSERT_EQ(AsmFmin(kDefaultNaN32AsInteger, three), kDefaultNaN32AsInteger);
  ASSERT_EQ(AsmFmin(three, kDefaultNaN32AsInteger), kDefaultNaN32AsInteger);
}
3036
TEST(Arm64InsnTest, MinFp64) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %d0, %d1, %d2");
  const uint64_t two = bit_cast<uint64_t>(2.0);
  const uint64_t three = bit_cast<uint64_t>(3.0);

  ASSERT_EQ(AsmFmin(two, three), MakeUInt128(two, 0U));
  // FMIN propagates a NaN from either operand.
  ASSERT_EQ(AsmFmin(kDefaultNaN64AsInteger, three), kDefaultNaN64AsInteger);
  ASSERT_EQ(AsmFmin(three, kDefaultNaN64AsInteger), kDefaultNaN64AsInteger);
}
3046
TEST(Arm64InsnTest, MinF32x4) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.4s, %1.4s, %2.4s");
  // -0.0 is treated as less than +0.0.
  __uint128_t lhs = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  __uint128_t rhs = MakeF32x4(-0.0f, 1.0f, -3.0f, -3.0f);
  ASSERT_EQ(AsmFmin(lhs, rhs), MakeF32x4(-0.0f, 1.0f, -3.0f, -4.0f));

  // A default NaN in either operand makes that lane NaN.
  const float nan32 = bit_cast<float>(kDefaultNaN32AsInteger);
  __uint128_t nan_lhs = MakeF32x4(-0.0f, nan32, 3.0f, -4.0f);
  __uint128_t nan_rhs = MakeF32x4(0.0f, 1.0f, -3.0f, nan32);
  ASSERT_EQ(AsmFmin(nan_lhs, nan_rhs), MakeF32x4(-0.0f, nan32, -3.0f, nan32));
}
3061
TEST(Arm64InsnTest, MinF64x2) {
  constexpr auto AsmFmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmin %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmFmin(MakeF64x2(0.0, 3.0), MakeF64x2(-0.0, -3.0)), MakeF64x2(-0.0, -3.0));

  // A default NaN in either operand makes that lane NaN.
  const double nan64 = bit_cast<double>(kDefaultNaN64AsInteger);
  __uint128_t nan_lhs = MakeF64x2(nan64, 3.0);
  __uint128_t nan_rhs = MakeF64x2(1.0, nan64);
  ASSERT_EQ(AsmFmin(nan_lhs, nan_rhs), MakeF64x2(nan64, nan64));
}
3074
TEST(Arm64InsnTest, MaxPairwiseF32Scalar) {
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxp %s0, %1.2s");
  // Scalar FMAXP reduces only the two lowest lanes; lanes 2-3 are ignored.
  ASSERT_EQ(AsmFmaxp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f)), bit_cast<uint32_t>(2.0f));

  // A NaN in a participating lane propagates to the result.
  __uint128_t with_nan = MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxp(with_nan), kDefaultNaN32AsInteger);
}
3083
TEST(Arm64InsnTest, MaxPairwiseF32x4) {
  constexpr auto AsmFmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxp %0.4s, %1.4s, %2.4s");
  // Result lanes: adjacent-pair maxima of lhs followed by adjacent-pair maxima
  // of rhs.
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxp(lhs, rhs), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  // A default NaN in a pair makes the corresponding result lane NaN.
  const float nan32 = bit_cast<float>(kDefaultNaN32AsInteger);
  __uint128_t nan_lhs = MakeF32x4(nan32, 2.0f, 7.0f, nan32);
  ASSERT_EQ(AsmFmaxp(nan_lhs, rhs), MakeF32x4(nan32, nan32, 6.0f, 5.0f));
}
3099
TEST(Arm64InsnTest, MinPairwiseF32Scalar) {
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminp %s0, %1.2s");
  // Scalar FMINP reduces only the two lowest lanes; lanes 2-3 are ignored.
  ASSERT_EQ(AsmFminp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f)), bit_cast<uint32_t>(-3.0f));

  // A NaN in a participating lane propagates to the result.
  __uint128_t with_nan = MakeF32x4(bit_cast<float>(kDefaultNaN32AsInteger), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminp(with_nan), kDefaultNaN32AsInteger);
}
3108
TEST(Arm64InsnTest, MinPairwiseF32x4) {
  constexpr auto AsmFminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminp %0.4s, %1.4s, %2.4s");
  // Result lanes: adjacent-pair minima of lhs followed by adjacent-pair minima
  // of rhs.
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminp(lhs, rhs), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  // A default NaN in a pair makes the corresponding result lane NaN.
  const float nan32 = bit_cast<float>(kDefaultNaN32AsInteger);
  __uint128_t nan_lhs = MakeF32x4(nan32, 2.0f, 7.0f, nan32);
  ASSERT_EQ(AsmFminp(nan_lhs, rhs), MakeF32x4(nan32, nan32, 1.0f, -8.0f));
}
3124
TEST(Arm64InsnTest, MaxPairwiseNumberF32Scalar) {
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmp %s0, %1.2s");
  ASSERT_EQ(AsmFmaxnmp(MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f)), bit_cast<uint32_t>(2.0f));

  // The "number" variant ignores the quiet NaN in favor of the numeric lane.
  __uint128_t with_nan = MakeF32x4(bit_cast<float>(kQuietNaN32AsInteger), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFmaxnmp(with_nan), bit_cast<uint32_t>(2.0f));
}
3133
TEST(Arm64InsnTest, MaxPairwiseNumberF32x4) {
  // FMAXNMP (vector): quiet NaN lanes lose to numeric lanes.
  constexpr auto AsmFmaxnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmaxnmp %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(lhs, rhs), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));

  __uint128_t nan_lhs = MakeF32x4(
      bit_cast<float>(kQuietNaN32AsInteger), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32AsInteger));
  __uint128_t nan_rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFmaxnmp(nan_lhs, nan_rhs), MakeF32x4(2.0f, 7.0f, 6.0f, 5.0f));
}
3145
TEST(Arm64InsnTest, MinPairwiseNumberF32Scalar) {
  // FMINNMP (scalar): quiet NaN in a low lane is ignored in favor of the number.
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmp %s0, %1.2s");
  __uint128_t vec = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminnmp(vec), bit_cast<uint32_t>(-3.0f));

  __uint128_t vec_with_nan = MakeF32x4(bit_cast<float>(kQuietNaN32AsInteger), 2.0f, 7.0f, -0.0f);
  ASSERT_EQ(AsmFminnmp(vec_with_nan), bit_cast<uint32_t>(2.0f));
}
3154
TEST(Arm64InsnTest, MinPairwiseNumberF32x4) {
  // FMINNMP (vector): quiet NaN lanes lose to numeric lanes.
  constexpr auto AsmFminnmp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fminnmp %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(lhs, rhs), MakeF32x4(-3.0f, -0.0f, 1.0f, -8.0f));

  __uint128_t nan_lhs = MakeF32x4(
      bit_cast<float>(kQuietNaN32AsInteger), 2.0f, 7.0f, bit_cast<float>(kQuietNaN32AsInteger));
  __uint128_t nan_rhs = MakeF32x4(6.0f, 1.0f, -8.0f, 5.0f);
  ASSERT_EQ(AsmFminnmp(nan_lhs, nan_rhs), MakeF32x4(2.0f, 7.0f, 1.0f, -8.0f));
}
3166
TEST(Arm64InsnTest, MaxAcrossF32x4) {
  // FMAXV: maximum across all four lanes; a NaN lane yields the default NaN.
  constexpr auto AsmFmaxv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxv %s0, %1.4s");
  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxv(plain), bit_cast<uint32_t>(3.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32AsInteger), -4.0f);
  ASSERT_EQ(AsmFmaxv(with_nan), kDefaultNaN32AsInteger);
}
3175
TEST(Arm64InsnTest, MinAcrossF32x4) {
  // FMINV: minimum across all four lanes; a NaN lane yields the default NaN.
  constexpr auto AsmFminv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminv %s0, %1.4s");
  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFminv(plain), bit_cast<uint32_t>(-4.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, 2.0f, bit_cast<float>(kDefaultNaN32AsInteger), -4.0f);
  ASSERT_EQ(AsmFminv(with_nan), kDefaultNaN32AsInteger);
}
3184
TEST(Arm64InsnTest, MaxNumberAcrossF32x4) {
  // FMAXNMV: maximum across lanes, treating a quiet NaN as "missing".
  constexpr auto AsmFmaxnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fmaxnmv %s0, %1.4s");
  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxnmv(plain), bit_cast<uint32_t>(3.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32AsInteger), 3.0f, -4.0f);
  ASSERT_EQ(AsmFmaxnmv(with_nan), bit_cast<uint32_t>(3.0f));
}
3193
TEST(Arm64InsnTest, MinNumberAcrossF32x4) {
  // FMINNMV: minimum across lanes, treating a quiet NaN as "missing".
  constexpr auto AsmFminnmv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fminnmv %s0, %1.4s");
  __uint128_t plain = MakeF32x4(0.0f, 2.0f, 3.0f, -4.0f);
  ASSERT_EQ(AsmFminnmv(plain), bit_cast<uint32_t>(-4.0f));

  __uint128_t with_nan = MakeF32x4(0.0f, bit_cast<float>(kQuietNaN32AsInteger), 3.0f, -4.0f);
  ASSERT_EQ(AsmFminnmv(with_nan), bit_cast<uint32_t>(-4.0f));
}
3202
TEST(Arm64InsnTest, MulFp32) {
  // FMUL (scalar, single precision): 5.05 * 6.84 == 34.5420 (as float bits).
  uint64_t multiplicand = 0x40a1999aULL;  // 5.05 in float
  uint64_t multiplier = 0x40dae148ULL;    // 6.84 in float
  __uint128_t product =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %s2")(multiplicand, multiplier);
  ASSERT_EQ(product, MakeUInt128(0x420a2b03ULL, 0U));  // 34.5420 in float
}
3209
TEST(Arm64InsnTest, MulFp64) {
  // FMUL (scalar, double precision): 9.21 * 8.39 == 77.2719 (as double bits).
  uint64_t multiplicand = 0x40226b851eb851ecULL;  // 9.21 in double
  uint64_t multiplier = 0x4020c7ae147ae148ULL;    // 8.39 in double
  __uint128_t product =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %d2")(multiplicand, multiplier);
  ASSERT_EQ(product, MakeUInt128(0x40535166cf41f214ULL, 0U));  // 77.2719 in double
}
3216
TEST(Arm64InsnTest, MulF32x4) {
  // FMUL (vector, 4 x single precision): lane-wise products.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(1.0f, -2.0f, 3.0f, -4.0f);
  __uint128_t rhs = MakeF32x4(-3.0f, -1.0f, 4.0f, 1.0f);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF32x4(-3.0f, 2.0f, 12.0f, -4.0f));
}
3223
TEST(Arm64InsnTest, MulF64x2) {
  // FMUL (vector, 2 x double precision): lane-wise products.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.2d");
  __uint128_t lhs = MakeF64x2(-4.0, 2.0);
  __uint128_t rhs = MakeF64x2(2.0, 3.0);
  ASSERT_EQ(AsmFmul(lhs, rhs), MakeF64x2(-8.0, 6.0));
}
3230
TEST(Arm64InsnTest, MulF32x4ByScalar) {
  // FMUL (vector by element): every lane of the first operand is scaled by
  // lane 3 of the second operand (9.0f here).
  __uint128_t vec = MakeF32x4(2.0f, 3.0f, 4.0f, 5.0f);
  __uint128_t scale_source = MakeF32x4(6.0f, 7.0f, 8.0f, 9.0f);
  __uint128_t product =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.4s, %1.4s, %2.s[3]")(vec, scale_source);
  ASSERT_EQ(product, MakeF32x4(18.0f, 27.0f, 36.0f, 45.0f));
}
3237
TEST(Arm64InsnTest, MulF64x2ByScalar) {
  // FMUL (vector by element): both lanes scaled by lane 1 of the second
  // operand (4.0 here).
  __uint128_t vec = MakeF64x2(2.0, 3.0);
  __uint128_t scale_source = MakeF64x2(5.0, 4.0);
  __uint128_t product =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %0.2d, %1.2d, %2.d[1]")(vec, scale_source);
  ASSERT_EQ(product, MakeF64x2(8.0, 12.0));
}
3244
TEST(Arm64InsnTest, MulF32IndexedElem) {
  // FMUL (scalar by element): low lane (2.0f) times lane 2 (17.0f) == 34.0f.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %s0, %s1, %2.s[2]");
  __uint128_t scalar_source = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t elem_source = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmul(scalar_source, elem_source), bit_cast<uint32_t>(34.0f));
}
3251
TEST(Arm64InsnTest, MulF64IndexedElem) {
  // FMUL (scalar by element): low lane (2.0) times lane 1 (4.0) == 8.0.
  constexpr auto AsmFmul = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmul %d0, %d1, %2.d[1]");
  __uint128_t scalar_source = MakeF64x2(2.0, 3.0);
  __uint128_t elem_source = MakeF64x2(5.0, 4.0);
  ASSERT_EQ(AsmFmul(scalar_source, elem_source), bit_cast<uint64_t>(8.0));
}
3258
TEST(Arm64InsnTest, MulExtendedF32) {
  // FMULX (scalar): for finite operands behaves like FMUL; 2 * 11 == 22.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %s2");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), bit_cast<uint32_t>(22.0f));
}
3265
TEST(Arm64InsnTest, MulExtendedF32x4) {
  // FMULX (vector): lane-wise products for finite operands.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t rhs = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(lhs, rhs), MakeF32x4(22.0f, 39.0f, 85.0f, 133.0f));
}
3272
TEST(Arm64InsnTest, MulExtendedF32IndexedElem) {
  // FMULX (scalar by element): low lane (2.0f) times lane 2 (17.0f) == 34.0f.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %s0, %s1, %2.s[2]");
  __uint128_t scalar_source = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t elem_source = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(scalar_source, elem_source), bit_cast<uint32_t>(34.0f));
}
3279
TEST(Arm64InsnTest, MulExtendedF64IndexedElem) {
  // FMULX (scalar by element): low lane (2.0) times lane 1 (4.0) == 8.0.
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %d0, %d1, %2.d[1]");
  __uint128_t scalar_source = MakeF64x2(2.0, 3.0);
  __uint128_t elem_source = MakeF64x2(5.0, 4.0);
  ASSERT_EQ(AsmFmulx(scalar_source, elem_source), bit_cast<uint64_t>(8.0));
}
3286
TEST(Arm64InsnTest, MulExtendedF32x4IndexedElem) {
  // FMULX (vector by element): all lanes scaled by lane 2 (17.0f).
  constexpr auto AsmFmulx = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fmulx %0.4s, %1.4s, %2.s[2]");
  __uint128_t vec = MakeF32x4(2.0f, 3.0f, 5.0f, 7.0f);
  __uint128_t elem_source = MakeF32x4(11.0f, 13.0f, 17.0f, 19.0f);
  ASSERT_EQ(AsmFmulx(vec, elem_source), MakeF32x4(34.0f, 51.0f, 85.0f, 119.0f));
}
3293
TEST(Arm64InsnTest, MulNegFp32) {
  // FNMUL (single precision): negated product, 2 * 3 -> -6.
  uint64_t lhs = bit_cast<uint32_t>(2.0f);
  uint64_t rhs = bit_cast<uint32_t>(3.0f);
  __uint128_t neg_product = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %s0, %s1, %s2")(lhs, rhs);
  ASSERT_EQ(neg_product, MakeUInt128(bit_cast<uint32_t>(-6.0f), 0U));
}
3300
TEST(Arm64InsnTest, MulNegFp64) {
  // FNMUL (double precision): negated product, 2 * 3 -> -6.
  uint64_t lhs = bit_cast<uint64_t>(2.0);
  uint64_t rhs = bit_cast<uint64_t>(3.0);
  __uint128_t neg_product = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fnmul %d0, %d1, %d2")(lhs, rhs);
  ASSERT_EQ(neg_product, MakeUInt128(bit_cast<uint64_t>(-6.0), 0U));
}
3307
TEST(Arm64InsnTest, DivFp32) {
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %s0, %s1, %s2");

  // 6.07 / 2.41 == 2.5186722 (as float bit patterns).
  uint32_t dividend = 0x40c23d71U;  // 6.07 in float
  uint32_t divisor = 0x401a3d71U;   // 2.41 in float
  ASSERT_EQ(AsmFdiv(dividend, divisor), MakeUInt128(0x402131edULL, 0U));  // 2.5186722 in float

  // FDIV must be able to produce a denormal result under the default FPCR,
  // i.e. with the FZ (flush-to-zero) bit clear.
  uint32_t tiny = 0xa876eff9U;  // exponent (without offset) = -47
  uint32_t huge = 0xe7d86b60U;  // exponent (without offset) = 80
  ASSERT_EQ(AsmFdiv(tiny, huge), MakeUInt128(0x0049065cULL, 0U));  // denormal
}
3321
TEST(Arm64InsnTest, DivFp64) {
  // FDIV (double precision): 7.59 / 1.16 == 6.5431034482758620995923593.
  uint64_t dividend = 0x401e5c28f5c28f5cULL;  // 7.59 in double
  uint64_t divisor = 0x3ff28f5c28f5c28fULL;   // 1.16 in double
  __uint128_t quotient = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %d0, %d1, %d2")(dividend, divisor);
  ASSERT_EQ(quotient, MakeUInt128(0x401a2c234f72c235ULL, 0U));
}
3328
TEST(Arm64InsnTest, DivFp32_FlagsWhenDivByZero) {
  // Scalar divide by zero must raise DZC in FPSR and must NOT raise IOC.
  uint64_t fpsr;
  volatile float dividend = 123.0f;
  volatile float divisor = 0.0f;
  float quotient;
  // Clear FPSR, perform the division, then read FPSR back — all in one asm
  // statement so nothing else can disturb the flags in between.
  asm volatile(
      "msr fpsr, xzr\n\t"
      "fdiv %s1, %s2, %s3\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr), "=w"(quotient)
      : "w"(dividend), "w"(divisor));
  ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
}
3345
TEST(Arm64InsnTest, DivFp64_FlagsWhenDivByZero) {
  // Double-precision divide by zero must raise DZC in FPSR and NOT raise IOC.
  uint64_t fpsr;
  double quotient;
  // Clear FPSR, divide, and read FPSR back within a single asm statement.
  asm volatile(
      "msr fpsr, xzr\n\t"
      "fdiv %d1, %d2, %d3\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr), "=w"(quotient)
      : "w"(123.0), "w"(0.0));
  ASSERT_TRUE((fpsr & kFpsrDzcBit) == (kFpsrDzcBit));

  // Previous bug caused IOC to be set upon scalar div by zero.
  ASSERT_TRUE((fpsr & kFpsrIocBit) == 0);
}
3360
TEST(Arm64InsnTest, DivFp32x4) {
  constexpr auto AsmFdiv = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.4s, %1.4s, %2.4s");

  // Lane-wise division with mixed signs.
  // 16.39, 80.286, 41.16, 98.01
  __uint128_t dividends = MakeUInt128(0x41831eb842a0926fULL, 0x4224a3d742c4051fULL);
  // 13.3, 45.45, 7.89, -2.63
  __uint128_t divisors = MakeUInt128(0x4154cccd4235cccdULL, 0x40fc7ae1c02851ecULL);
  // 1.2323308, 1.7664686, 5.21673, -37.26616
  ASSERT_EQ(AsmFdiv(dividends, divisors),
            MakeUInt128(0x3f9dbd043fe21ba5ULL, 0x40a6ef74c215108cULL));

  // Verify that fdiv produces a denormal result under the default FPCR.
  __uint128_t tiny = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.eddff2p-47f);
  __uint128_t huge = MakeF32x4(1.0f, 1.0f, 1.0f, -0x1.b0d6c0p80f);
  ASSERT_EQ(AsmFdiv(tiny, huge), MakeF32x4(1.0f, 1.0f, 1.0f, 0x0.920cb8p-126f));
}
3379
TEST(Arm64InsnTest, DivFp64x2) {
  // FDIV (vector, 2 x double precision), lane-wise.
  // 6.23, 65.02
  __uint128_t dividends = MakeUInt128(0x4018EB851EB851ECULL, 0x40504147AE147AE1ULL);
  // -7.54, 11.92
  __uint128_t divisors = MakeUInt128(0xC01E28F5C28F5C29ULL, 0x4027D70A3D70A3D7ULL);
  __uint128_t quotients =
      ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fdiv %0.2d, %1.2d, %2.2d")(dividends, divisors);
  // -0.82625994695, 5.45469798658
  ASSERT_EQ(quotients, MakeUInt128(0xbfea70b8b3449564ULL, 0x4015d19c59579fc9ULL));
}
3389
TEST(Arm64InsnTest, MulAddFp32) {
  // FMADD (single precision): fused d = a*b + c.
  constexpr auto AsmFmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %s0, %s1, %s2, %s3");

  // 2*3 + 5 == 11.
  ASSERT_EQ(AsmFmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f)),
            MakeF32x4(11.0f, 0, 0, 0));

  // 2.5*2 + (-5) == 0.
  ASSERT_EQ(
      AsmFmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f)),
      MakeF32x4(0, 0, 0, 0));

  // These tests verify that fmadd does not lose precision while doing the mult + add.
  __uint128_t fused1 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                bit_cast<uint32_t>(0x1.000002p0f),
                                bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(fused1, MakeF32x4(0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t fused2 = AsmFmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                bit_cast<uint32_t>(0x1.000002p0f),
                                bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(fused2, MakeF32x4(0x1.fffffep-1f, 0, 0, 0));

  __uint128_t fused3 = AsmFmadd(bit_cast<uint32_t>(0x1.p23f),
                                bit_cast<uint32_t>(0x1.fffffep-1f),
                                bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(fused3, MakeF32x4(-0x1.80p0f, 0, 0, 0));
}
3417
TEST(Arm64InsnTest, MulAddFp64) {
  // FMADD / FNMADD (double precision): 18.24*65.02 + 11.92 == +/-1197.8848.
  uint64_t factor1 = 0x40323d70a3d70a3dULL;  // 18.24
  uint64_t factor2 = 0x40504147ae147ae1ULL;  // 65.02
  uint64_t addend = 0x4027d70a3d70a3d7ULL;   // 11.92
  __uint128_t fused = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(
      factor1, factor2, addend);
  ASSERT_EQ(fused, MakeUInt128(0x4092b78a0902de00ULL, 0U));  // 1197.8848
  __uint128_t neg_fused = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3")(
      factor1, factor2, addend);
  ASSERT_EQ(neg_fused, MakeUInt128(0xc092b78a0902de00ULL, 0U));  // -1197.8848
}
3428
TEST(Arm64InsnTest, MulAddFp64Precision) {
  // FMADD must compute the product at full precision before the add.
  uint64_t factor1 = bit_cast<uint64_t>(0x1.0p1023);
  uint64_t factor2 = bit_cast<uint64_t>(0x1.0p-1);
  uint64_t addend = bit_cast<uint64_t>(0x1.fffffffffffffp1022);
  __uint128_t fused = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmadd %d0, %d1, %d2, %d3")(
      factor1, factor2, addend);
  ASSERT_EQ(fused, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3436
TEST(Arm64InsnTest, NegMulAddFp32) {
  // FNMADD (single precision): d = -(a*b + c).
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %s0, %s1, %s2, %s3");

  // -(2*3 + 5) == -11.
  ASSERT_EQ(
      AsmFnmadd(bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(5.0f)),
      MakeF32x4(-11.0f, 0, 0, 0));

  // No -0 (proper negation): -(2.5*2 - 5) == +0.
  ASSERT_EQ(
      AsmFnmadd(bit_cast<uint32_t>(2.5f), bit_cast<uint32_t>(2.0f), bit_cast<uint32_t>(-5.0f)),
      MakeF32x4(0.0f, 0, 0, 0));

  // These tests verify that fmadd does not lose precision while doing the mult + add.
  __uint128_t fused1 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                 bit_cast<uint32_t>(0x1.000002p0f),
                                 bit_cast<uint32_t>(-0x1.p23f));
  ASSERT_EQ(fused1, MakeF32x4(-0x1.fffffcp-2f, 0, 0, 0));

  __uint128_t fused2 = AsmFnmadd(bit_cast<uint32_t>(0x1.fffffep22f),
                                 bit_cast<uint32_t>(0x1.000002p0f),
                                 bit_cast<uint32_t>(-0x1.fffffep22f));
  ASSERT_EQ(fused2, MakeF32x4(-0x1.fffffep-1f, 0, 0, 0));

  __uint128_t fused3 = AsmFnmadd(bit_cast<uint32_t>(0x1.p23f),
                                 bit_cast<uint32_t>(0x1.fffffep-1f),
                                 bit_cast<uint32_t>(-0x1.000002p23f));
  ASSERT_EQ(fused3, MakeF32x4(0x1.80p0f, 0, 0, 0));
}
3465
TEST(Arm64InsnTest, NegMulAddFp64) {
  // FNMADD (double precision): d = -(a*b + c).
  constexpr auto AsmFnmadd = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmadd %d0, %d1, %d2, %d3");

  // -(2*3 + 5) == -11.
  ASSERT_EQ(AsmFnmadd(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0)),
            MakeF64x2(-11.0, 0));

  // Proper negation (no -0 in this case): -(2.5*2 - 5) == +0.
  ASSERT_EQ(AsmFnmadd(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(-5.0)),
            MakeF64x2(0.0, 0));
}
3478
TEST(Arm64InsnTest, NegMulSubFp64) {
  // FNMSUB (double precision): d = -(c - a*b) == a*b - c.
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");

  // (-2)*3 - 5 == -11.
  __uint128_t res1 =
      AsmFnmsub(bit_cast<uint64_t>(-2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(res1, MakeF64x2(-11.0, 0));

  // 21.48*57.78 - 120.65 == 1120.4644.
  // Fixed literal-suffix consistency: was lowercase `ull` on arg2 while the
  // rest of the file uses `ULL`.
  uint64_t arg1 = 0x40357ae147ae147bULL;  // 21.48
  uint64_t arg2 = 0x404ce3d70a3d70a4ULL;  // 57.78
  uint64_t arg3 = 0x405e29999999999aULL;  // 120.65
  __uint128_t res2 = AsmFnmsub(arg1, arg2, arg3);
  ASSERT_EQ(res2, MakeUInt128(0x409181db8bac710dULL, 0U));  // 1120.4644

  // Assert no -0 in this case: 2.5*2 - 5 == +0.
  __uint128_t res3 =
      AsmFnmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0));
  ASSERT_EQ(res3, MakeF64x2(0.0, 0));
}
3497
TEST(Arm64InsnTest, NegMulSubFp64Precision) {
  // FNMSUB must keep full precision across the fused multiply-subtract.
  constexpr auto AsmFnmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %d0, %d1, %d2, %d3");

  __uint128_t fused = AsmFnmsub(bit_cast<uint64_t>(0x1.0p1023),
                                bit_cast<uint64_t>(0x1.0p-1),
                                bit_cast<uint64_t>(-0x1.fffffffffffffp1022));
  ASSERT_EQ(fused, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3506
TEST(Arm64InsnTest, MulAddF32x4) {
  // FMLA (vector): acc += a*b lane-wise; acc enters via the tied operand.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.4s");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(factor1, factor2, acc), MakeF32x4(5.0f, 5.0f, 9.0f, 14.0f));
}
3514
TEST(Arm64InsnTest, MulAddF32IndexedElem) {
  // FMLA (scalar by element): acc + low(a) * b[2].
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %s0, %s1, %2.s[2]");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  // 2 + (1 * 2)
  ASSERT_EQ(AsmFmla(factor1, factor2, acc), bit_cast<uint32_t>(4.0f));
}
3523
TEST(Arm64InsnTest, MulAddF64IndexedElem) {
  // FMLA (scalar by element, double): acc + low(a) * b[1].
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %d0, %d1, %2.d[1]");
  __uint128_t factor1 = MakeF64x2(2.0, 3.0);
  __uint128_t factor2 = MakeF64x2(4.0, 5.0);
  __uint128_t acc = MakeF64x2(6.0, 7.0);
  // 6 + (2 * 5)
  ASSERT_EQ(AsmFmla(factor1, factor2, acc), bit_cast<uint64_t>(16.0));
}
3532
TEST(Arm64InsnTest, MulAddF64x2) {
  // FMLA (vector, 2 x double precision): acc += a*b lane-wise.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.2d, %1.2d, %2.2d");
  // Use double literals for 64-bit lanes; the original float literals (1.0f,
  // etc.) converted exactly but were inconsistent with the other F64x2 tests.
  __uint128_t arg1 = MakeF64x2(1.0, 2.0);
  __uint128_t arg2 = MakeF64x2(3.0, 1.0);
  __uint128_t arg3 = MakeF64x2(2.0, 3.0);
  ASSERT_EQ(AsmFmla(arg1, arg2, arg3), MakeF64x2(5.0, 5.0));
}
3540
TEST(Arm64InsnTest, MulAddF32x4IndexedElem) {
  // FMLA (vector by element): acc += a * b[2] lane-wise.
  constexpr auto AsmFmla = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmla %0.4s, %1.4s, %2.s[2]");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmla(factor1, factor2, acc), MakeF32x4(4.0f, 7.0f, 9.0f, 8.0f));
}
3548
TEST(Arm64InsnTest, MulSubFp32) {
  // FMSUB computes c - a*b; FNMSUB computes a*b - c.
  uint32_t factor1 = bit_cast<uint32_t>(2.0f);
  uint32_t factor2 = bit_cast<uint32_t>(5.0f);
  uint32_t subtrahend = bit_cast<uint32_t>(3.0f);
  __uint128_t fmsub_res = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %s0, %s1, %s2, %s3")(
      factor1, factor2, subtrahend);
  ASSERT_EQ(fmsub_res, MakeUInt128(bit_cast<uint32_t>(-7.0f), 0U));
  __uint128_t fnmsub_res = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fnmsub %s0, %s1, %s2, %s3")(
      factor1, factor2, subtrahend);
  ASSERT_EQ(fnmsub_res, MakeUInt128(bit_cast<uint32_t>(7.0f), 0U));
}
3559
TEST(Arm64InsnTest, MulSubFp64) {
  // FMSUB (double precision): d = c - a*b.
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");

  // 120.65 - 21.48*57.78 == -1120.4644.
  uint64_t factor1 = 0x40357ae147ae147bULL;    // 21.48
  uint64_t factor2 = 0x404ce3d70a3d70a4ull;    // 57.78
  uint64_t subtrahend = 0x405e29999999999aULL; // 120.65
  ASSERT_EQ(AsmFmsub(factor1, factor2, subtrahend),
            MakeUInt128(0xc09181db8bac710dULL, 0U));  // -1120.4644

  // Basic case: -5 - 2*3 == -11.
  ASSERT_EQ(AsmFmsub(bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(3.0), bit_cast<uint64_t>(-5.0)),
            MakeF64x2(-11.0, 0));

  // No -0 in this case (proper negation order): 5 - 2.5*2 == +0.
  ASSERT_EQ(AsmFmsub(bit_cast<uint64_t>(2.5), bit_cast<uint64_t>(2.0), bit_cast<uint64_t>(5.0)),
            MakeF64x2(0.0, 0));
}
3579
TEST(Arm64InsnTest, MulSubFp64Precision) {
  // FMSUB must keep full precision across the fused multiply-subtract.
  constexpr auto AsmFmsub = ASM_INSN_WRAP_FUNC_W_RES_WWW_ARG("fmsub %d0, %d1, %d2, %d3");
  __uint128_t fused = AsmFmsub(bit_cast<uint64_t>(-0x1.0p1023),
                               bit_cast<uint64_t>(0x1.0p-1),
                               bit_cast<uint64_t>(0x1.fffffffffffffp1022));
  ASSERT_EQ(fused, bit_cast<uint64_t>(0x1.7ffffffffffff8p1023));
}
3587
TEST(Arm64InsnTest, MulSubF32x4) {
  // FMLS (vector): acc -= a*b lane-wise; acc enters via the tied operand.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.4s");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(factor1, factor2, acc), MakeF32x4(-1.0f, 1.0f, -7.0f, -10.0f));
}
3595
TEST(Arm64InsnTest, MulSubF32IndexedElem) {
  // FMLS (scalar by element): acc - low(a) * b[2].
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %s0, %s1, %2.s[2]");
  __uint128_t factor1 = MakeF32x4(2.0f, 1.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(4.0f, 3.0f, 2.0f, 1.0f);
  __uint128_t acc = MakeF32x4(8.0f, 3.0f, 1.0f, 2.0f);
  // 8 - (2 * 2)
  ASSERT_EQ(AsmFmls(factor1, factor2, acc), bit_cast<uint32_t>(4.0f));
}
3604
TEST(Arm64InsnTest, MulSubF32x4IndexedElem) {
  // FMLS (vector by element): acc -= a * b[2] lane-wise.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.4s, %1.4s, %2.s[2]");
  __uint128_t factor1 = MakeF32x4(1.0f, 2.0f, 4.0f, 3.0f);
  __uint128_t factor2 = MakeF32x4(3.0f, 1.0f, 2.0f, 4.0f);
  __uint128_t acc = MakeF32x4(2.0f, 3.0f, 1.0f, 2.0f);
  ASSERT_EQ(AsmFmls(factor1, factor2, acc), MakeF32x4(0.0f, -1.0f, -7.0f, -4.0f));
}
3612
TEST(Arm64InsnTest, MulSubF64x2) {
  // FMLS (vector, 2 x double precision): acc -= a*b lane-wise.
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %0.2d, %1.2d, %2.2d");
  // Use double literals for 64-bit lanes; the original float literals (1.0f,
  // etc.) converted exactly but were inconsistent with the other F64x2 tests.
  __uint128_t arg1 = MakeF64x2(1.0, 2.0);
  __uint128_t arg2 = MakeF64x2(3.0, 1.0);
  __uint128_t arg3 = MakeF64x2(2.0, 3.0);
  ASSERT_EQ(AsmFmls(arg1, arg2, arg3), MakeF64x2(-1.0, 1.0));
}
3620
TEST(Arm64InsnTest, MulSubF64IndexedElem) {
  // FMLS (scalar by element, double): acc - low(a) * b[1].
  constexpr auto AsmFmls = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("fmls %d0, %d1, %2.d[1]");
  __uint128_t arg1 = MakeF64x2(2.0, 5.0);
  __uint128_t arg2 = MakeF64x2(4.0, 1.0);
  // Fixed mixed literal types: the second lane was `7.0f` (a float literal in
  // a 64-bit lane) while its sibling used `7.0`; the value converts exactly,
  // so behavior is unchanged.
  __uint128_t arg3 = MakeF64x2(6.0, 7.0);
  // 6 - (2 * 1)
  ASSERT_EQ(AsmFmls(arg1, arg2, arg3), bit_cast<uint64_t>(4.0));
}
3629
TEST(Arm64InsnTest, CompareEqualF32) {
  // FCMEQ (scalar): all-ones on equality, zero otherwise; NaN never compares equal.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmeq(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(kDefaultNaN32AsInteger, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmeq(two, kDefaultNaN32AsInteger), 0x00000000ULL);
}
3639
TEST(Arm64InsnTest, CompareEqualF32x4) {
  // FCMEQ (vector): per-lane all-ones/zero masks.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  // Only lane 1 (2.0f == 2.0f) matches.
  ASSERT_EQ(AsmFcmeq(lhs, rhs), MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3647
TEST(Arm64InsnTest, CompareGreaterEqualF32) {
  // FCMGE (scalar): all-ones when lhs >= rhs; NaN operands compare false.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmge(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(two, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(six, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(kDefaultNaN32AsInteger, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(two, kDefaultNaN32AsInteger), 0x00000000ULL);
}
3658
TEST(Arm64InsnTest, CompareGreaterEqualF32x4) {
  // FCMGE (vector): per-lane masks for lhs >= rhs.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  // Lanes 1 (2>=2) and 2 (7>=-8) hold.
  ASSERT_EQ(AsmFcmge(lhs, rhs), MakeUInt128(0xffffffff00000000ULL, 0x00000000ffffffffULL));
}
3666
TEST(Arm64InsnTest, CompareGreaterF32) {
  // FCMGT (scalar): all-ones when lhs > rhs; NaN operands compare false.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %s0, %s1, %s2");
  uint32_t two = bit_cast<uint32_t>(2.0f);
  uint32_t six = bit_cast<uint32_t>(6.0f);
  ASSERT_EQ(AsmFcmgt(two, six), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(two, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN32AsInteger, two), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(two, kDefaultNaN32AsInteger), 0x00000000ULL);
}
3677
TEST(Arm64InsnTest, CompareGreaterF32x4) {
  // FCMGT (vector): per-lane masks for lhs > rhs.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.4s, %1.4s, %2.4s");
  __uint128_t lhs = MakeF32x4(-3.0f, 2.0f, 7.0f, -0.0f);
  __uint128_t rhs = MakeF32x4(6.0f, 2.0f, -8.0f, 5.0f);
  // Only lane 2 (7 > -8) holds.
  ASSERT_EQ(AsmFcmgt(lhs, rhs), MakeUInt128(0x0000000000000000ULL, 0x00000000ffffffffULL));
}
3685
TEST(Arm64InsnTest, CompareEqualZeroF32) {
  // FCMEQ (scalar, zero form): all-ones when operand == 0.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %s0, %s1, #0");
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmeq(bit_cast<uint32_t>(4.0f)), 0x00000000ULL);
}
3691
TEST(Arm64InsnTest, CompareEqualZeroF32x4) {
  // FCMEQ (vector, zero form): per-lane masks for lane == 0.
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmeq %0.4s, %1.4s, #0");
  __uint128_t vec = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  // Only lane 1 is zero.
  ASSERT_EQ(AsmFcmeq(vec), MakeUInt128(0xffffffff00000000ULL, 0x0000000000000000ULL));
}
3698
TEST(Arm64InsnTest, CompareGreaterThanZeroF32) {
  // FCMGT (scalar, zero form): all-ones when operand > 0.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmgt(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
}
3705
TEST(Arm64InsnTest, CompareGreaterThanZeroF32x4) {
  // FCMGT (vector, zero form): per-lane masks for lane > 0.
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmgt %0.4s, %1.4s, #0");
  __uint128_t vec = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  // Lanes 2 and 3 are positive.
  ASSERT_EQ(AsmFcmgt(vec), MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
}
3712
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32) {
  // FCMGE (scalar, zero form): all-ones when operand >= 0.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %s0, %s1, #0");
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmge(bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
}
3719
TEST(Arm64InsnTest, CompareGreaterThanOrEqualZeroF32x4) {
  // FCMGE (vector, zero form): per-lane masks for lane >= 0.
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmge %0.4s, %1.4s, #0");
  __uint128_t vec = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  // Every lane except lane 0 (-3.0f) is >= 0.
  ASSERT_EQ(AsmFcmge(vec), MakeUInt128(0xffffffff00000000ULL, 0xffffffffffffffffULL));
}
3726
TEST(Arm64InsnTest, CompareLessThanZeroF32) {
  // FCMLT (scalar, zero form): all-ones when operand < 0.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %s0, %s1, #0");
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(0.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFcmlt(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3733
TEST(Arm64InsnTest, CompareLessThanZeroF32x4) {
  // FCMLT (vector, zero form): per-lane masks for lane < 0.
  constexpr auto AsmFcmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmlt %0.4s, %1.4s, #0");
  __uint128_t vec = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  // Only lane 0 (-3.0f) is negative.
  ASSERT_EQ(AsmFcmlt(vec), MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
3740
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32) {
  // FCMLE (scalar, zero form): all-ones when operand <= 0.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %s0, %s1, #0");
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(0.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFcmle(bit_cast<uint32_t>(1.0f)), 0x00000000ULL);
}
3747
TEST(Arm64InsnTest, CompareLessThanOrEqualZeroF32x4) {
  // FCMLE (vector, zero form): per-lane masks for lane <= 0.
  constexpr auto AsmFcmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("fcmle %0.4s, %1.4s, #0");
  __uint128_t vec = MakeF32x4(-3.0f, 0.0f, 7.0f, 1.0f);
  // Lanes 0 and 1 are <= 0.
  ASSERT_EQ(AsmFcmle(vec), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3754
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32) {
  // FACGT (scalar): all-ones when |lhs| > |rhs|.
  constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %s0, %s1, %s2");
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0x00000000ULL);
  ASSERT_EQ(AsmFacgt(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3761
TEST(Arm64InsnTest, AbsoluteCompareGreaterThanOrEqualF32) {
  // FACGE (scalar): all-ones when |lhs| >= |rhs|.
  constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %s0, %s1, %s2");
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(-3.0f), bit_cast<uint32_t>(1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(1.0f), bit_cast<uint32_t>(-1.0f)), 0xffffffffULL);
  ASSERT_EQ(AsmFacge(bit_cast<uint32_t>(3.0f), bit_cast<uint32_t>(-7.0f)), 0x00000000ULL);
}
3768
TEST(Arm64InsnTest,AbsoluteCompareGreaterThanF32x4)3769 TEST(Arm64InsnTest, AbsoluteCompareGreaterThanF32x4) {
3770 constexpr auto AsmFacgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facgt %0.4s, %1.4s, %2.4s");
3771 __uint128_t arg1 = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
3772 __uint128_t arg2 = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
3773 ASSERT_EQ(AsmFacgt(arg1, arg2), MakeUInt128(0x00000000ffffffffULL, 0xffffffff00000000ULL));
3774 }
3775
TEST(Arm64InsnTest,AbsoluteCompareGreaterThanEqualF32x4)3776 TEST(Arm64InsnTest, AbsoluteCompareGreaterThanEqualF32x4) {
3777 constexpr auto AsmFacge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("facge %0.4s, %1.4s, %2.4s");
3778 __uint128_t arg1 = MakeF32x4(-3.0f, 1.0f, 3.0f, 4.0f);
3779 __uint128_t arg2 = MakeF32x4(1.0f, -1.0f, -7.0f, 2.0f);
3780 ASSERT_EQ(AsmFacge(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0xffffffff00000000ULL));
3781 }
3782
// Tests for FP64 register-register compares (FCMEQ/FCMGE/FCMGT), scalar and
// 2x64-bit vector forms. NaN operands must compare false on either side.

TEST(Arm64InsnTest, CompareEqualF64) {
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmeq(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(two, two), 0xffffffffffffffffULL);
  // NaN is unordered: equality is false regardless of operand position.
  ASSERT_EQ(AsmFcmeq(kDefaultNaN64AsInteger, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmeq(two, kDefaultNaN64AsInteger), 0x0000000000000000ULL);
}

TEST(Arm64InsnTest, CompareEqualF64x2) {
  // FCMEQ, vector: first pair matches only in the upper lane (2.0 == 2.0),
  // second pair matches in neither lane (-0.0 != 5.0 despite zero sign games).
  constexpr auto AsmFcmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmeq %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
  __uint128_t arg2 = MakeF64x2(6.0, 2.0);
  __uint128_t res = AsmFcmeq(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  arg1 = MakeF64x2(7.0, -0.0);
  arg2 = MakeF64x2(-8.0, 5.0);
  res = AsmFcmeq(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, CompareGreaterEqualF64) {
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmge(two, six), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(two, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmge(six, two), 0xffffffffffffffffULL);
  // Unordered (NaN) comparisons are always false.
  ASSERT_EQ(AsmFcmge(kDefaultNaN64AsInteger, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmge(two, kDefaultNaN64AsInteger), 0x0000000000000000ULL);
}

TEST(Arm64InsnTest, CompareGreaterEqualF64x2) {
  // FCMGE, vector: lanes are (-3 >= 6, 2 >= 2) then (7 >= -8, -0 >= 5).
  constexpr auto AsmFcmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmge %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
  __uint128_t arg2 = MakeF64x2(6.0, 2.0);
  __uint128_t res = AsmFcmge(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0xffffffffffffffffULL));
  arg1 = MakeF64x2(7.0, -0.0);
  arg2 = MakeF64x2(-8.0, 5.0);
  res = AsmFcmge(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, CompareGreaterF64) {
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %d0, %d1, %d2");
  uint64_t two = bit_cast<uint64_t>(2.0);
  uint64_t six = bit_cast<uint64_t>(6.0);
  ASSERT_EQ(AsmFcmgt(two, six), 0x0000000000000000ULL);
  // Strict comparison: equal operands compare false, unlike FCMGE.
  ASSERT_EQ(AsmFcmgt(two, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(six, two), 0xffffffffffffffffULL);
  ASSERT_EQ(AsmFcmgt(kDefaultNaN64AsInteger, two), 0x0000000000000000ULL);
  ASSERT_EQ(AsmFcmgt(two, kDefaultNaN64AsInteger), 0x0000000000000000ULL);
}

TEST(Arm64InsnTest, CompareGreaterF64x2) {
  // FCMGT, vector: lanes are (-3 > 6, 2 > 2) then (7 > -8, -0 > 5).
  constexpr auto AsmFcmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("fcmgt %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeF64x2(-3.0, 2.0);
  __uint128_t arg2 = MakeF64x2(6.0, 2.0);
  __uint128_t res = AsmFcmgt(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  arg1 = MakeF64x2(7.0, -0.0);
  arg2 = MakeF64x2(-8.0, 5.0);
  res = AsmFcmgt(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
3850
// Tests for the register-form vector bitwise ops AND/ORR/EOR/BIC.
// The .8b (64-bit) variants must clear the upper 64 bits of the destination.

TEST(Arm64InsnTest, AndInt8x16) {
  __uint128_t op1 = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t op2 = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0480842700030170ULL, 0x2000004100001060ULL));
}

TEST(Arm64InsnTest, AndInt8x8) {
  // Same inputs as AndInt8x16; the 64-bit form zeroes the upper half.
  __uint128_t op1 = MakeUInt128(0x7781857780532171ULL, 0x2268066130019278ULL);
  __uint128_t op2 = MakeUInt128(0x0498862723279178ULL, 0x6085784383827967ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("and %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0480842700030170ULL, 0));
}

TEST(Arm64InsnTest, OrInt8x16) {
  __uint128_t op1 = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t op2 = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x44ffbb55ffbb7744ULL, 0x136a56c8136a56c8ULL));
}

TEST(Arm64InsnTest, OrInt8x8) {
  // Same inputs as OrInt8x16; the 64-bit form zeroes the upper half.
  __uint128_t op1 = MakeUInt128(0x00ffaa5500112244ULL, 0x1248124812481248ULL);
  __uint128_t op2 = MakeUInt128(0x44221100ffaa5500ULL, 0x1122448811224488ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orr %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x44ffbb55ffbb7744ULL, 0));
}

TEST(Arm64InsnTest, XorInt8x16) {
  __uint128_t op1 = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t op2 = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x9269ff671cfe833bULL, 0xc6bd2031ce224768ULL));
}

TEST(Arm64InsnTest, XorInt8x8) {
  // Same inputs as XorInt8x16; the 64-bit form zeroes the upper half.
  __uint128_t op1 = MakeUInt128(0x1050792279689258ULL, 0x9235420199561121ULL);
  __uint128_t op2 = MakeUInt128(0x8239864565961163ULL, 0x5488623057745649ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("eor %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x9269ff671cfe833bULL, 0));
}

TEST(Arm64InsnTest, AndNotInt8x16) {
  // BIC (register): op1 AND NOT op2.
  __uint128_t op1 = MakeUInt128(0x0313783875288658ULL, 0x7533208381420617ULL);
  __uint128_t op2 = MakeUInt128(0x2327917860857843ULL, 0x8382796797668145ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0010680015288618ULL, 0x7431008000000612ULL));
}

TEST(Arm64InsnTest, AndNotInt8x8) {
  // BIC (register), 64-bit form: upper half of the result is zeroed.
  __uint128_t op1 = MakeUInt128(0x4861045432664821ULL, 0x2590360011330530ULL);
  __uint128_t op2 = MakeUInt128(0x5420199561121290ULL, 0x8572424541506959ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("bic %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0841044012644821ULL, 0x0000000000000000ULL));
}
3906
// Tests for the immediate forms of BIC and ORR, which read and write the
// same register (hence the "0"(res) tied-operand constraint). The optional
// LSL shifts the 8-bit immediate within each element before applying it.

TEST(Arm64InsnTest, AndNotInt16x4Imm) {
  __uint128_t res = MakeUInt128(0x9690314950191085ULL, 0x7598442391986291ULL);

  // Clears bits 0-1 in each 16-bit element; 64-bit form zeroes upper half.
  asm("bic %0.4h, #0x3" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x9690314850181084ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, AndNotInt16x4ImmShiftedBy8) {
  __uint128_t res = MakeUInt128(0x8354056704038674ULL, 0x3513622224771589ULL);

  // Immediate 0xa8 shifted into the high byte of each 16-bit element.
  asm("bic %0.4h, #0xa8, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0354056704030674ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy8) {
  __uint128_t res = MakeUInt128(0x1842631298608099ULL, 0x8886874132604721ULL);

  // Immediate 0xd3 applied at bits 8-15 of each 32-bit element.
  asm("bic %0.2s, #0xd3, lsl #8" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x1842201298600099ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy16) {
  __uint128_t res = MakeUInt128(0x2947867242292465ULL, 0x4366800980676928ULL);

  // Immediate 0x22 applied at bits 16-23 of each 32-bit element.
  asm("bic %0.2s, #0x22, lsl #16" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x2945867242092465ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, AndNotInt32x2ImmShiftedBy24) {
  __uint128_t res = MakeUInt128(0x0706977942236250ULL, 0x8221688957383798ULL);

  // Immediate 0x83 applied at bits 24-31 of each 32-bit element.
  asm("bic %0.2s, #0x83, lsl #24" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0406977940236250ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, OrInt16x4Imm) {
  __uint128_t res = MakeUInt128(0x0841284886269456ULL, 0x0424196528502221ULL);

  // Sets bits 0 and 2 in each 16-bit element; 64-bit form zeroes upper half.
  asm("orr %0.4h, #0x5" : "=w"(res) : "0"(res));

  ASSERT_EQ(res, MakeUInt128(0x0845284d86279457ULL, 0x0000000000000000ULL));
}
3954
// Tests for ORN (or-not) and the three bitwise-select instructions
// BSL / BIT / BIF, which differ in which operand provides the select mask
// and which bits are replaced.

TEST(Arm64InsnTest, OrNotInt8x16) {
  // ORN: op1 OR NOT op2.
  __uint128_t op1 = MakeUInt128(0x5428584447952658ULL, 0x6782105114135473ULL);
  __uint128_t op2 = MakeUInt128(0x3558764024749647ULL, 0x3263914199272604ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xdeafd9ffdf9f6ff8ULL, 0xef9e7eff76dbddfbULL));
}

TEST(Arm64InsnTest, OrNotInt8x8) {
  // ORN, 64-bit form: upper half of the result is zeroed.
  __uint128_t op1 = MakeUInt128(0x3279178608578438ULL, 0x3827967976681454ULL);
  __uint128_t op2 = MakeUInt128(0x6838689427741559ULL, 0x9185592524595395ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("orn %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xb7ff97efd8dfeebeULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, BitwiseSelectInt8x8) {
  // BSL: destination (op3) is the select mask — picks op1 bits where the
  // mask is 1, op2 bits where it is 0. The WW0 wrapper ties op3 to %0.
  __uint128_t op1 = MakeUInt128(0x2000568127145263ULL, 0x5608277857713427ULL);
  __uint128_t op2 = MakeUInt128(0x0792279689258923ULL, 0x5420199561121290ULL);
  __uint128_t op3 = MakeUInt128(0x8372978049951059ULL, 0x7317328160963185ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bsl %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x0480369681349963ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, BitwiseInsertIfTrueInt8x8) {
  // BIT: inserts op1 bits into the destination (op3) where op2's bits are 1.
  __uint128_t op1 = MakeUInt128(0x3678925903600113ULL, 0x3053054882046652ULL);
  __uint128_t op2 = MakeUInt128(0x9326117931051185ULL, 0x4807446237996274ULL);
  __uint128_t op3 = MakeUInt128(0x6430860213949463ULL, 0x9522473719070217ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bit %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x7630965b03908563ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, BitwiseInsertIfFalseInt8x8) {
  // BIF: inserts op1 bits into the destination (op3) where op2's bits are 0.
  __uint128_t op1 = MakeUInt128(0x7067982148086513ULL, 0x2823066470938446ULL);
  __uint128_t op2 = MakeUInt128(0x5964462294895493ULL, 0x0381964428810975ULL);
  __uint128_t op3 = MakeUInt128(0x0348610454326648ULL, 0x2133936072602491ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("bif %0.8b, %1.8b, %2.8b")(op1, op2, op3);
  ASSERT_EQ(res, MakeUInt128(0x2143d8015c006500ULL, 0x0000000000000000ULL));
}
3992
// Tests for signed (arithmetic) shift-right instructions: SSHR, and the
// accumulating variants SSRA (plain) and SRSRA (rounding). The #64 shift
// amount is valid for the 64-bit element size and replicates the sign bit.

TEST(Arm64InsnTest, ArithmeticShiftRightInt64x1) {
  // SSHR, scalar: sign bit (input MSB is 1) is replicated into the top bits.
  __uint128_t arg = MakeUInt128(0x9486015046652681ULL, 0x4398770516153170ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #39")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffff290c02ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ArithmeticShiftRightBy64Int64x1) {
  // SSHR by 64: result is all copies of the sign bit (here negative -> all ones).
  __uint128_t arg = MakeUInt128(0x9176042601763387ULL, 0x0454990176143641ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %d0, %d1, #64")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ArithmeticShiftRightInt64x2) {
  // SSHR, vector: both 64-bit lanes here are positive, so zeros shift in.
  __uint128_t arg = MakeUInt128(0x7501116498327856ULL, 0x3531614516845769ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshr %0.2d, %1.2d, #35")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000000ea0222cULL, 0x0000000006a62c28ULL));
}

TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt64x1) {
  // SSRA: arg1 >> 40 (arithmetic) is added to the accumulator arg2.
  __uint128_t arg1 = MakeUInt128(0x9667179643468760ULL, 0x0770479995378833ULL);
  __uint128_t arg2 = MakeUInt128(0x2557176908196030ULL, 0x9201824018842705ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #40")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2557176907afc747ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ArithmeticShiftRightBy64AccumulateInt64x1) {
  // SSRA by 64 on a negative value adds -1 to the accumulator.
  __uint128_t arg1 = MakeUInt128(0x9223343657791601ULL, 0x2809317940171859ULL);
  __uint128_t arg2 = MakeUInt128(0x3498025249906698ULL, 0x4233017350358044ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %d0, %d1, #64")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3498025249906697ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ArithmeticShiftRightAccumulateInt16x8) {
  // SSRA, eight 16-bit lanes: per-lane arithmetic shift then accumulate.
  __uint128_t arg1 = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  __uint128_t arg2 = MakeUInt128(0x0101655256375678ULL, 0x5667227966198857ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ssra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00fa6556563a567dULL, 0x5669227b6611885cULL));
}

TEST(Arm64InsnTest, ArithmeticRoundingShiftRightAccumulateInt16x8) {
  // SRSRA: like SSRA but the shifted value is rounded (half added) first.
  __uint128_t arg1 = MakeUInt128(0x9894671543578468ULL, 0x7886144458123145ULL);
  __uint128_t arg2 = MakeUInt128(0x1412147805734551ULL, 0x0500801908699603ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x140c147e05774549ULL, 0x0508801a086f9606ULL));
}
4038
// Tests for unsigned (logical) shift-right instructions: USHR, the
// accumulating USRA, and rounding-accumulate variants. A logical shift
// by 64 produces zero (contrast with the arithmetic tests above).

TEST(Arm64InsnTest, LogicalShiftRightInt64x1) {
  __uint128_t arg = MakeUInt128(0x9859771921805158ULL, 0x5321473926532515ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #33")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000004c2cbb8cULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, LogicalShiftRightBy64Int64x1) {
  // USHR by 64: all bits shifted out, result is zero.
  __uint128_t arg = MakeUInt128(0x9474696134360928ULL, 0x6148494178501718ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %d0, %d1, #64")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, LogicalShiftRightInt64x2) {
  // USHR, vector: both 64-bit lanes shifted independently.
  __uint128_t op = MakeUInt128(0x3962657978771855ULL, 0x6084552965412665ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushr %0.2d, %1.2d, #33")(op);
  ASSERT_EQ(rd, MakeUInt128(0x000000001cb132bcULL, 0x0000000030422a94ULL));
}

TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt64x1) {
  // USRA: arg1 >> 40 (logical) is added to the accumulator arg2.
  __uint128_t arg1 = MakeUInt128(0x9004112453790153ULL, 0x3296615697052237ULL);
  __uint128_t arg2 = MakeUInt128(0x0499939532215362ULL, 0x2748476603613677ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #40")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0499939532b15773ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, LogicalShiftRightBy64AccumulateInt64x1) {
  // USRA by 64 adds zero, leaving the accumulator's low half unchanged.
  __uint128_t arg1 = MakeUInt128(0x9886592578662856ULL, 0x1249665523533829ULL);
  __uint128_t arg2 = MakeUInt128(0x3559152534784459ULL, 0x8183134112900199ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %d0, %d1, #64")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3559152534784459ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, LogicalShiftRightAccumulateInt16x8) {
  // USRA, eight 16-bit lanes: per-lane logical shift then accumulate.
  __uint128_t arg1 = MakeUInt128(0x9984345225161050ULL, 0x7027056235266012ULL);
  __uint128_t arg2 = MakeUInt128(0x4628654036036745ULL, 0x3286510570658748ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("usra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x4631654336056746ULL, 0x328d51057068874eULL));
}

TEST(Arm64InsnTest, LogicalRoundingShiftRightAccumulateInt16x8) {
  // SRSRA, eight 16-bit lanes: rounding shift then accumulate.
  __uint128_t arg1 = MakeUInt128(0x9843452251610507ULL, 0x0270562352660127ULL);
  __uint128_t arg2 = MakeUInt128(0x6286540360367453ULL, 0x2865105706587488ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %0.8h, %1.8h, #12")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x62805407603b7453ULL, 0x2865105c065d7488ULL));
}
4084
// Tests for the rounding shift-right instructions SRSHR/URSHR and their
// accumulating counterparts SRSRA/URSRA: half of the discarded amount is
// added before shifting, so results may differ from SSHR/USHR by one.

TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x1) {
  // SRSHR, scalar: negative input, sign-extended with rounding.
  __uint128_t arg = MakeUInt128(0x9323685785585581ULL, 0x9555604215625088ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %d0, %d1, #40")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffffff932368ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, SignedRoundingShiftRightInt64x2) {
  // SRSHR, vector: low lane is negative (sign-extended), high lane positive.
  __uint128_t arg = MakeUInt128(0x8714878398908107ULL, 0x4295309410605969ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("srshr %0.2d, %1.2d, #36")(arg);
  ASSERT_EQ(res, MakeUInt128(0xfffffffff8714878ULL, 0x0000000004295309ULL));
}

TEST(Arm64InsnTest, SignedRoundingShiftRightAccumulateInt64x1) {
  // SRSRA: rounded arithmetic shift of arg1 added to accumulator arg2.
  __uint128_t arg1 = MakeUInt128(0x9946016520577405ULL, 0x2942305360178031ULL);
  __uint128_t arg2 = MakeUInt128(0x3960188013782542ULL, 0x1927094767337191ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("srsra %d0, %d1, #33")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3960187fe01b25f5ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x1) {
  // URSHR, scalar: zero-extended with rounding.
  __uint128_t arg = MakeUInt128(0x9713552208445285ULL, 0x2640081252027665ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %d0, %d1, #33")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000004b89aa91ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, UnsignedRoundingShiftRightInt64x2) {
  // URSHR, vector: both 64-bit lanes shifted with rounding.
  __uint128_t arg = MakeUInt128(0x6653398573888786ULL, 0x6147629443414010ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urshr %0.2d, %1.2d, #34")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000001994ce61ULL, 0x000000001851d8a5ULL));
}

TEST(Arm64InsnTest, UnsignedRoundingShiftRightAccumulateInt64x1) {
  // URSRA: rounded logical shift of arg1 added to accumulator arg2.
  __uint128_t arg1 = MakeUInt128(0x9616143204006381ULL, 0x3224658411111577ULL);
  __uint128_t arg2 = MakeUInt128(0x7184728147519983ULL, 0x5050478129771859ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("ursra %d0, %d1, #33")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x71847281925ca39cULL, 0x0000000000000000ULL));
}
4122
// Tests for SHL (plain left shift) and the insert shifts SRI/SLI, which
// shift the source and merge it into the destination, keeping the
// destination's bits where the shifted-in positions would be.

TEST(Arm64InsnTest, ShiftLeftInt64x1) {
  __uint128_t arg = MakeUInt128(0x3903594664691623ULL, 0x5396809201394578ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %d0, %d1, #35")(arg);
  ASSERT_EQ(res, MakeUInt128(0x2348b11800000000ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ShiftLeftInt64x2) {
  // SHL, vector: each 64-bit lane shifted independently.
  __uint128_t arg = MakeUInt128(0x0750111649832785ULL, 0x6353161451684576ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.2d, %1.2d, #37")(arg);
  ASSERT_EQ(res, MakeUInt128(0x3064f0a000000000ULL, 0x2d08aec000000000ULL));
}

TEST(Arm64InsnTest, ShiftLeftInt8x8) {
  // SHL, eight 8-bit lanes: bits shifted out of each byte are lost.
  __uint128_t arg = MakeUInt128(0x0402956047346131ULL, 0x1382638788975517ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shl %0.8b, %1.8b, #6")(arg);
  ASSERT_EQ(res, MakeUInt128(0x00804000c0004040ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ShiftRightInsertInt8x8) {
  // SRI: per-byte right shift of arg1 inserted into arg2; the top 4 bits of
  // each destination byte are preserved.
  __uint128_t arg1 = MakeUInt128(0x9112232618794059ULL, 0x9415540632701319ULL);
  __uint128_t arg2 = MakeUInt128(0x1537675115830432ULL, 0x0849872092028092ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %0.8b, %1.8b, #4")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1931625211870435ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ShiftRightInsertInt64x1) {
  // SRI, scalar: top 20 bits come from the destination, rest from arg1 >> 20.
  __uint128_t arg1 = MakeUInt128(0x9112232618794059ULL, 0x9415540632701319ULL);
  __uint128_t arg2 = MakeUInt128(0x1537675115830432ULL, 0x0849872092028092ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %d0, %d1, #20")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1537691122326187ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ShiftRightInsertInt64x2) {
  // SRI, vector: same merge applied per 64-bit lane.
  __uint128_t arg1 = MakeUInt128(0x7332335603484653ULL, 0x1873029302665964ULL);
  __uint128_t arg2 = MakeUInt128(0x5013718375428897ULL, 0x5579714499246540ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sri %0.2d, %1.2d, #21")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x50137399919ab01aULL, 0x557970c398149813ULL));
}

TEST(Arm64InsnTest, ShiftLeftInsertInt64x1) {
  // SLI: arg1 << 23 inserted into arg2; the low 23 destination bits survive.
  __uint128_t arg1 = MakeUInt128(0x3763526969344354ULL, 0x4004730671988689ULL);
  __uint128_t arg2 = MakeUInt128(0x6369498567302175ULL, 0x2313252926537589ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %d0, %d1, #23")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x34b49a21aa302175ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ShiftLeftInsertInt64x2) {
  // SLI, vector: same merge applied per 64-bit lane.
  __uint128_t arg1 = MakeUInt128(0x3270206902872323ULL, 0x3005386216347988ULL);
  __uint128_t arg2 = MakeUInt128(0x5094695472004795ULL, 0x2311201504329322ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sli %0.2d, %1.2d, #21")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0d2050e464604795ULL, 0x0c42c68f31129322ULL));
}
4175
// Tests for the widening left shifts: SHLL (shift by element width),
// USHLL/SSHLL (zero/sign extend then shift), their "2" upper-half variants,
// and the SXTL/UXTL aliases (shift count of zero).

TEST(Arm64InsnTest, ShiftLeftLongInt8x8) {
  // SHLL: each low byte widened to 16 bits and shifted left by 8.
  __uint128_t arg = MakeUInt128(0x2650697620201995ULL, 0x5484126500053944ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll %0.8h, %1.8b, #8")(arg);
  ASSERT_EQ(res, MakeUInt128(0x2000200019009500ULL, 0x2600500069007600ULL));
}

TEST(Arm64InsnTest, UnsignedShiftLeftLongInt8x8) {
  // USHLL: each low byte zero-extended to 16 bits, then shifted left by 4.
  __uint128_t arg = MakeUInt128(0x2650697620201995ULL, 0x5484126500053944ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll %0.8h, %1.8b, #4")(arg);
  ASSERT_EQ(res, MakeUInt128(0x200020001900950ULL, 0x260050006900760ULL));
}

TEST(Arm64InsnTest, ShiftLeftLongInt8x8Upper) {
  // SHLL2: same as SHLL but reads the upper eight bytes of the source.
  __uint128_t arg = MakeUInt128(0x9050429225978771ULL, 0x0667873840000616ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shll2 %0.8h, %1.16b, #8")(arg);
  ASSERT_EQ(res, MakeUInt128(0x4000000006001600ULL, 0x0600670087003800ULL));
}

TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2) {
  // SSHLL: each low 32-bit lane sign-extended to 64 bits, shifted left by 9.
  __uint128_t arg = MakeUInt128(0x9075407923424023ULL, 0x0092590070173196ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll %0.2d, %1.2s, #9")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000004684804600ULL, 0xffffff20ea80f200ULL));
}

TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2Upper) {
  // SSHLL2: same but reads the upper two 32-bit lanes of the source.
  __uint128_t arg = MakeUInt128(0x9382432227188515ULL, 0x9740547021482897ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sshll2 %0.2d, %1.4s, #9")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000004290512e00ULL, 0xffffff2e80a8e000ULL));
}

TEST(Arm64InsnTest, SignedShiftLeftLongInt32x2By0) {
  __uint128_t arg = MakeUInt128(0x9008777697763127ULL, 0x9572267265556259ULL);
  // SXTL is an alias for SSHLL for the shift count being zero.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sxtl %0.2d, %1.2s")(arg);
  ASSERT_EQ(res, MakeUInt128(0xffffffff97763127ULL, 0xffffffff90087776ULL));
}

TEST(Arm64InsnTest, ShiftLeftLongInt32x2) {
  // USHLL: each low 32-bit lane zero-extended to 64 bits, shifted left by 9.
  __uint128_t arg = MakeUInt128(0x9094334676851422ULL, 0x1447737939375170ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll %0.2d, %1.2s, #9")(arg);
  ASSERT_EQ(res, MakeUInt128(0x000000ed0a284400ULL, 0x0000012128668c00ULL));
}

TEST(Arm64InsnTest, ShiftLeftLongInt32x2Upper) {
  // USHLL2: same but reads the upper two 32-bit lanes of the source.
  __uint128_t arg = MakeUInt128(0x7096834080053559ULL, 0x8491754173818839ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ushll2 %0.2d, %1.4s, #17")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000e70310720000ULL, 0x00010922ea820000ULL));
}

TEST(Arm64InsnTest, ShiftLeftLongInt32x2By0) {
  __uint128_t arg = MakeUInt128(0x9945681506526530ULL, 0x5371829412703369ULL);
  // UXTL is an alias for USHLL for the shift count being zero.
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uxtl %0.2d, %1.2s")(arg);
  ASSERT_EQ(res, MakeUInt128(0x0000000006526530ULL, 0x0000000099456815ULL));
}
4231
// Tests for the narrowing right shifts SHRN/RSHRN (truncating vs rounding):
// each 16-bit lane is shifted right then truncated to 8 bits. The "2"
// variants write the upper half of the destination, preserving the lower.

TEST(Arm64InsnTest, ShiftRightNarrowI16x8) {
  __uint128_t arg = MakeUInt128(0x9378541786109696ULL, 0x9202538865034577ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("shrn %0.8b, %1.8h, #2")(arg);
  ASSERT_EQ(res, MakeUInt128(0x80e2405dde0584a5ULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, ShiftRightNarrowI16x8Upper) {
  // SHRN2: narrows into the upper 64 bits; lower 64 bits (arg2) are kept.
  __uint128_t arg1 = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t arg2 = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("shrn2 %0.16b, %1.8h, #2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x3879158299848645ULL, 0xd8988dc1de009890ULL));
}

TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8) {
  // RSHRN: like SHRN but rounds before narrowing.
  __uint128_t arg = MakeUInt128(0x9303774688099929ULL, 0x6877582441047878ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rshrn %0.8b, %1.8h, #2")(arg);
  ASSERT_EQ(res, MakeUInt128(0x1e09411ec1d2024aULL, 0x0000000000000000ULL));
}

TEST(Arm64InsnTest, RoundingShiftRightNarrowI16x8Upper) {
  // RSHRN2: rounding narrow into the upper half; lower half (arg2) is kept.
  __uint128_t arg1 = MakeUInt128(0x9314507607167064ULL, 0x3556827437743965ULL);
  __uint128_t arg2 = MakeUInt128(0x2103098604092717ULL, 0x0909512808630902ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("rshrn2 %0.16b, %1.8h, #2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x2103098604092717ULL, 0x569ddd59c51ec619ULL));
}
4257
// Tests for integer ADD/SUB, scalar (%d) and vector forms. Inputs are
// chosen so that carries/borrows would cross lane boundaries if lanes
// were not isolated.

TEST(Arm64InsnTest, AddInt64x1) {
  // Scalar add uses only the low 64 bits; upper halves here are garbage
  // values that must not leak into the result.
  __uint128_t arg1 = MakeUInt128(0x0080000000000003ULL, 0xdeadbeef01234567ULL);
  __uint128_t arg2 = MakeUInt128(0x0080000000000005ULL, 0x0123deadbeef4567ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %d0, %d1, %d2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0100000000000008ULL, 0x0ULL));
}

TEST(Arm64InsnTest, AddInt32x4) {
  // The "add" below adds two vectors, each with four 32-bit elements. We set the sign
  // bit for each element to verify that the carry does not affect any lane.
  __uint128_t op1 = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t op2 = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.4s, %1.4s, %2.4s")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0000000700000003ULL, 0x0000000f0000000bULL));
}

TEST(Arm64InsnTest, AddInt32x2) {
  // 64-bit (.2s) form: only the low two lanes are added, upper half zeroed.
  __uint128_t op1 = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t op2 = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2s, %1.2s, %2.2s")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0000000700000003ULL, 0));
}

TEST(Arm64InsnTest, AddInt64x2) {
  // Two 64-bit lanes: each lane's carry out of bit 63 is discarded.
  __uint128_t op1 = MakeUInt128(0x8000000380000001ULL, 0x8000000780000005ULL);
  __uint128_t op2 = MakeUInt128(0x8000000480000002ULL, 0x8000000880000006ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("add %0.2d, %1.2d, %2.2d")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0000000800000003ULL, 0x000000100000000bULL));
}

TEST(Arm64InsnTest, SubInt64x1) {
  // Scalar sub wrapping below zero: 2 - 3 = 0xffff...ffff (two's complement).
  __uint128_t arg1 = MakeUInt128(0x0000000000000002ULL, 0x0011223344556677ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000003ULL, 0x0123456789abcdefULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %d0, %d1, %d2")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0x0ULL));
}

TEST(Arm64InsnTest, SubInt64x2) {
  // Two 64-bit lanes subtracted independently, each wrapping modulo 2^64.
  constexpr auto AsmSub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2d, %1.2d, %2.2d");
  __uint128_t arg1 = MakeUInt128(0x6873115956286388ULL, 0x2353787593751957ULL);
  __uint128_t arg2 = MakeUInt128(0x7818577805321712ULL, 0x2680661300192787ULL);
  __uint128_t res = AsmSub(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xf05ab9e150f64c76ULL, 0xfcd31262935bf1d0ULL));
}
4302
TEST(Arm64InsnTest, SubInt32x4) {
  // SUB (vector), four 32-bit lanes. One lane (0xc45 - 0xc45) produces zero
  // while its neighbor goes through, verifying borrows stay within lanes.
  __uint128_t op1 = MakeUInt128(0x0000000a00000005ULL, 0x0000000c00000c45ULL);
  __uint128_t op2 = MakeUInt128(0x0000000500000003ULL, 0x0000000200000c45ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4s, %1.4s, %2.4s")(op1, op2);
  // Fixed: the expected high half was the malformed 17-hex-digit literal
  // 0x00000000A00000000ULL (same numeric value, misleading width); it is now
  // a canonical 16-digit 64-bit literal matching the rest of the file.
  ASSERT_EQ(rd, MakeUInt128(0x0000000500000002ULL, 0x0000000a00000000ULL));
}
4309
// Verifies vector SUB on two 32-bit lanes (sub Vd.2s, Vn.2s, Vm.2s);
// the upper 64 bits of the destination are zeroed.
TEST(Arm64InsnTest, SubInt32x2) {
  __uint128_t op1 = MakeUInt128(0x0000000000000005ULL, 0x0000000000000C45ULL);
  __uint128_t op2 = MakeUInt128(0x0000000000000003ULL, 0x0000000000000C45ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.2s, %1.2s, %2.2s")(op1, op2);
  // Fixed a typo in the expected literal: it was written with 17 hex digits
  // (0x00000000000000000ULL). The value (zero) is unchanged.
  ASSERT_EQ(rd, MakeUInt128(0x0000000000000002ULL, 0x0000000000000000ULL));
}
4316
// Verifies vector SUB on four 16-bit lanes (sub Vd.4h, Vn.4h, Vm.4h);
// no borrow propagates between 16-bit lanes.
TEST(Arm64InsnTest, SubInt16x4) {
  __uint128_t arg1 = MakeUInt128(0x8888777766665555ULL, 0);
  __uint128_t arg2 = MakeUInt128(0x1111222233334444ULL, 0);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sub %0.4h, %1.4h, %2.4h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x7777555533331111ULL, 0));
}
4323
// Verifies MUL on eight 8-bit lanes (mul Vd.8b, Vn.8b, Vm.8b);
// each lane keeps only the low 8 bits of the product.
TEST(Arm64InsnTest, MultiplyI8x8) {
  __uint128_t arg1 = MakeUInt128(0x5261365549781893ULL, 0x1297848216829989ULL);
  __uint128_t arg2 = MakeUInt128(0x4542858444795265ULL, 0x8678210511413547ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1a020ed464b8b0ffULL, 0x0000000000000000ULL));
}
4330
// Verifies MLA on eight 8-bit lanes (mla Vd.8b, Vn.8b, Vm.8b):
// Vd += Vn * Vm per lane, with arg3 preloaded into the destination
// via the WW0 wrapper (the "0" tied operand).
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8) {
  __uint128_t arg1 = MakeUInt128(0x5848406353422072ULL, 0x2258284886481584ULL);
  __uint128_t arg2 = MakeUInt128(0x7823986456596116ULL, 0x3548862305774564ULL);
  __uint128_t arg3 = MakeUInt128(0x8797108931456691ULL, 0x3686722874894056ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.8b, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xc76f10351337865dULL, 0x0000000000000000ULL));
}
4338
// Verifies MLA by element: Vd.4h += Vn.4h * Vm.h[0] (all lanes multiplied
// by the single indexed 16-bit element of arg2).
// NOTE(review): the test name says I8x8 but the instruction operates on
// 4h (16-bit) lanes — the name appears to be a copy-paste artifact.
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x4143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t arg2 = MakeUInt128(0x5346462080466842ULL, 0x5906949129331367ULL);
  __uint128_t arg3 = MakeUInt128(0x0355876402474964ULL, 0x7326391419927260ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.4h, %1.4h, %2.h[0]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x0e9bc72e5eb38710ULL, 0x0000000000000000ULL));
}
4346
// Verifies MLA by element with a non-zero index: Vd.2s += Vn.2s * Vm.s[2],
// exercising the element-index encoding beyond position 0.
TEST(Arm64InsnTest, MultiplyAndAccumulateI8x8IndexedElemPosition2) {
  __uint128_t arg1 = MakeUInt128(0x1431429809190659ULL, 0x2509372216964615ULL);
  __uint128_t arg2 = MakeUInt128(0x2686838689427741ULL, 0x5599185592524595ULL);
  __uint128_t arg3 = MakeUInt128(0x6099124608051243ULL, 0x8843904512441365ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mla %0.2s, %1.2s, %2.s[2]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x6ce7ccbedccdc110ULL, 0x0000000000000000ULL));
}
4354
// Verifies MLS by element: Vd.4h -= Vn.4h * Vm.h[1], with arg3 preloaded
// into the destination via the tied WW0 operand.
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t arg2 = MakeUInt128(0x6549911988183479ULL, 0x7753566369807426ULL);
  __uint128_t arg3 = MakeUInt128(0x4524919217321721ULL, 0x4772350141441973ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.4h, %1.4h, %2.h[1]")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0xcefce99ad58a9ad9ULL, 0x0000000000000000ULL));
}
4362
// Verifies MLS on eight 8-bit lanes (mls Vd.8b, Vn.8b, Vm.8b):
// Vd -= Vn * Vm per lane.
TEST(Arm64InsnTest, MultiplyAndSubtractI8x8) {
  __uint128_t arg1 = MakeUInt128(0x0635342207222582ULL, 0x8488648158456028ULL);
  __uint128_t arg2 = MakeUInt128(0x9864565961163548ULL, 0x8623057745649803ULL);
  __uint128_t arg3 = MakeUInt128(0x1089314566913686ULL, 0x7228748940560101ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("mls %0.8b, %1.8b, %2.8b")(arg1, arg2, arg3);
  ASSERT_EQ(res, MakeUInt128(0x80d5b973bfa58df6ULL, 0x0000000000000000ULL));
}
4370
// Verifies MUL by element on the full-width form: Vd.4s = Vn.4s * Vm.s[1];
// all four 32-bit lanes (including the upper half) are written.
TEST(Arm64InsnTest, MultiplyI32x4IndexedElem) {
  __uint128_t arg1 = MakeUInt128(0x143334547762416ULL, 0x8625189835694855ULL);
  __uint128_t arg2 = MakeUInt128(0x627232791786085ULL, 0x7843838279679766ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("mul %0.4s, %1.4s, %2.s[1]")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xcec23e830d48815aULL, 0xd12b87288ae0a3f3ULL));
}
4377
// Verifies PMUL on eight 8-bit lanes: carry-less (GF(2)) multiply per lane,
// keeping the low 8 bits of each polynomial product.
TEST(Arm64InsnTest, PolynomialMultiplyU8x8) {
  __uint128_t arg1 = MakeUInt128(0x1862056476931257ULL, 0x0586356620185581ULL);
  __uint128_t arg2 = MakeUInt128(0x1668039626579787ULL, 0x7185560845529654ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmul %0.8b, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xd0d00f18f4095e25ULL, 0x0000000000000000ULL));
}
4384
// Verifies PMULL (widening carry-less multiply): the low eight 8-bit lanes
// produce eight 16-bit polynomial products.
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8) {
  __uint128_t arg1 = MakeUInt128(0x1327656180937734ULL, 0x4403070746921120ULL);
  __uint128_t arg2 = MakeUInt128(0x9838952286847831ULL, 0x2355265821314495ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.8h, %1.8b, %2.8b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x43004bcc17e805f4ULL, 0x082807a835210ce2ULL));
}
4391
// Verifies PMULL2: same as PMULL but sourcing the UPPER eight 8-bit lanes
// of each 16b operand.
TEST(Arm64InsnTest, PolynomialMultiplyLongU8x8Upper) {
  __uint128_t arg1 = MakeUInt128(0x4439658253375438ULL, 0x8569094113031509ULL);
  __uint128_t arg2 = MakeUInt128(0x1865619673378623ULL, 0x6256125216320862ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.8h, %1.16b, %2.16b")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x015a005600a80372ULL, 0x30ea1da6008214d2ULL));
}
4398
// Verifies the 64x64->128 bit PMULL form (pmull Vd.1q, Vn.1d, Vm.1d),
// the carry-less multiply used by CRC/GHASH-style code; only the low
// 64-bit element of each source participates.
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2) {
  __uint128_t arg1 = MakeUInt128(0x1000100010001000ULL, 0xffffeeeeffffeeeeULL);
  __uint128_t arg2 = MakeUInt128(0x10001ULL, 0xffffeeeeffffeeeeULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull %0.1q, %1.1d, %2.1d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1000ULL, 0x1000ULL));
}
4405
// Verifies PMULL2 in the 64x64->128 bit form: identical inputs to the
// PMULL test but with the operands swapped into the UPPER 64-bit element,
// so the expected product is the same.
TEST(Arm64InsnTest, PolynomialMultiplyLongU64x2Upper) {
  __uint128_t arg1 = MakeUInt128(0xffffeeeeffffeeeeULL, 0x1000100010001000ULL);
  __uint128_t arg2 = MakeUInt128(0xffffeeeeffffeeeeULL, 0x10001ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("pmull2 %0.1q, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x1000ULL, 0x1000ULL));
}
4412
// Verifies ADDP (pairwise add) on sixteen 8-bit lanes: adjacent byte pairs
// from the concatenation of op2:op1 are summed into the destination.
TEST(Arm64InsnTest, PairwiseAddInt8x16) {
  __uint128_t op1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t op2 = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xeda96521dd995511ULL, 0x1d1915110d090501ULL));
}
4419
// Verifies ADDP on eight 8-bit lanes: only the low 64 bits of each source
// contribute pairs, and the upper half of the result is zeroed.
TEST(Arm64InsnTest, PairwiseAddInt8x8) {
  __uint128_t op1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t op2 = MakeUInt128(0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0d090501dd995511ULL, 0));
}
4426
// Verifies ADDP on two 64-bit lanes: result lane 0 = op1 lane sums (1+2),
// lane 1 = op2 lane sums (3+4).
TEST(Arm64InsnTest, PairwiseAddInt64x2) {
  __uint128_t op1 = MakeUInt128(1ULL, 2ULL);
  __uint128_t op2 = MakeUInt128(3ULL, 4ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addp %0.2d, %1.2d, %2.2d")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(3ULL, 7ULL));
}
4433
// Verifies CMEQ on sixteen 8-bit lanes: each equal byte pair yields 0xff,
// unequal yields 0x00.
TEST(Arm64InsnTest, CompareEqualInt8x16) {
  __uint128_t op1 = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t op2 = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.16b, %1.16b, %2.16b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xff0000ffff00ff00ULL, 0x0000ff00ff0000ffULL));
}
4440
// Verifies CMEQ on the 8-byte (half-width) form: same inputs as the 16b
// test, but the upper 64 bits of the destination are zeroed.
TEST(Arm64InsnTest, CompareEqualInt8x8) {
  __uint128_t op1 = MakeUInt128(0x9375195778185778ULL, 0x0532171226806613ULL);
  __uint128_t op2 = MakeUInt128(0x9371595778815787ULL, 0x0352172126068613ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.8b, %1.8b, %2.8b")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0xff0000ffff00ff00ULL, 0));
}
4447
// Verifies CMEQ on four 16-bit lanes: equal halfwords become 0xffff.
TEST(Arm64InsnTest, CompareEqualInt16x4) {
  __uint128_t op1 = MakeUInt128(0x4444333322221111ULL, 0);
  __uint128_t op2 = MakeUInt128(0x8888333300001111ULL, 0);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %0.4h, %1.4h, %2.4h")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x0000ffff0000ffffULL, 0));
}
4454
// Verifies scalar CMEQ (cmeq Dd, Dn, Dm): all-zeros for a mismatch,
// all-ones for a match; only the low 64 bits of each input are compared.
TEST(Arm64InsnTest, CompareEqualInt64x1) {
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmeq %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x8297455570674983ULL, 0x8505494588586926ULL);
  __uint128_t arg2 = MakeUInt128(0x0665499119881834ULL, 0x7977535663698074ULL);
  // Same low 64 bits as arg1, different high bits: must compare equal.
  __uint128_t arg3 = MakeUInt128(0x8297455570674983ULL, 0x1452491921732172ULL);
  ASSERT_EQ(AsmCmeq(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmeq(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4463
// Verifies scalar CMEQ against immediate zero (cmeq Dd, Dn, #0).
TEST(Arm64InsnTest, CompareEqualZeroInt64x1) {
  constexpr auto AsmCmeq = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  // Zero low 64 bits; the non-zero high bits must be ignored.
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x1746089232839170ULL);
  ASSERT_EQ(AsmCmeq(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmeq(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4471
// Verifies CMEQ against zero on sixteen 8-bit lanes: zero bytes -> 0xff.
TEST(Arm64InsnTest, CompareEqualZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x0000555500332200ULL, 0x0000000077001100ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffff0000ff0000ffULL, 0xffffffff00ff00ffULL));
}
4477
// Verifies CMEQ against zero on the 8-byte form; upper half is zeroed.
TEST(Arm64InsnTest, CompareEqualZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x001122330000aaaaULL, 0xdeadbeef0000cafeULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmeq %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff000000ffff0000ULL, 0));
}
4483
// Verifies scalar signed CMGT (cmgt Dd, Dn, Dm): less-than, equal, and
// greater-than cases (arg4's low word is negative when viewed as signed).
TEST(Arm64InsnTest, CompareGreaterInt64x1) {
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1976668559233565ULL, 0x4639138363185745ULL);
  __uint128_t arg2 = MakeUInt128(0x3474940784884423ULL, 0x7721751543342603ULL);
  // Equal low 64 bits -> "greater than" must be false.
  __uint128_t arg3 = MakeUInt128(0x1976668559233565ULL, 0x8183196376370761ULL);
  __uint128_t arg4 = MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL);
  ASSERT_EQ(AsmCmgt(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg1, arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg1, arg4), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4494
// Verifies scalar signed CMGT against zero: positive, zero, and negative
// (sign bit set) low-64-bit inputs.
TEST(Arm64InsnTest, CompareGreaterZeroInt64x1) {
  constexpr auto AsmCmgt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x6174599705674507ULL);
  __uint128_t arg3 = MakeUInt128(0x9592057668278967ULL, 0x7644531840404185ULL);
  ASSERT_EQ(AsmCmgt(arg1), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmgt(arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4504
// Verifies signed CMGT against zero on sixteen 8-bit lanes; bytes with the
// sign bit set (0x80, 0xff, 0xfe) and zero bytes produce 0x00.
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x807fff00017efe02ULL, 0xff7f80000102fe02ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x00ff0000ffff00ffULL, 0x00ff0000ffff00ffULL));
}
4510
// Verifies signed CMGT against zero on the 8-byte form; upper half zeroed.
TEST(Arm64InsnTest, CompareGreaterThanZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x00ff7f80017efe00ULL, 0x0000cafedeadbeefULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmgt %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x0000ff00ffff0000ULL, 0));
}
4516
// Verifies signed CMGT on eight 16-bit lanes.
TEST(Arm64InsnTest, CompareGreaterThanInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x9789389001852956ULL, 0x9196780455448285ULL);
  // Added the missing ULL suffix for consistency with every other literal
  // in this file (the value is unchanged; it already fit in long long).
  __uint128_t arg2 = MakeUInt128(0x7269389081795897ULL, 0x5469399264218285ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffff0000ULL, 0x0000ffff00000000ULL));
}
4523
// Verifies signed CMGT on four 32-bit lanes: 0 > -1 (0xffffffff) is true,
// -1 > 0 is false, exercising the signed interpretation.
TEST(Arm64InsnTest, CompareGreaterThanInt32x4) {
  __uint128_t arg1 = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t arg2 = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmgt %0.4s, %1.4s, %2.4s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL));
}
4530
// Verifies scalar signed CMLT against zero: positive, zero, and negative
// low-64-bit inputs.
TEST(Arm64InsnTest, CompareLessZeroInt64x1) {
  constexpr auto AsmCmlt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x4784264567633881ULL, 0x8807565612168960ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x8955999911209916ULL);
  __uint128_t arg3 = MakeUInt128(0x9364610175685060ULL, 0x1671453543158148ULL);
  ASSERT_EQ(AsmCmlt(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmlt(arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmlt(arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4540
// Verifies signed CMLT against zero on sixteen 8-bit lanes: only bytes
// with the sign bit set produce 0xff.
TEST(Arm64InsnTest, CompareLessThanZeroInt8x16) {
  __uint128_t op = MakeUInt128(0xff00017ffe020180ULL, 0x0001027e7ffeff80ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff000000ff0000ffULL, 0x0000000000ffffffULL));
}
4546
// Verifies signed CMLT against zero on the 8-byte form; upper half zeroed.
TEST(Arm64InsnTest, CompareLessThanZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x0002017e7fff8000ULL, 0x001100220000ffffULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmlt %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0x0000000000ffff00ULL, 0));
}
4552
// Verifies scalar signed CMGE: less-than, equal (must be true for >=),
// and greater-than (vs. a negative value) cases.
TEST(Arm64InsnTest, CompareGreaterThanEqualInt64x1) {
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t arg2 = MakeUInt128(0x5890939568814856ULL, 0x0263224393726562ULL);
  // Equal low 64 bits -> ">=" must be true.
  __uint128_t arg3 = MakeUInt128(0x1009391369138107ULL, 0x5511995818319637ULL);
  __uint128_t arg4 = MakeUInt128(0x9427141009391369ULL, 0x1381072581378135ULL);
  ASSERT_EQ(AsmCmge(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg1, arg4), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4563
// Verifies scalar signed CMGE against zero: positive, zero (>= holds),
// and negative low-64-bit inputs.
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt64x1) {
  constexpr auto AsmCmge = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x5562116715468484ULL, 0x7780394475697980ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x3548487562529875ULL);
  __uint128_t arg3 = MakeUInt128(0x9212366168902596ULL, 0x2730430679316531ULL);
  ASSERT_EQ(AsmCmge(arg1), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmge(arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4573
// Verifies signed CMGE against zero on sixteen 8-bit lanes: non-negative
// bytes (including 0x00) produce 0xff.
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x00ff01027ffe8002ULL, 0x80fffe7f7e020100ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xff00ffffff0000ffULL, 0x000000ffffffffffULL));
}
4579
// Verifies signed CMGE against zero on the 8-byte form; upper half zeroed.
TEST(Arm64InsnTest, CompareGreaterThanEqualZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x0001027f80feff00ULL, 0x0011223344556677ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmge %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffffff000000ffULL, 0));
}
4585
// Verifies signed CMGE on eight 16-bit lanes, mixing equal, greater, and
// smaller halfword pairs.
TEST(Arm64InsnTest, CompareGreaterEqualInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4391962838870543ULL, 0x6777432242768091ULL);
  __uint128_t arg2 = MakeUInt128(0x4391838548318875ULL, 0x0142432208995068ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmge %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff0000ffffULL, 0xffffffffffff0000ULL));
}
4592
// Verifies scalar signed CMLE against zero: positive, zero (<= holds),
// and negative low-64-bit inputs.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt64x1) {
  constexpr auto AsmCmle = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %d0, %d1, #0");
  __uint128_t arg1 = MakeUInt128(0x3643296406335728ULL, 0x1070788758164043ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000000ULL, 0x5865720227637840ULL);
  __uint128_t arg3 = MakeUInt128(0x8694346828590066ULL, 0x6408063140777577ULL);
  ASSERT_EQ(AsmCmle(arg1), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmle(arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmle(arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4602
// Verifies signed CMLE against zero on sixteen 8-bit lanes: negative and
// zero bytes produce 0xff.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x16) {
  __uint128_t op = MakeUInt128(0x80fffe7f7e020100ULL, 0x00ff01027ffe8002ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.16b, %1.16b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffff00000000ffULL, 0xffff000000ffff00ULL));
}
4608
// Verifies scalar UNSIGNED CMHI (cmhi Dd, Dn, Dm): higher, equal, and
// lower cases; arg4 has the top bit set, which unsigned compare treats
// as a large value (unlike the signed CMGT tests above).
TEST(Arm64InsnTest, CompareHigherInt64x1) {
  constexpr auto AsmCmhi = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x1009391369138107ULL, 0x2581378135789400ULL);
  __uint128_t arg2 = MakeUInt128(0x0759167297007850ULL, 0x5807171863810549ULL);
  // Equal low 64 bits -> strictly-higher must be false.
  __uint128_t arg3 = MakeUInt128(0x1009391369138107ULL, 0x6026322439372656ULL);
  __uint128_t arg4 = MakeUInt128(0x9087839523245323ULL, 0x7896029841669225ULL);
  ASSERT_EQ(AsmCmhi(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhi(arg1, arg3), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhi(arg1, arg4), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4619
// Verifies unsigned CMHI on eight 16-bit lanes.
TEST(Arm64InsnTest, CompareHigherInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x6517166776672793ULL, 0x0354851542040238ULL);
  __uint128_t arg2 = MakeUInt128(0x2057166778967764ULL, 0x4531840442045540ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffff000000000000ULL, 0x0000ffff00000000ULL));
}
4626
// Verifies unsigned CMHI on four 32-bit lanes: 0xffffffff is higher than 0
// (the opposite outcome of the signed CMGT test on the same inputs).
TEST(Arm64InsnTest, CompareHigherInt32x4) {
  __uint128_t arg1 = MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL);
  __uint128_t arg2 = MakeUInt128(0xffff'ffff'0000'0000ULL, 0x0000'0000'ffff'ffffULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhi %0.4s, %1.4s, %2.4s")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x0000'0000'ffff'ffffULL, 0xffff'ffff'0000'0000ULL));
}
4633
// Verifies scalar unsigned CMHS (higher-or-same): higher, equal (must be
// true for >=), and lower cases.
TEST(Arm64InsnTest, CompareHigherSameInt64x1) {
  constexpr auto AsmCmhs = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0x3529566139788848ULL, 0x6050978608595701ULL);
  __uint128_t arg2 = MakeUInt128(0x1769845875810446ULL, 0x6283998806006162ULL);
  // Equal low 64 bits -> higher-or-same must be true.
  __uint128_t arg3 = MakeUInt128(0x3529566139788848ULL, 0x9001852956919678ULL);
  __uint128_t arg4 = MakeUInt128(0x9628388705436777ULL, 0x4322427680913236ULL);
  ASSERT_EQ(AsmCmhs(arg1, arg2), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhs(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmhs(arg1, arg4), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
4644
// Verifies unsigned CMHS on eight 16-bit lanes.
TEST(Arm64InsnTest, CompareHigherSameInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x4599705674507183ULL, 0x3206503455664403ULL);
  __uint128_t arg2 = MakeUInt128(0x4264705633881880ULL, 0x3206612168960504ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmhs %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffffffffffffULL, 0xffff00000000ffffULL));
}
4651
// Verifies signed CMLE against zero on the 8-byte form; upper half zeroed.
TEST(Arm64InsnTest, CompareLessThanEqualZeroInt8x8) {
  __uint128_t op = MakeUInt128(0x00fffe807f020100ULL, 0x00aabbccddeeff00ULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cmle %0.8b, %1.8b, #0")(op);
  ASSERT_EQ(rd, MakeUInt128(0xffffffff000000ffULL, 0));
}
4657
// Verifies scalar CMTST (test bits): all-ones when (Dn & Dm) != 0.
// arg1 and arg2 have disjoint bit patterns (0xaa... vs 0x55...), so the
// AND is zero; arg3 overlaps arg1.
TEST(Arm64InsnTest, TestInt64x1) {
  constexpr auto AsmCmtst = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %d0, %d1, %d2");
  __uint128_t arg1 = MakeUInt128(0xaaaaaaaa55555555ULL, 0x7698385483188750ULL);
  __uint128_t arg2 = MakeUInt128(0x55555555aaaaaaaaULL, 0x1429389089950685ULL);
  __uint128_t arg3 = MakeUInt128(0xaa00aa0055005500ULL, 0x4530765116803337ULL);
  ASSERT_EQ(AsmCmtst(arg1, arg2), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmCmtst(arg1, arg3), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
}
4666
// Verifies CMTST on eight 16-bit lanes: 0xffff where the lane-wise AND of
// the operands is non-zero.
TEST(Arm64InsnTest, TestInt16x8) {
  __uint128_t arg1 = MakeUInt128(0x5999911209916464ULL, 0x6441191856827700ULL);
  __uint128_t arg2 = MakeUInt128(0x6101756850601671ULL, 0x4535431581480105ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("cmtst %0.8h, %1.8h, %2.8h")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0xffffffff0000ffffULL, 0xffffffff0000ffffULL));
}
4673
// Verifies EXT on the 16-byte form: extracts 16 bytes starting at byte
// offset 8 from the concatenation op2:op1 (here: op1's high half followed
// by op2's low half).
TEST(Arm64InsnTest, ExtractVectorFromPair) {
  __uint128_t op1 = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  __uint128_t op2 = MakeUInt128(0x0001020304050607ULL, 0x08090a0b0c0d0e0fULL);
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.16b, %1.16b, %2.16b, #8")(op1, op2);
  ASSERT_EQ(rd, MakeUInt128(0x8899aabbccddeeffULL, 0x0001020304050607ULL));
}
4680
// Verifies EXT on the 8-byte form with index 3: only the low 64 bits of
// each source participate, and the upper half of the result is zeroed.
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidth) {
  __uint128_t op1 = MakeUInt128(0x8138268683868942ULL, 0x7741559918559252ULL);
  __uint128_t op2 = MakeUInt128(0x3622262609912460ULL, 0x8051243884390451ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #3")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x9124608138268683ULL, 0x0000000000000000ULL));
}
4687
// Verifies EXT on the 8-byte form with the minimal non-zero index (#1).
TEST(Arm64InsnTest, ExtractVectorFromPairHalfWidthPosition1) {
  __uint128_t op1 = MakeUInt128(0x9471329621073404ULL, 0x3751895735961458ULL);
  __uint128_t op2 = MakeUInt128(0x9048010941214722ULL, 0x1317947647772622ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ext %0.8b, %1.8b, %2.8b, #1")(op1, op2);
  ASSERT_EQ(res, MakeUInt128(0x2294713296210734ULL, 0x0000000000000000ULL));
}
4694
// Verifies LD1 of a single 8-byte register ({Vt.8b}): loads 8 bytes from
// memory into the low half of the vector register.
TEST(Arm64InsnTest, Load1OneI8x8) {
  static constexpr uint64_t arg = 0x8867915896904956ULL;
  __uint128_t res;
  asm("ld1 {%0.8b}, [%1]" : "=w"(res) : "r"(&arg) : "memory");
  ASSERT_EQ(res, arg);
}
4701
// Verifies LD1 with a three-register list ({v0.8b-v2.8b}). The register
// list requires consecutive SIMD registers, which asm constraints cannot
// express, so v0-v2 are hardcoded and copied out with mov.
TEST(Arm64InsnTest, Load1ThreeI8x8) {
  static constexpr uint64_t arg[3] = {
      0x3415354584283376ULL, 0x4378111988556318ULL, 0x7777925372011667ULL};
  __uint128_t res[3];
  asm("ld1 {v0.8b-v2.8b}, [%3]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(arg)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
}
4717
// Verifies LD1 with a four-register list ({v0.8b-v3.8b}); v0-v3 are
// hardcoded because the consecutive-register list cannot be expressed
// with asm constraints.
TEST(Arm64InsnTest, Load1FourI8x8) {
  static constexpr uint64_t arg[4] = {
      0x9523688483099930ULL,
      0x2757419916463841ULL,
      0x4270779887088742ULL,
      0x2927705389122717ULL,
  };
  __uint128_t res[4];
  asm("ld1 {v0.8b-v3.8b}, [%4]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b\n\t"
      "mov %2.16b, v2.16b\n\t"
      "mov %3.16b, v3.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(arg)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], static_cast<__uint128_t>(arg[0]));
  ASSERT_EQ(res[1], static_cast<__uint128_t>(arg[1]));
  ASSERT_EQ(res[2], static_cast<__uint128_t>(arg[2]));
  ASSERT_EQ(res[3], static_cast<__uint128_t>(arg[3]));
}
4739
// Verifies ST1 of a single 16-byte register ({Vt.16b}) to memory.
TEST(Arm64InsnTest, Store1OneI8x16) {
  static constexpr __uint128_t arg = MakeUInt128(0x7642291583425006ULL, 0x7361245384916067ULL);
  __uint128_t res;
  asm("st1 {%0.16b}, [%1]" : : "w"(arg), "r"(&res) : "memory");
  ASSERT_EQ(res, arg);
}
4746
// Verifies ST1 with a three-register list ({v0.8b-v2.8b}): stores 3 x 8
// bytes. v0-v2 are hardcoded (consecutive-register list) and preloaded
// from the inputs with mov.
TEST(Arm64InsnTest, Store1ThreeI8x8) {
  static constexpr uint64_t arg[3] = {
      0x3086436111389069ULL, 0x4202790881431194ULL, 0x4879941715404210ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st1 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
}
4762
// Verifies ST1 with a four-register list ({v0.8b-v3.8b}): stores 4 x 8
// bytes via hardcoded consecutive registers v0-v3.
TEST(Arm64InsnTest, Store1FourI8x8) {
  static constexpr uint64_t arg[4] = {
      0x8954750448339314ULL, 0x6896307633966572ULL, 0x2672704339321674ULL, 0x5421824557062524ULL};
  uint64_t res[4];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "mov v3.16b, %3.16b\n\t"
      "st1 {v0.8b-v3.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v0", "v1", "v2", "v3", "memory");
  ASSERT_EQ(res[0], arg[0]);
  ASSERT_EQ(res[1], arg[1]);
  ASSERT_EQ(res[2], arg[2]);
  ASSERT_EQ(res[3], arg[3]);
}
4780
// Verifies LD1 with immediate post-indexing: loads two 16-byte registers
// and advances the base address by #32, which is checked at the end.
TEST(Arm64InsnTest, Load1TwoPostIndex) {
  __uint128_t op0 = MakeUInt128(0x5499119881834797ULL, 0x0507922796892589ULL);
  __uint128_t op1 = MakeUInt128(0x0511854807446237ULL, 0x6691368672287489ULL);
  __uint128_t array[] = {
      op0,
      op1,
  };
  __uint128_t* addr = &array[0];
  __uint128_t res0 = 0;
  __uint128_t res1 = 0;

  // The "memory" below ensures that the array contents are up to date. Without it, the
  // compiler might decide to initialize the array after the asm statement.
  //
  // We hardcode SIMD registers v0 and v1 below because there is no other way to express
  // consecutive registers, which in turn requires the mov instructions to retrieve the
  // loaded values into res0 and res1.
  asm("ld1 {v0.16b, v1.16b}, [%2], #32\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res0), "=w"(res1), "+r"(addr)
      :
      : "v0", "v1", "memory");

  ASSERT_EQ(res0, op0);
  ASSERT_EQ(res1, op1);
  ASSERT_EQ(addr, &array[2]);
}
4809
// Verifies LD1 with register post-indexing: the base address is advanced
// by the value of the index register (32) after the load.
TEST(Arm64InsnTest, Load1OnePostIndexReg) {
  static constexpr __uint128_t arg = MakeUInt128(0x4884761005564018ULL, 0x2423921926950620ULL);
  __uint128_t res_val;
  uint64_t res_addr;
  asm("ld1 {%0.16b}, [%1], %2"
      : "=w"(res_val), "=r"(res_addr)
      : "r"(static_cast<uint64_t>(32U)), "1"(&arg)
      : "memory");
  ASSERT_EQ(res_val, arg);
  ASSERT_EQ(res_addr, reinterpret_cast<uint64_t>(&arg) + 32);
}
4821
// Verifies LD1 single-structure to one byte lane (ld1 {Vt.b}[3]): only
// byte 3 is replaced (with the first byte of memory, 0x08 little-endian);
// all other lanes keep the preloaded register value.
TEST(Arm64InsnTest, LoadSingleInt8) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.b}[3], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x00112233'08'556677ULL, 0x8899aabbccddeeffULL));
}
4830
// Verifies LD1 single-structure to a 16-bit lane (ld1 {Vt.h}[2]): lane 2
// is replaced with the first halfword of memory (0x0708).
TEST(Arm64InsnTest, LoadSingleInt16) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.h}[2], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000'0708'22223333ULL, 0x4444555566667777ULL));
}
4839
// Verifies LD1 single-structure to a 32-bit lane (ld1 {Vt.s}[1]): lane 1
// is replaced with the first word of memory (0x05060708).
TEST(Arm64InsnTest, LoadSingleInt32) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.s}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0506070811111111ULL, 0x2222222233333333ULL));
}
4848
// Verifies LD1 single-structure to a 64-bit lane (ld1 {Vt.d}[1]): the
// upper 64 bits are replaced with the first doubleword of memory while
// the lower 64 bits keep the preloaded value.
TEST(Arm64InsnTest, LoadSingleInt64) {
  static constexpr __uint128_t reg_before =
      MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  static constexpr __uint128_t mem_src = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t reg_after;
  asm("ld1 {%0.d}[1], [%1]" : "=w"(reg_after) : "r"(&mem_src), "0"(reg_before) : "memory");
  ASSERT_EQ(reg_after, MakeUInt128(0x0000000000000000ULL, 0x0102030405060708ULL));
}
4857
// Verifies ST1 single-structure from one byte lane (st1 {Vt.b}[3]): only
// byte lane 3 of arg (0x05) is written, to the first byte of memory.
TEST(Arm64InsnTest, StoreSingleInt8) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0011223344556677ULL, 0x8899aabbccddeeffULL);
  asm("st1 {%1.b}[3], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x00112233445566'05ULL, 0x8899aabbccddeeffULL));
}
4864
// ST1 (single structure): store 16-bit lane 5 of the vector (0x0d0e) to the
// first two bytes of the destination.
TEST(Arm64InsnTest, StoreSingleInt16) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000111122223333ULL, 0x4444555566667777ULL);
  asm("st1 {%1.h}[5], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  // The digit separator marks the 16-bit value that was overwritten.
  ASSERT_EQ(mem_dest, MakeUInt128(0x000011112222'0d0eULL, 0x4444555566667777ULL));
}
4871
// ST1 (single structure): store 32-bit lane 2 of the vector (0x0d0e0f10) to
// the first four bytes of the destination.
TEST(Arm64InsnTest, StoreSingleInt32) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000011111111ULL, 0x2222222233333333ULL);
  asm("st1 {%1.s}[2], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  // Same value as before, but the digit separator now delimits the stored
  // 32-bit element (was 0x000000000'd0e0f10, a 9+7 grouping that did not
  // line up with the element boundary, unlike the neighboring tests).
  ASSERT_EQ(mem_dest, MakeUInt128(0x00000000'0d0e0f10ULL, 0x2222222233333333ULL));
}
4878
// ST1 (single structure): store 64-bit lane 1 of the vector to the first
// eight bytes of the destination.
TEST(Arm64InsnTest, StoreSingleInt64) {
  static constexpr __uint128_t arg = MakeUInt128(0x0102030405060708ULL, 0x090a0b0c0d0e0f10ULL);
  __uint128_t mem_dest = MakeUInt128(0x0000000000000000ULL, 0x1111111111111111ULL);
  asm("st1 {%1.d}[1], [%0]" : : "r"(&mem_dest), "w"(arg) : "memory");
  ASSERT_EQ(mem_dest, MakeUInt128(0x090a0b0c0d0e0f10ULL, 0x1111111111111111ULL));
}
4885
// LD1 (single structure, immediate post-index): load one byte into lane 3 and
// advance the address register by the element size (#1).
TEST(Arm64InsnTest, LoadSinglePostIndexImmInt8) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x5494167594605487ULL, 0x1172359464291058ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x5090995021495879ULL, 0x3112196135908315ULL);
  __uint128_t res;
  uint8_t* addr;
  // "1"(&arg2) seeds the address register that the instruction writes back.
  asm("ld1 {%0.b}[3], [%1], #1" : "=w"(res), "=r"(addr) : "0"(arg1), "1"(&arg2) : "memory");
  // Byte lane 3 becomes 0x79, the first byte of arg2 in memory.
  ASSERT_EQ(res, MakeUInt128(0x5494167579605487ULL, 0x1172359464291058ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 1);
}
4895
// LD1 (single structure, register post-index): load one 16-bit element into
// lane 7 and advance the address by a general-purpose register (17 here, to
// show the offset need not match the element size).
TEST(Arm64InsnTest, LoadSinglePostIndexRegInt16) {
  static constexpr __uint128_t arg1 = MakeUInt128(0x0080587824107493ULL, 0x5751488997891173ULL);
  static constexpr __uint128_t arg2 = MakeUInt128(0x9746129320351081ULL, 0x4327032514090304ULL);
  __uint128_t res;
  uint8_t* addr;
  asm("ld1 {%0.h}[7], [%1], %2"
      : "=w"(res), "=r"(addr)
      : "r"(static_cast<uint64_t>(17U)), "0"(arg1), "1"(&arg2)
      : "memory");
  // h[7] (top 16 bits) becomes 0x1081, the first two bytes of arg2.
  ASSERT_EQ(res, MakeUInt128(0x0080587824107493ULL, 0x1081488997891173ULL));
  ASSERT_EQ(addr, reinterpret_cast<const uint8_t*>(&arg2) + 17);
}
4908
// Post-indexed STR of a SIMD register where the address register and the data
// register share the same register number (x0 / q0); the writeback must not
// be confused with the data register.
TEST(Arm64InsnTest, StoreSimdPostIndex) {
  __uint128_t old_val = MakeUInt128(0x4939965143142980ULL, 0x9190659250937221ULL);
  __uint128_t new_val = MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL);
  __uint128_t* addr = &old_val;

  // Verify that the interpreter accepts "str q0, [x0], #8" where the register numbers are
  // the same, when the data register is one of the SIMD registers.
  asm("mov x0, %0\n\t"
      "mov v0.2D, %1.2D\n\t"
      "str q0, [x0], #8\n\t"
      "mov %0, x0"
      : "+r"(addr)
      : "w"(new_val)
      : "v0", "x0", "memory");

  // All 16 bytes were stored and the pointer advanced by the #8 post-index.
  ASSERT_EQ(old_val, MakeUInt128(0x5985261365549781ULL, 0x8931297848216829ULL));
  ASSERT_EQ(reinterpret_cast<uintptr_t>(addr), reinterpret_cast<uintptr_t>(&old_val) + 8);
}
4927
// STR XZR with writeback addressing: the zero register is stored and read
// back through the stack.
// NOTE(review): despite the test name, "[sp, #-16]!" is pre-index writeback,
// not post-index — consider renaming.
TEST(Arm64InsnTest, StoreZeroPostIndex1) {
  uint64_t res;
  asm("str xzr, [sp, #-16]!\n\t"
      "ldr %0, [sp, #0]\n\t"
      "add sp, sp, #16"  // restore sp so the compiler's frame stays valid
      : "=r"(res));
  ASSERT_EQ(res, 0);
}
4936
// STP of two SIMD registers with pre-index writeback on sp, read back with
// plain LDRs to verify both halves landed at the right offsets.
// NOTE(review): as above, "[sp, #-32]!" is pre-index, not post-index.
TEST(Arm64InsnTest, StoreZeroPostIndex2) {
  __uint128_t arg1 = MakeUInt128(0x9415573293820485ULL, 0x4212350817391254ULL);
  __uint128_t arg2 = MakeUInt128(0x9749819308714396ULL, 0x6151329420459193ULL);
  __uint128_t res1;
  __uint128_t res2;
  asm("mov v30.16b, %2.16b\n\t"
      "mov v31.16b, %3.16b\n\t"
      "stp q30, q31, [sp, #-32]!\n\t"
      "ldr %q0, [sp, #0]\n\t"
      "ldr %q1, [sp, #16]\n\t"
      "add sp, sp, #32"
      : "=w"(res1), "=w"(res2)
      : "w"(arg1), "w"(arg2)
      : "v30", "v31");

  ASSERT_EQ(res1, arg1);
  ASSERT_EQ(res2, arg2);
}
4955
// LD2 (multiple structures): de-interleaves byte pairs — even-indexed bytes
// go to v0, odd-indexed bytes to v1.
TEST(Arm64InsnTest, Load2MultipleInt8x8) {
  static constexpr uint8_t mem[] = {0x02, 0x16, 0x91, 0x83, 0x37, 0x23, 0x68, 0x03,
                                    0x99, 0x02, 0x79, 0x31, 0x60, 0x64, 0x20, 0x43};
  __uint128_t res[2];
  // The results are copied out through explicit movs because ld2 requires
  // consecutively-numbered registers, which constraints cannot express.
  asm("ld2 {v0.8b, v1.8b}, [%2]\n\t"
      "mov %0.16b, v0.16b\n\t"
      "mov %1.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2060799968379102ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x4364310203238316ULL, 0U));
}
4983
// LD3 (multiple structures): de-interleaves byte triples — element i of each
// triple goes to lane i of v7/v8/v9 respectively.
TEST(Arm64InsnTest, Load3MultipleInt8x8) {
  static constexpr uint8_t mem[3 * 8] = {0x32, 0x87, 0x67, 0x03, 0x80, 0x92, 0x52, 0x16,
                                         0x79, 0x07, 0x57, 0x12, 0x04, 0x06, 0x12, 0x37,
                                         0x59, 0x63, 0x27, 0x68, 0x56, 0x74, 0x84, 0x50};
  __uint128_t res[3];
  asm("ld3 {v7.8b-v9.8b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7427370407520332ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x8468590657168087ULL, 0U));
  ASSERT_EQ(res[2], MakeUInt128(0x5056631212799267ULL, 0U));
}
5000
// ST3 (multiple structures): interleaves three byte vectors back into memory;
// the inputs/outputs are the inverse of Load3MultipleInt8x8's vectors.
TEST(Arm64InsnTest, Store3MultipleInt8x8) {
  static constexpr uint64_t arg[3] = {
      0x7427370407520332ULL, 0x8468590657168087ULL, 0x5056631212799267ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.8b-v2.8b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x1652928003678732ULL);
  ASSERT_EQ(res[1], 0x3712060412570779ULL);
  ASSERT_EQ(res[2], 0x5084745668276359ULL);
}
5016
// LD3 with the full 16-byte (.16b) arrangement: 48 bytes de-interleaved into
// three 16-lane vectors.
TEST(Arm64InsnTest, Load3MultipleInt8x16) {
  static constexpr uint8_t mem[3 * 16] = {
      0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96, 0x25, 0x32, 0x83, 0x38,
      0x52, 0x27, 0x99, 0x24, 0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88,
      0x91, 0x29, 0x63, 0x62, 0x59, 0x54, 0x32, 0x73, 0x45, 0x44, 0x37, 0x16,
      0x33, 0x55, 0x77, 0x43, 0x29, 0x49, 0x99, 0x28, 0x81, 0x05, 0x57, 0x17};
  __uint128_t res[3];
  asm("ld3 {v7.16b-v9.16b}, [%3]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v7", "v8", "v9", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x4797245232446569ULL, 0x599433344326291ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2386592783966320ULL, 0x5728295537735929ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8859609938253835ULL, 0x1781497716455463ULL));
}
5035
// ST3 (.16b): interleaves three 16-lane byte vectors; inverse of
// Load3MultipleInt8x16.
TEST(Arm64InsnTest, Store3MultipleInt8x16) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x4797245232446569ULL, 0x599433344326291ULL),
                                         MakeUInt128(0x2386592783966320ULL, 0x5728295537735929ULL),
                                         MakeUInt128(0x8859609938253835ULL, 0x1781497716455463ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.16b-v2.16b}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5052
// LD3 (.4h): de-interleaves 16-bit triples. The register list v30-v0
// deliberately wraps around past v31 to exercise modulo-32 list decoding.
TEST(Arm64InsnTest, Load3MultipleInt16x4) {
  static constexpr uint16_t mem[3 * 4] = {0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883,
                                          0x2752, 0x2499, 0x6059, 0x8697, 0x4759, 0x8823};
  __uint128_t res[3];
  asm("ld3 {v30.4h-v0.4h}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697275296442069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x4759249932256535ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x8823605938833863ULL, 0));
}
5078
// ST3 (.4h): interleaves three 16-bit vectors; inverse of
// Load3MultipleInt16x4.
TEST(Arm64InsnTest, Store3MultipleInt16x4) {
  static constexpr uint64_t arg[3] = {
      0x8697275296442069ULL, 0x4759249932256535ULL, 0x8823605938833863ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.4h-v2.4h}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
}
5094
// LD3 (.8h): de-interleaves 16-bit triples across full 128-bit registers,
// with a wrapping v30-v0 register list.
TEST(Arm64InsnTest, Load3MultipleInt16x8) {
  static constexpr uint16_t mem[3 * 8] = {0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883,
                                          0x2752, 0x2499, 0x6059, 0x8697, 0x4759, 0x8823,
                                          0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637,
                                          0x5533, 0x4377, 0x4929, 0x2899, 0x0581, 0x1757};
  __uint128_t res[3];
  asm("ld3 {v30.8h-v0.8h}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697275296442069ULL, 0x2899553373322991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x4759249932256535ULL, 0x581437744456263ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823605938833863ULL, 0x1757492916375459ULL));
}
5112
// ST3 (.8h): interleaves three 128-bit 16-bit-lane vectors; inverse of
// Load3MultipleInt16x8.
TEST(Arm64InsnTest, Store3MultipleInt16x8) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x8697275296442069ULL, 0x2899553373322991ULL),
                                         MakeUInt128(0x4759249932256535ULL, 0x581437744456263ULL),
                                         MakeUInt128(0x8823605938833863ULL, 0x1757492916375459ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.8h-v2.8h}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5129
// LD3 (.2s): de-interleaves 32-bit triples, wrapping register list v30-v0.
TEST(Arm64InsnTest, Load3MultipleInt32x2) {
  static constexpr uint32_t mem[3 * 2] = {
      0x65352069, 0x96443863, 0x38833225, 0x24992752, 0x86976059, 0x88234759};
  __uint128_t res[3];
  asm("ld3 {v30.2s-v0.2s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2499275265352069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x8697605996443863ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475938833225ULL, 0));
}
5145
// ST3 (.2s): interleaves three 32-bit-lane vectors; inverse of
// Load3MultipleInt32x2.
TEST(Arm64InsnTest, Store3MultipleInt32x2) {
  static constexpr uint64_t arg[3] = {
      0x2499275265352069ULL, 0x8697605996443863ULL, 0x8823475938833225ULL};
  uint64_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.2s-v2.2s}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
}
5161
// LD3 (.4s): de-interleaves 32-bit triples across full 128-bit registers,
// wrapping register list v30-v0.
TEST(Arm64InsnTest, Load3MultipleInt32x4) {
  static constexpr uint32_t mem[3 * 4] = {0x65352069, 0x96443863, 0x38833225, 0x24992752,
                                          0x86976059, 0x88234759, 0x62632991, 0x73325459,
                                          0x16374445, 0x43775533, 0x28994929, 0x17570581};
  __uint128_t res[3];
  asm("ld3 {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2499275265352069ULL, 0x4377553362632991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8697605996443863ULL, 0x2899492973325459ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475938833225ULL, 0x1757058116374445ULL));
}
5187
// ST3 (.4s): interleaves three 128-bit 32-bit-lane vectors; inverse of
// Load3MultipleInt32x4.
TEST(Arm64InsnTest, Store3MultipleInt32x4) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x2499275265352069ULL, 0x4377553362632991ULL),
                                         MakeUInt128(0x8697605996443863ULL, 0x2899492973325459ULL),
                                         MakeUInt128(0x8823475938833225ULL, 0x1757058116374445ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.4s-v2.4s}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5204
// LD3 (.2d): de-interleaves 64-bit triples, wrapping register list v30-v0.
TEST(Arm64InsnTest, Load3MultipleInt64x2) {
  static constexpr uint64_t mem[3 * 2] = {0x9644386365352069,
                                          0x2499275238833225,
                                          0x8823475986976059,
                                          0x7332545962632991,
                                          0x4377553316374445,
                                          0x1757058128994929};
  __uint128_t res[3];
  asm("ld3 {v30.2d-v0.2d}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2499275238833225ULL, 0x4377553316374445ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475986976059ULL, 0x1757058128994929ULL));
}
5224
// ST3 (.2d): interleaves three 64-bit-lane vectors; inverse of
// Load3MultipleInt64x2.
TEST(Arm64InsnTest, Store3MultipleInt64x2) {
  static constexpr __uint128_t arg[3] = {MakeUInt128(0x9644386365352069ULL, 0x7332545962632991ULL),
                                         MakeUInt128(0x2499275238833225ULL, 0x4377553316374445ULL),
                                         MakeUInt128(0x8823475986976059ULL, 0x1757058128994929ULL)};
  __uint128_t res[3];
  asm("mov v0.16b, %0.16b\n\t"
      "mov v1.16b, %1.16b\n\t"
      "mov v2.16b, %2.16b\n\t"
      "st3 {v0.2d-v2.2d}, [%3]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "r"(res)
      : "v0", "v1", "v2", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
}
5241
// LD4 (.8b): de-interleaves byte quadruples into v7..v10.
TEST(Arm64InsnTest, Load4MultipleInt8x8) {
  static constexpr uint8_t mem[4 * 8] = {0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96,
                                         0x25, 0x32, 0x83, 0x38, 0x52, 0x27, 0x99, 0x24,
                                         0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88,
                                         0x91, 0x29, 0x63, 0x62, 0x59, 0x54, 0x32, 0x73};
  __uint128_t res[4];
  asm("ld4 {v7.8b-v10.8b}, [%4]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b\n\t"
      "mov %3.16b, v10.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x5991595952256369ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x5429476027323820ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x3263239799834435ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7362888624389665ULL, 0));
}
5261
// ST4 (.8b): interleaves four byte vectors; inverse of Load4MultipleInt8x8.
TEST(Arm64InsnTest, Store4MultipleInt8x8) {
  static constexpr uint64_t arg[4] = {
      0x5991595952256369ULL, 0x5429476027323820ULL, 0x3263239799834435ULL, 0x7362888624389665ULL};
  uint64_t res[4];
  asm("mov v7.16b, %0.16b\n\t"
      "mov v8.16b, %1.16b\n\t"
      "mov v9.16b, %2.16b\n\t"
      "mov v10.16b, %3.16b\n\t"
      "st4 {v7.8b-v10.8b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5279
// LD4 (.16b): de-interleaves 64 bytes of quadruples into four 128-bit
// registers.
TEST(Arm64InsnTest, Load4MultipleInt8x16) {
  static constexpr uint8_t mem[4 * 16] = {
      0x69, 0x20, 0x35, 0x65, 0x63, 0x38, 0x44, 0x96, 0x25, 0x32, 0x83, 0x38, 0x52,
      0x27, 0x99, 0x24, 0x59, 0x60, 0x97, 0x86, 0x59, 0x47, 0x23, 0x88, 0x91, 0x29,
      0x63, 0x62, 0x59, 0x54, 0x32, 0x73, 0x45, 0x44, 0x37, 0x16, 0x33, 0x55, 0x77,
      0x43, 0x29, 0x49, 0x99, 0x28, 0x81, 0x05, 0x57, 0x17, 0x81, 0x98, 0x78, 0x50,
      0x68, 0x14, 0x62, 0x52, 0x32, 0x13, 0x47, 0x52, 0x37, 0x38, 0x11, 0x65};
  __uint128_t res[4];
  asm("ld4 {v7.16b-v10.16b}, [%4]\n\t"
      "mov %0.16b, v7.16b\n\t"
      "mov %1.16b, v8.16b\n\t"
      "mov %2.16b, v9.16b\n\t"
      "mov %3.16b, v10.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x5991595952256369ULL, 0x3732688181293345ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x5429476027323820ULL, 0x3813149805495544ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x3263239799834435ULL, 0x1147627857997737ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7362888624389665ULL, 0x6552525017284316ULL));
}
5301
// ST4 (.16b): interleaves four 16-lane byte vectors; inverse of
// Load4MultipleInt8x16.
TEST(Arm64InsnTest, Store4MultipleInt8x16) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x5991595952256369ULL, 0x3732688181293345ULL),
                                         MakeUInt128(0x5429476027323820ULL, 0x3813149805495544ULL),
                                         MakeUInt128(0x3263239799834435ULL, 0x1147627857997737ULL),
                                         MakeUInt128(0x7362888624389665ULL, 0x6552525017284316ULL)};
  __uint128_t res[4];
  asm("mov v7.16b, %0.16b\n\t"
      "mov v8.16b, %1.16b\n\t"
      "mov v9.16b, %2.16b\n\t"
      "mov v10.16b, %3.16b\n\t"
      "st4 {v7.16b-v10.16b}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v7", "v8", "v9", "v10", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5321
// LD4 (.4h): de-interleaves 16-bit quadruples. The v30-v1 register list
// wraps around past v31 to exercise modulo-32 list decoding.
TEST(Arm64InsnTest, Load4MultipleInt16x4) {
  static constexpr uint16_t mem[4 * 4] = {0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883,
                                          0x2752, 0x2499, 0x6059, 0x8697, 0x4759, 0x8823,
                                          0x2991, 0x6263, 0x5459, 0x7332};
  __uint128_t res[4];
  asm("ld4 {v30.4h-v1.4h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0));
}
5353
// ST4 (.4h) with wrapping v30-v1 register list; inverse of
// Load4MultipleInt16x4.
TEST(Arm64InsnTest, Store4MultipleInt16x4) {
  static constexpr uint64_t arg[4] = {
      0x2991605932252069ULL, 0x6263869738836535ULL, 0x5459475927523863ULL, 0x7332882324999644ULL};
  uint64_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.4h-v1.4h}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5371
// LD4 (.8h): de-interleaves 16-bit quadruples across full 128-bit registers,
// wrapping register list v30-v1.
TEST(Arm64InsnTest, Load4MultipleInt16x8) {
  static constexpr uint16_t mem[4 * 8] = {
      0x2069, 0x6535, 0x3863, 0x9644, 0x3225, 0x3883, 0x2752, 0x2499, 0x6059, 0x8697, 0x4759,
      0x8823, 0x2991, 0x6263, 0x5459, 0x7332, 0x4445, 0x1637, 0x5533, 0x4377, 0x4929, 0x2899,
      0x0581, 0x1757, 0x9881, 0x5078, 0x1468, 0x5262, 0x1332, 0x5247, 0x3837, 0x6511};
  __uint128_t res[4];
  asm("ld4 {v30.8h-v1.8h}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL));
}
5391
// ST4 (.8h) with wrapping v30-v1 register list; inverse of
// Load4MultipleInt16x8.
TEST(Arm64InsnTest, Store4MultipleInt16x8) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x2991605932252069ULL, 0x1332988149294445ULL),
                                         MakeUInt128(0x6263869738836535ULL, 0x5247507828991637ULL),
                                         MakeUInt128(0x5459475927523863ULL, 0x3837146805815533ULL),
                                         MakeUInt128(0x7332882324999644ULL, 0x6511526217574377ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.8h-v1.8h}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5411
// LD4 (.2s): de-interleaves 32-bit quadruples, wrapping register list v30-v1.
TEST(Arm64InsnTest, Load4MultipleInt32x2) {
  static constexpr uint32_t mem[4 * 2] = {0x65352069, 0x96443863, 0x38833225, 0x24992752,
                                          0x86976059, 0x88234759, 0x62632991, 0x73325459};
  __uint128_t res[4];
  asm("ld4 {v30.2s-v1.2s}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697605965352069ULL, 0));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475996443863ULL, 0));
  ASSERT_EQ(res[2], MakeUInt128(0x6263299138833225ULL, 0));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545924992752ULL, 0));
}
5435
// ST4 (.2s) with wrapping v30-v1 register list; inverse of
// Load4MultipleInt32x2.
TEST(Arm64InsnTest, Store4MultipleInt32x2) {
  static constexpr uint64_t arg[4] = {
      0x8697605965352069ULL, 0x8823475996443863ULL, 0x6263299138833225ULL, 0x7332545924992752ULL};
  uint64_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.2s-v1.2s}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], 0x9644386365352069ULL);
  ASSERT_EQ(res[1], 0x2499275238833225ULL);
  ASSERT_EQ(res[2], 0x8823475986976059ULL);
  ASSERT_EQ(res[3], 0x7332545962632991ULL);
}
5453
// LD4 (.4s): de-interleaves 32-bit quadruples across full 128-bit registers,
// wrapping register list v30-v1.
TEST(Arm64InsnTest, Load4MultipleInt32x4) {
  static constexpr uint32_t mem[4 * 4] = {0x65352069, 0x96443863, 0x38833225, 0x24992752,
                                          0x86976059, 0x88234759, 0x62632991, 0x73325459,
                                          0x16374445, 0x43775533, 0x28994929, 0x17570581,
                                          0x50789881, 0x52621468, 0x52471332, 0x65113837};
  __uint128_t res[4];
  asm("ld4 {v30.4s-v1.4s}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x8697605965352069ULL, 0x5078988116374445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475996443863ULL, 0x5262146843775533ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x6263299138833225ULL, 0x5247133228994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545924992752ULL, 0x6511383717570581ULL));
}
5485
// ST4 (.4s) with wrapping v30-v1 register list; inverse of
// Load4MultipleInt32x4.
TEST(Arm64InsnTest, Store4MultipleInt32x4) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x8697605965352069ULL, 0x5078988116374445ULL),
                                         MakeUInt128(0x8823475996443863ULL, 0x5262146843775533ULL),
                                         MakeUInt128(0x6263299138833225ULL, 0x5247133228994929ULL),
                                         MakeUInt128(0x7332545924992752ULL, 0x6511383717570581ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.4s-v1.4s}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5505
// LD4 (.2d): de-interleaves 64-bit quadruples, wrapping register list v30-v1.
TEST(Arm64InsnTest, Load4MultipleInt64x2) {
  static constexpr uint64_t mem[4 * 2] = {0x9644386365352069,
                                          0x2499275238833225,
                                          0x8823475986976059,
                                          0x7332545962632991,
                                          0x4377553316374445,
                                          0x1757058128994929,
                                          0x5262146850789881,
                                          0x6511383752471332};
  __uint128_t res[4];
  asm("ld4 {v30.2d-v1.2d}, [%4]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b\n\t"
      "mov %3.16b, v1.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x4377553316374445ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x2499275238833225ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x8823475986976059ULL, 0x5262146850789881ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x7332545962632991ULL, 0x6511383752471332ULL));
}
5529
// ST4 (.2d) with wrapping v30-v1 register list; inverse of
// Load4MultipleInt64x2.
TEST(Arm64InsnTest, Store4MultipleInt64x2) {
  static constexpr __uint128_t arg[4] = {MakeUInt128(0x9644386365352069ULL, 0x4377553316374445ULL),
                                         MakeUInt128(0x2499275238833225ULL, 0x1757058128994929ULL),
                                         MakeUInt128(0x8823475986976059ULL, 0x5262146850789881ULL),
                                         MakeUInt128(0x7332545962632991ULL, 0x6511383752471332ULL)};
  __uint128_t res[4];
  asm("mov v30.16b, %0.16b\n\t"
      "mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "mov v1.16b, %3.16b\n\t"
      "st4 {v30.2d-v1.2d}, [%4]"
      :
      : "w"(arg[0]), "w"(arg[1]), "w"(arg[2]), "w"(arg[3]), "r"(res)
      : "v30", "v31", "v0", "v1", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x9644386365352069ULL, 0x2499275238833225ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8823475986976059ULL, 0x7332545962632991ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x4377553316374445ULL, 0x1757058128994929ULL));
  ASSERT_EQ(res[3], MakeUInt128(0x5262146850789881ULL, 0x6511383752471332ULL));
}
5549
// LD1R: load one byte and replicate it to all eight lanes.
TEST(Arm64InsnTest, Load1ReplicateInt8x8) {
  static constexpr uint8_t mem = 0x81U;
  __uint128_t res;
  asm("ld1r {%0.8b}, [%1]" : "=w"(res) : "r"(&mem) : "memory");
  ASSERT_EQ(res, MakeUInt128(0x8181818181818181ULL, 0U));
}
5556
// LD2R: load two consecutive 16-bit values and replicate the first across
// all lanes of v6 and the second across all lanes of v7.
TEST(Arm64InsnTest, Load2ReplicateInt16x8) {
  static constexpr uint16_t mem[] = {0x7904, 0x8715};
  __uint128_t res[2];
  asm("ld2r {v6.8h, v7.8h}, [%2]\n\t"
      "mov %0.16b, v6.16b\n\t"
      "mov %1.16b, v7.16b"
      : "=w"(res[0]), "=w"(res[1])
      : "r"(mem)
      : "v6", "v7", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7904790479047904ULL, 0x7904790479047904ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x8715871587158715ULL, 0x8715871587158715ULL));
}
5569
// LD3R: load three consecutive 32-bit values, replicating each across one of
// the three destination registers (wrapping list v30-v0).
TEST(Arm64InsnTest, Load3ReplicateInt32x4) {
  static constexpr uint32_t mem[] = {0x78713710U, 0x60510637U, 0x95558588U};
  __uint128_t res[3];
  asm("ld3r {v30.4s-v0.4s}, [%3]\n\t"
      "mov %0.16b, v30.16b\n\t"
      "mov %1.16b, v31.16b\n\t"
      "mov %2.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2])
      : "r"(mem)
      : "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x7871371078713710ULL, 0x7871371078713710ULL));
  ASSERT_EQ(res[1], MakeUInt128(0x6051063760510637ULL, 0x6051063760510637ULL));
  ASSERT_EQ(res[2], MakeUInt128(0x9555858895558588ULL, 0x9555858895558588ULL));
}
5584
// LD4R: load four consecutive 64-bit values, replicating each into both
// lanes of one destination register (wrapping list v29-v0).
TEST(Arm64InsnTest, Load4ReplicateInt64x2) {
  static constexpr uint64_t mem[] = {
      0x8150781468526213ULL, 0x3252473837651192ULL, 0x9901561091897779ULL, 0x2200870579339646ULL};
  __uint128_t res[4];
  asm("ld4r {v29.2d-v0.2d}, [%4]\n\t"
      "mov %0.16b, v29.16b\n\t"
      "mov %1.16b, v30.16b\n\t"
      "mov %2.16b, v31.16b\n\t"
      "mov %3.16b, v0.16b"
      : "=w"(res[0]), "=w"(res[1]), "=w"(res[2]), "=w"(res[3])
      : "r"(mem)
      : "v29", "v30", "v31", "v0", "memory");
  ASSERT_EQ(res[0], MakeUInt128(mem[0], mem[0]));
  ASSERT_EQ(res[1], MakeUInt128(mem[1], mem[1]));
  ASSERT_EQ(res[2], MakeUInt128(mem[2], mem[2]));
  ASSERT_EQ(res[3], MakeUInt128(mem[3], mem[3]));
}
5602
// LDNP (non-temporal pair) into two SIMD D registers; the upper 64 bits of
// each destination are zeroed.
// NOTE(review): "Temporarl" in the test name is a typo for "Temporal";
// renaming would change the gtest filter name, so it is only flagged here.
TEST(Arm64InsnTest, LoadPairNonTemporarlInt64) {
  static constexpr uint64_t mem[] = {0x3843601737474215ULL, 0x2476085152099016ULL};
  __uint128_t res[2];
  asm("ldnp %d0, %d1, [%2]" : "=w"(res[0]), "=w"(res[1]) : "r"(mem) : "memory");
  ASSERT_EQ(res[0], MakeUInt128(0x3843601737474215ULL, 0U));
  ASSERT_EQ(res[1], MakeUInt128(0x2476085152099016ULL, 0U));
}
5610
// MOVI (.2s): immediate replicated into each 32-bit lane; upper half zeroed.
TEST(Arm64InsnTest, MoviVector2S) {
  __uint128_t rd = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0xe4")();
  ASSERT_EQ(rd, MakeUInt128(0x000000e4000000e4ULL, 0x0000000000000000ULL));
}
5615
TEST(Arm64InsnTest, MoviVector2D) {
  // MOVI into two 64-bit lanes.
  constexpr auto AsmMovi = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2d, #0xff");
  ASSERT_EQ(AsmMovi(), MakeUInt128(0x00000000000000ffULL, 0x00000000000000ffULL));
}
5620
TEST(Arm64InsnTest, MoviVector8B) {
  // MOVI into eight byte lanes; the upper half of the register stays zero.
  constexpr auto AsmMovi = ASM_INSN_WRAP_FUNC_W_RES("movi %0.8b, #0xda");
  ASSERT_EQ(AsmMovi(), MakeUInt128(0xdadadadadadadadaULL, 0x0000000000000000ULL));
}
5625
TEST(Arm64InsnTest, MoviVector4HShiftBy8) {
  // MOVI with the immediate shifted left by 8 in each halfword lane.
  constexpr auto AsmMovi = ASM_INSN_WRAP_FUNC_W_RES("movi %0.4h, #0xd1, lsl #8");
  ASSERT_EQ(AsmMovi(), MakeUInt128(0xd100d100d100d100ULL, 0x0000000000000000ULL));
}
5630
TEST(Arm64InsnTest, MoviVector2SShiftBy16) {
  // MOVI with the shifting-ones (MSL) form: low bits fill with ones.
  constexpr auto AsmMovi = ASM_INSN_WRAP_FUNC_W_RES("movi %0.2s, #0x37, msl #16");
  ASSERT_EQ(AsmMovi(), MakeUInt128(0x0037ffff0037ffffULL, 0x0000000000000000ULL));
}
5635
TEST(Arm64InsnTest, MvniVector4H) {
  // MVNI writes the bitwise inverse of the expanded immediate.
  constexpr auto AsmMvni = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.4h, #0xbc");
  ASSERT_EQ(AsmMvni(), MakeUInt128(0xff43ff43ff43ff43ULL, 0x0000000000000000ULL));
}
5640
TEST(Arm64InsnTest, MvniVector2SShiftBy8) {
  // MVNI with an LSL #8 shifted immediate.
  constexpr auto AsmMvni = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x24, lsl #8");
  ASSERT_EQ(AsmMvni(), MakeUInt128(0xffffdbffffffdbffULL, 0x0000000000000000ULL));
}
5645
TEST(Arm64InsnTest, MvniVector2SShiftBy16) {
  // MVNI with the shifting-ones (MSL) form.
  constexpr auto AsmMvni = ASM_INSN_WRAP_FUNC_W_RES("mvni %0.2s, #0x25, msl #16");
  ASSERT_EQ(AsmMvni(), MakeUInt128(0xffda0000ffda0000ULL, 0x0000000000000000ULL));
}
5650
TEST(Arm64InsnTest, LoadSimdRegPlusReg) {
  // LDR (SIMD, register offset): loads the 128-bit value at base + offset.
  // An offset of 16 bytes selects the second array element.
  __uint128_t array[] = {
      MakeUInt128(0x6517980694113528ULL, 0x0131470130478164ULL),
      MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL),
  };
  uint64_t offset = 16;
  __uint128_t rd;

  asm("ldr %q0, [%1, %2]" : "=w"(rd) : "r"(array), "r"(offset) : "memory");

  ASSERT_EQ(rd, MakeUInt128(0x8672422924654366ULL, 0x8009806769282382ULL));
}
5663
TEST(Arm64InsnTest, ExtractNarrowI16x8ToI8x8) {
  // XTN narrows each halfword to its low byte.
  constexpr auto AsmXtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.8b, %1.8h");
  ASSERT_EQ(AsmXtn(MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL)),
            MakeUInt128(0x113355772367abefULL, 0x0ULL));
}
5669
TEST(Arm64InsnTest, ExtractNarrowI32x4ToI16x4) {
  // XTN narrows each word to its low halfword.
  constexpr auto AsmXtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.4h, %1.4s");
  ASSERT_EQ(AsmXtn(MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL)),
            MakeUInt128(0x223366774567cdefULL, 0x0ULL));
}
5675
TEST(Arm64InsnTest, ExtractNarrowI64x2ToI32x2) {
  // XTN narrows each doubleword to its low word.
  constexpr auto AsmXtn = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("xtn %0.2s, %1.2d");
  ASSERT_EQ(AsmXtn(MakeUInt128(0x0123456789abcdefULL, 0x0011223344556677ULL)),
            MakeUInt128(0x4455667789abcdefULL, 0x0ULL));
}
5681
TEST(Arm64InsnTest, ExtractNarrow2Int16x8ToInt8x16) {
  // XTN2 writes narrowed bytes into the upper half while preserving the lower
  // half of the destination.
  constexpr auto AsmXtn2 = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("xtn2 %0.16b, %1.8h");
  __uint128_t wide = MakeUInt128(0x1844396582533754ULL, 0x3885690941130315ULL);
  __uint128_t dst = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  ASSERT_EQ(AsmXtn2(wide, dst), MakeUInt128(0x6121865619673378ULL, 0x8509131544655354ULL));
}
5688
TEST(Arm64InsnTest, LoadLiteralSimd) {
  // We call an external assembly function to perform LDR literal because we
  // need to place the literal in .rodata. The literal placed in .text would
  // trigger a segfault.
  // The helper is expected to return the known 64-bit literal below.
  ASSERT_EQ(get_fp64_literal(), 0x0123456789abcdefULL);
}
5695
TEST(Arm64InsnTest, AbsInt64x1) {
  // Scalar ABS on the low doubleword; the upper 64 bits are cleared.
  constexpr auto AsmAbs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %d0, %d1");
  ASSERT_EQ(AsmAbs(MakeUInt128(0xfffffffffffffffdULL, 0xdeadbeef01234567ULL)),
            MakeUInt128(0x0000000000000003ULL, 0x0ULL));
}
5701
TEST(Arm64InsnTest, AbsInt8x8) {
  // Per-byte ABS, covering 0x7f/0x80/0x81/0xff boundary values.
  constexpr auto AsmAbs = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("abs %0.8b, %1.8b");
  ASSERT_EQ(AsmAbs(MakeUInt128(0x0001027e7f8081ffULL, 0x0123456789abcdefULL)),
            MakeUInt128(0x0001027e7f807f01ULL, 0x0ULL));
}
5707
TEST(Arm64InsnTest, UseV31) {
  // Sanity check that v31 (the last SIMD register) can be written and read.
  __uint128_t res;

  asm("movi v31.2d, #0xffffffffffffffff\n\t"
      "mov %0.16b, v31.16b"
      : "=w"(res)
      :
      : "v31");

  ASSERT_EQ(res, MakeUInt128(~0ULL, ~0ULL));
}
5719
TEST(Arm64InsnTest, AddHighNarrowInt16x8) {
  // ADDHN keeps the high byte of every 16-bit sum.
  constexpr auto AsmAddhn = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("addhn %0.8b, %1.8h, %2.8h");
  ASSERT_EQ(AsmAddhn(MakeUInt128(0x2296617119637792ULL, 0x1337575114959501ULL),
                     MakeUInt128(0x0941214722131794ULL, 0x7647772622414254ULL)),
            MakeUInt128(0x89ce36d72b823b8fULL, 0x0ULL));
}
5726
TEST(Arm64InsnTest, AddHighNarrowUpperInt16x8) {
  // ADDHN2 narrows into the upper half, leaving the lower half intact.
  constexpr auto AsmAddhn2 = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("addhn2 %0.16b, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x6561809377344403ULL, 0x0707469211201913ULL);
  __uint128_t rhs = MakeUInt128(0x6095752706957220ULL, 0x9175671167229109ULL);
  __uint128_t dst = MakeUInt128(0x5797877185560845ULL, 0x5296541266540853ULL);
  ASSERT_EQ(AsmAddhn2(lhs, rhs, dst),
            MakeUInt128(0x5797877185560845ULL, 0x98ad78aac5f57db6ULL));
}
5735
TEST(Arm64InsnTest, SubHighNarrowInt16x8) {
  // SUBHN keeps the high byte of every 16-bit difference.
  constexpr auto AsmSubhn = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("subhn %0.8b, %1.8h, %2.8h");
  ASSERT_EQ(AsmSubhn(MakeUInt128(0x4978189312978482ULL, 0x1682998948722658ULL),
                     MakeUInt128(0x1210835791513698ULL, 0x8209144421006751ULL)),
            MakeUInt128(0x948527bf3795814dULL, 0x0ULL));
}
5742
TEST(Arm64InsnTest, SubHighNarrowUpperInt16x8) {
  // SUBHN2 narrows into the upper half, leaving the lower half intact.
  constexpr auto AsmSubhn2 = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("subhn2 %0.16b, %1.8h, %2.8h");
  __uint128_t lhs = MakeUInt128(0x5324944166803962ULL, 0x6579787718556084ULL);
  __uint128_t rhs = MakeUInt128(0x1066587969981635ULL, 0x7473638405257145ULL);
  __uint128_t dst = MakeUInt128(0x3142980919065925ULL, 0x0937221696461515ULL);
  ASSERT_EQ(AsmSubhn2(lhs, rhs, dst),
            MakeUInt128(0x3142980919065925ULL, 0xf11413ef423bfc23ULL));
}
5751
TEST(Arm64InsnTest, RoundingAddHighNarrowInt16x8) {
  // RADDHN: like ADDHN but with rounding before the narrowing shift.
  constexpr auto AsmRaddhn = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("raddhn %0.8b, %1.8h, %2.8h");
  ASSERT_EQ(AsmRaddhn(MakeUInt128(0x8039626579787718ULL, 0x5560845529654126ULL),
                      MakeUInt128(0x3440171274947042ULL, 0x0562230538994561ULL)),
            MakeUInt128(0x5ba76287b479eee7ULL, 0x0000000000000000ULL));
}
5758
TEST(Arm64InsnTest, RoundingSubHighNarrowInt16x8) {
  // RSUBHN: like SUBHN but with rounding before the narrowing shift.
  constexpr auto AsmRsubhn = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("rsubhn %0.8b, %1.8h, %2.8h");
  ASSERT_EQ(AsmRsubhn(MakeUInt128(0x3063432858785698ULL, 0x3052358089330657ULL),
                      MakeUInt128(0x0216471550979259ULL, 0x2309907965473761ULL)),
            MakeUInt128(0x0da524cf2efc08c4ULL, 0x0000000000000000ULL));
}
5765
TEST(Arm64InsnTest, ScalarPairwiseAddInt8x2) {
  // ADDP %d0, %1.2d sums the two 64-bit lanes of the source (note: the test
  // name says Int8x2, but the encoding under test operates on 2 x int64).
  constexpr auto AsmAddp = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addp %d0, %1.2d");
  ASSERT_EQ(AsmAddp(MakeUInt128(0x6257591633303910ULL, 0x7225383742182140ULL)),
            MakeUInt128(0xd47c914d75485a50ULL, 0x0000000000000000ULL));
}
5771
TEST(Arm64InsnTest, AddAcrossInt8x8) {
  // ADDV sums all eight byte lanes into a scalar byte.
  constexpr auto AsmAddv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("addv %b0, %1.8b");
  ASSERT_EQ(AsmAddv(MakeUInt128(0x0681216028764962ULL, 0x8674460477464915ULL)),
            MakeUInt128(0x51ULL, 0x0ULL));
}
5777
TEST(Arm64InsnTest, SignedAddLongAcrossInt16x8) {
  // SADDLV: signed sum of all halfword lanes, widened to 32 bits.
  constexpr auto AsmSaddlv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlv %s0, %1.8h");
  ASSERT_EQ(AsmSaddlv(MakeUInt128(0x9699557377273756ULL, 0x6761552711392258ULL)),
            MakeUInt128(0x0000000000018aa2ULL, 0x0000000000000000ULL));
}
5783
TEST(Arm64InsnTest, UnsignedAddLongAcrossInt16x8) {
  // UADDLV: unsigned sum of all halfword lanes, widened to 32 bits.
  constexpr auto AsmUaddlv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlv %s0, %1.8h");
  ASSERT_EQ(AsmUaddlv(MakeUInt128(0x7986396522961312ULL, 0x8017826797172898ULL)),
            MakeUInt128(0x000000000002aac0ULL, 0x0000000000000000ULL));
}
5789
TEST(Arm64InsnTest, SignedMaximumAcrossInt16x8) {
  // SMAXV: signed maximum across all halfword lanes.
  constexpr auto AsmSmaxv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("smaxv %h0, %1.8h");
  ASSERT_EQ(AsmSmaxv(MakeUInt128(0x8482065967379473ULL, 0x1680864156456505ULL)),
            MakeUInt128(0x0000000000006737ULL, 0x0000000000000000ULL));
}
5795
TEST(Arm64InsnTest, SignedMinimumAcrossInt16x8) {
  // SMINV: signed minimum across all halfword lanes.
  constexpr auto AsmSminv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("sminv %h0, %1.8h");
  ASSERT_EQ(AsmSminv(MakeUInt128(0x6772530431825197ULL, 0x5791679296996504ULL)),
            MakeUInt128(0x0000000000009699ULL, 0x0000000000000000ULL));
}
5801
TEST(Arm64InsnTest, UnsignedMaximumAcrossInt16x8) {
  // UMAXV: unsigned maximum across all halfword lanes.
  constexpr auto AsmUmaxv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("umaxv %h0, %1.8h");
  ASSERT_EQ(AsmUmaxv(MakeUInt128(0x6500378070466126ULL, 0x4706021457505793ULL)),
            MakeUInt128(0x0000000000007046ULL, 0x0000000000000000ULL));
}
5807
TEST(Arm64InsnTest, UnsignedMinimumAcrossInt16x8) {
  // UMINV: unsigned minimum across all halfword lanes.
  constexpr auto AsmUminv = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uminv %h0, %1.8h");
  ASSERT_EQ(AsmUminv(MakeUInt128(0x5223572397395128ULL, 0x8181640597859142ULL)),
            MakeUInt128(0x0000000000005128ULL, 0x0000000000000000ULL));
}
5813
TEST(Arm64InsnTest, CountLeadingZerosI8x8) {
  // CLZ per byte lane.
  constexpr auto AsmClz = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("clz %0.8b, %1.8b");
  ASSERT_EQ(AsmClz(MakeUInt128(0x1452635608277857ULL, 0x7134275778960917ULL)),
            MakeUInt128(0x0301010104020101ULL, 0x0000000000000000ULL));
}
5819
TEST(Arm64InsnTest, CountLeadingSignBitsI8x8) {
  // CLS per byte lane.
  constexpr auto AsmCls = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cls %0.8b, %1.8b");
  ASSERT_EQ(AsmCls(MakeUInt128(0x8925892354201995ULL, 0x6112129021960864ULL)),
            MakeUInt128(0x0001000100010200ULL, 0x0000000000000000ULL));
}
5825
TEST(Arm64InsnTest, Cnt) {
  // CNT: population count per byte lane, full 128-bit register.
  constexpr auto AsmCnt = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("cnt %0.16b, %1.16b");
  ASSERT_EQ(AsmCnt(MakeUInt128(0x9835484875625298ULL, 0x7524238730775595ULL)),
            MakeUInt128(0x0304020205030303ULL, 0x0502030402060404ULL));
}
5831
TEST(Arm64InsnTest, SimdScalarMove) {
  // MOV (scalar from element): extracts byte lane 5 into a scalar register.
  constexpr auto AsmMov = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("mov %b0, %1.b[5]");
  ASSERT_EQ(AsmMov(MakeUInt128(0x1433345477624168ULL, 0x6251898356948556ULL)),
            MakeUInt128(0x0000000000000034ULL, 0x0000000000000000ULL));
}
5837
TEST(Arm64InsnTest, SimdVectorElemDuplicate) {
  // DUP: replicates byte lane 5 across all eight destination lanes.
  constexpr auto AsmDup = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.8b, %1.b[5]");
  ASSERT_EQ(AsmDup(MakeUInt128(0x3021647155097925ULL, 0x9230990796547376ULL)),
            MakeUInt128(0x6464646464646464ULL, 0x0000000000000000ULL));
}
5843
TEST(Arm64InsnTest, SimdVectorElemDuplicateInt16AtIndex7) {
  // DUP: replicates halfword lane 7 (the top one) across all four lanes.
  constexpr auto AsmDup = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("dup %0.4h, %1.h[7]");
  ASSERT_EQ(AsmDup(MakeUInt128(0x2582262052248940ULL, 0x7726719478268482ULL)),
            MakeUInt128(0x7726772677267726ULL, 0x0000000000000000ULL));
}
5849
TEST(Arm64InsnTest, SimdVectorElemInsert) {
  // MOV (element): copies source word lane 1 into destination word lane 2.
  constexpr auto AsmIns = ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("mov %0.s[2], %1.s[1]");
  __uint128_t src = MakeUInt128(0x7120844335732654ULL, 0x8938239119325974ULL);
  __uint128_t dst = MakeUInt128(0x7656180937734440ULL, 0x3070746921120191ULL);
  ASSERT_EQ(AsmIns(src, dst), MakeUInt128(0x7656180937734440ULL, 0x3070746971208443ULL));
}
5856
TEST(Arm64InsnTest, NegateInt64x1) {
  // Scalar NEG on the low doubleword.
  __uint128_t plain = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %d0, %d1")(
      MakeUInt128(0x8389522868478312ULL, 0x3552658213144957ULL));
  ASSERT_EQ(plain, MakeUInt128(0x7c76add797b87ceeULL, 0x0000000000000000ULL));

  // INT64_MIN has no positive counterpart and negates to itself.
  __uint128_t min64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %d0, %d1")(MakeUInt128(1ULL << 63, 0U));
  ASSERT_EQ(min64, MakeUInt128(1ULL << 63, 0U));
}
5865
TEST(Arm64InsnTest, NegateInt16x8) {
  // NEG per halfword lane across the full register.
  constexpr auto AsmNeg = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("neg %0.8h, %1.8h");
  ASSERT_EQ(AsmNeg(MakeUInt128(0x4411010446823252ULL, 0x7162010526522721ULL)),
            MakeUInt128(0xbbeffefcb97ecdaeULL, 0x8e9efefbd9aed8dfULL));
}
5871
TEST(Arm64InsnTest, NotI8x8) {
  // NOT: bitwise inversion of the low 64 bits.
  constexpr auto AsmNot = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("not %0.8b, %1.8b");
  ASSERT_EQ(AsmNot(MakeUInt128(0x6205647693125705ULL, 0x8635662018558100ULL)),
            MakeUInt128(0x9dfa9b896ceda8faULL, 0x0000000000000000ULL));
}
5877
TEST(Arm64InsnTest, RbitInt8x8) {
  // RBIT: reverses the bit order within each byte lane.
  constexpr auto AsmRbit = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rbit %0.8b, %1.8b");
  ASSERT_EQ(AsmRbit(MakeUInt128(0x4713296210734043ULL, 0x7518957359614589ULL)),
            MakeUInt128(0xe2c8944608ce02c2ULL, 0x0000000000000000ULL));
}
5883
TEST(Arm64InsnTest, Rev16Int8x16) {
  // REV16: swaps the bytes within each 16-bit group.
  constexpr auto AsmRev16 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev16 %0.16b, %1.16b");
  ASSERT_EQ(AsmRev16(MakeUInt128(0x9904801094121472ULL, 0x2131794764777262ULL)),
            MakeUInt128(0x0499108012947214ULL, 0x3121477977646272ULL));
}
5889
TEST(Arm64InsnTest, Rev32Int16x8) {
  // REV32: swaps the halfwords within each 32-bit group.
  constexpr auto AsmRev32 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev32 %0.8h, %1.8h");
  ASSERT_EQ(AsmRev32(MakeUInt128(0x8662237172159160ULL, 0x7716692547487389ULL)),
            MakeUInt128(0x2371866291607215ULL, 0x6925771673894748ULL));
}
5895
TEST(Arm64InsnTest, Rev64Int32x4) {
  // REV64: swaps the words within each 64-bit group.
  constexpr auto AsmRev64 = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("rev64 %0.4s, %1.4s");
  ASSERT_EQ(AsmRev64(MakeUInt128(0x5306736096571209ULL, 0x1807638327166416ULL)),
            MakeUInt128(0x9657120953067360ULL, 0x2716641618076383ULL));
}
5901
TEST(Arm64InsnTest, TblInt8x8) {
  // Single-register TBL with an 8-byte index vector.
  constexpr auto AsmTbl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.8b, {%1.16b}, %2.8b");
  __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t indices = MakeUInt128(0x0104011509120605ULL, 0x0315080907091312ULL);
  ASSERT_EQ(AsmTbl(table, indices), MakeUInt128(0x1144110099006655ULL, 0x0000000000000000ULL));
}
5908
TEST(Arm64InsnTest, TblInt8x16) {
  // Single-register TBL with a 16-byte index vector.
  constexpr auto AsmTbl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("tbl %0.16b, {%1.16b}, %2.16b");
  __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t indices = MakeUInt128(0x0905060808010408ULL, 0x0506000206030202ULL);
  ASSERT_EQ(AsmTbl(table, indices), MakeUInt128(0x9955668888114488ULL, 0x5566002266332222ULL));
}
5915
TEST(Arm64InsnTest, Tbl2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  __uint128_t res;

  // Hardcode v31 and v0 so that the TBL instruction gets consecutive registers
  // (the list wraps around past v31).
  asm("mov v31.16b, %1.16b\n\t"
      "mov v0.16b, %2.16b\n\t"
      "tbl %0.16b, {v31.16b, v0.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3)
      : "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x22005500002299ffULL, 0x8777003398000033ULL));
}
5932
TEST(Arm64InsnTest, Tbl3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, and v0 so that the TBL instruction gets consecutive
  // registers (the list wraps around past v31).
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "tbl %0.16b, {v30.16b-v0.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4)
      // Declare the registers the MOVs above actually write.  The previous
      // clobber list named v0/v1/v2, leaving v30 and v31 unprotected (v1 and
      // v2 are never touched).
      : "v30", "v31", "v0");

  ASSERT_EQ(res, MakeUInt128(0x778760000090ff00ULL, 0x0060980000103244ULL));
}
5951
TEST(Arm64InsnTest, Tbl4Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  __uint128_t res;

  // Hardcode v30, v31, v0, and v1 so that the TBL instruction gets consecutive
  // registers (the list wraps around past v31).
  asm("mov v30.16b, %1.16b\n\t"
      "mov v31.16b, %2.16b\n\t"
      "mov v0.16b, %3.16b\n\t"
      "mov v1.16b, %4.16b\n\t"
      "tbl %0.16b, {v30.16b-v1.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5)
      : "v30", "v31", "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x778760009f90ff5fULL, 0x5f60980000103244ULL));
}
5972
TEST(Arm64InsnTest, TbxInt8x16) {
  // Single-register TBX: out-of-range indices keep the destination byte.
  constexpr auto AsmTbx = ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("tbx %0.16b, {%1.16b}, %2.16b");
  __uint128_t table = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t indices = MakeUInt128(0x0915061808010408ULL, 0x0516000206031202ULL);
  __uint128_t dst = MakeUInt128(0x6668559233565463ULL, 0x9138363185745698ULL);
  ASSERT_EQ(AsmTbx(table, indices, dst),
            MakeUInt128(0x9968669288114488ULL, 0x5538002266335622ULL));
}
5981
TEST(Arm64InsnTest, Tbx2Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x0224052800020910ULL, 0x1807280319002203ULL);
  // The initial destination value matters: TBX keeps it for out-of-range
  // indices, so it is tied to the output via the "0" constraint below.
  __uint128_t res = MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL);

  // Hardcode v0 and v1 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b}, %3.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "0"(res)
      : "v0", "v1");

  ASSERT_EQ(res, MakeUInt128(0x22945584002299ffULL, 0x8777153398000333ULL));
}
5998
TEST(Arm64InsnTest, Tbx3Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  // The initial destination value matters: TBX keeps it for out-of-range
  // indices, so it is tied to the output via the "0" constraint below.
  __uint128_t res = MakeUInt128(0x0136776310849135ULL, 0x1615642269847507ULL);

  // Hardcode v0, v1, and v2 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "tbx %0.16b, {v0.16b, v1.16b, v2.16b}, %4.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "0"(res)
      : "v0", "v1", "v2");

  ASSERT_EQ(res, MakeUInt128(0x778760631090ff35ULL, 0x1660980069103244ULL));
}
6017
TEST(Arm64InsnTest, Tbx4Int8x16) {
  __uint128_t arg1 = MakeUInt128(0x7766554433221100ULL, 0xffeeddccbbaa9988ULL);
  __uint128_t arg2 = MakeUInt128(0x76655443322110ffULL, 0xfeeddccbbaa99887ULL);
  __uint128_t arg3 = MakeUInt128(0x7060504030201000ULL, 0xf0e0d0c0b0a09080ULL);
  __uint128_t arg4 = MakeUInt128(0x7f6f5f4f3f2f1fffULL, 0xffefdfcfbfaf9f8fULL);
  __uint128_t arg5 = MakeUInt128(0x0718264039291035ULL, 0x3526190040211304ULL);
  // The initial destination value matters: TBX keeps it for out-of-range
  // indices, so it is tied to the output via the "0" constraint below.
  __uint128_t res = MakeUInt128(0x5818319637637076ULL, 0x1799191920357958ULL);

  // Hardcode v0, v1, v2, and v3 so that the TBX instruction gets consecutive registers.
  asm("mov v0.16b, %1.16b\n\t"
      "mov v1.16b, %2.16b\n\t"
      "mov v2.16b, %3.16b\n\t"
      "mov v3.16b, %4.16b\n\t"
      "tbx %0.16b, {v0.16b-v3.16b}, %5.16b"
      : "=w"(res)
      : "w"(arg1), "w"(arg2), "w"(arg3), "w"(arg4), "w"(arg5), "0"(res)
      : "v0", "v1", "v2", "v3");

  ASSERT_EQ(res, MakeUInt128(0x778760969f90ff5fULL, 0x5f60980020103244ULL));
}
6038
TEST(Arm64InsnTest, Trn1Int8x8) {
  // TRN1: interleaves the even-numbered byte lanes of both sources.
  constexpr auto AsmTrn1 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn1 %0.8b, %1.8b, %2.8b");
  ASSERT_EQ(AsmTrn1(MakeUInt128(0x2075916729700785ULL, 0x0580717186381054ULL),
                    MakeUInt128(0x2786099055690013ULL, 0x4137182368370991ULL)),
            MakeUInt128(0x8675906769701385ULL, 0x0000000000000000ULL));
}
6045
TEST(Arm64InsnTest, Trn2Int16x8) {
  // TRN2: interleaves the odd-numbered halfword lanes of both sources.
  constexpr auto AsmTrn2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("trn2 %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmTrn2(MakeUInt128(0x6685592335654639ULL, 0x1383631857456981ULL),
                    MakeUInt128(0x7494078488442377ULL, 0x2175154334260306ULL)),
            MakeUInt128(0x7494668588443565ULL, 0x2175138334265745ULL));
}
6052
TEST(Arm64InsnTest, Uzp1Int8x8) {
  // UZP1: concatenates the even-numbered byte lanes of both sources.
  constexpr auto AsmUzp1 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp1 %0.8b, %1.8b, %2.8b");
  ASSERT_EQ(AsmUzp1(MakeUInt128(0x4954893139394489ULL, 0x9216125525597701ULL),
                    MakeUInt128(0x2783467926101995ULL, 0x5852247172201777ULL)),
            MakeUInt128(0x8379109554313989ULL, 0x0000000000000000ULL));
}
6059
TEST(Arm64InsnTest, Uzp2Int16x8) {
  // UZP2: concatenates the odd-numbered halfword lanes of both sources.
  constexpr auto AsmUzp2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uzp2 %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmUzp2(MakeUInt128(0x6745642390585850ULL, 0x2167190313952629ULL),
                    MakeUInt128(0x3620129476918749ULL, 0x7519101147231528ULL)),
            MakeUInt128(0x2167139567459058ULL, 0x7519472336207691ULL));
}
6066
TEST(Arm64InsnTest, Zip2Int64x2) {
  // Use ZIP2 to match the test name.  The original accidentally encoded UZP2;
  // for the .2d arrangement both instructions select element 1 of each source,
  // so the expected result is unchanged, but only ZIP2 is actually covered by
  // the name.
  __uint128_t arg1 = MakeUInt128(0x1494271410093913ULL, 0x6913810725813781ULL);
  __uint128_t arg2 = MakeUInt128(0x3578940055995001ULL, 0x8354251184172136ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.2d, %1.2d, %2.2d")(arg1, arg2);
  ASSERT_EQ(res, MakeUInt128(0x6913810725813781ULL, 0x8354251184172136ULL));
}
6073
TEST(Arm64InsnTest, Zip1Int8x8) {
  // ZIP1: interleaves the low byte lanes of both sources.
  constexpr auto AsmZip1 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.8b, %1.8b, %2.8b");
  ASSERT_EQ(AsmZip1(MakeUInt128(0x7499235630254947ULL, 0x8024901141952123ULL),
                    MakeUInt128(0x3331239480494707ULL, 0x9119153267343028ULL)),
            MakeUInt128(0x8030492547490747ULL, 0x0000000000000000ULL));
}
6080
TEST(Arm64InsnTest, Zip1Int64x2) {
  // ZIP1 on .2d: result is the low doubleword of each source.
  constexpr auto AsmZip1 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip1 %0.2d, %1.2d, %2.2d");
  ASSERT_EQ(AsmZip1(MakeUInt128(0x9243530136776310ULL, 0x8491351615642269ULL),
                    MakeUInt128(0x0551199581831963ULL, 0x7637076179919192ULL)),
            MakeUInt128(0x9243530136776310ULL, 0x0551199581831963ULL));
}
6087
TEST(Arm64InsnTest, Zip2Int16x8) {
  // ZIP2: interleaves the high halfword lanes of both sources.
  constexpr auto AsmZip2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("zip2 %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmZip2(MakeUInt128(0x5831832713142517ULL, 0x0296923488962766ULL),
                    MakeUInt128(0x2934595889706953ULL, 0x6534940603402166ULL)),
            MakeUInt128(0x0340889621662766ULL, 0x6534029694069234ULL));
}
6094
TEST(Arm64InsnTest, SignedMaxInt16x8) {
  // SMAX: per-lane signed maximum.
  constexpr auto AsmSmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smax %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmSmax(MakeUInt128(0x9901573466102371ULL, 0x2235478911292547ULL),
                    MakeUInt128(0x4922157650450812ULL, 0x0677173571202718ULL)),
            MakeUInt128(0x4922573466102371ULL, 0x2235478971202718ULL));
}
6101
TEST(Arm64InsnTest, SignedMinInt16x8) {
  // SMIN: per-lane signed minimum.
  constexpr auto AsmSmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smin %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmSmin(MakeUInt128(0x7820385653909910ULL, 0x4775941413215432ULL),
                    MakeUInt128(0x0084531214065935ULL, 0x8090412711359200ULL)),
            MakeUInt128(0x0084385614069910ULL, 0x8090941411359200ULL));
}
6108
TEST(Arm64InsnTest, SignedMaxPairwiseInt16x8) {
  // SMAXP: signed maximum of adjacent lane pairs.
  constexpr auto AsmSmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smaxp %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmSmaxp(MakeUInt128(0x6998469884770232ULL, 0x3823840055655517ULL),
                     MakeUInt128(0x3272867600724817ULL, 0x2987637569816335ULL)),
            MakeUInt128(0x3823556569980232ULL, 0x6375698132724817ULL));
}
6115
TEST(Arm64InsnTest, SignedMinPairwiseInt16x8) {
  // SMINP: signed minimum of adjacent lane pairs.
  constexpr auto AsmSminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sminp %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmSminp(MakeUInt128(0x8865701568501691ULL, 0x8647488541679154ULL),
                     MakeUInt128(0x1821553559732353ULL, 0x0686043010675760ULL)),
            MakeUInt128(0x8647915488651691ULL, 0x0430106718212353ULL));
}
6122
TEST(Arm64InsnTest, UnsignedMaxInt16x8) {
  // UMAX: per-lane unsigned maximum.
  constexpr auto AsmUmax = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umax %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmUmax(MakeUInt128(0x7639975974619383ULL, 0x5845749159880976ULL),
                    MakeUInt128(0x5928493695941434ULL, 0x0814685298150539ULL)),
            MakeUInt128(0x7639975995949383ULL, 0x5845749198150976ULL));
}
6129
TEST(Arm64InsnTest, UnsignedMinInt16x8) {
  // UMIN: per-lane unsigned minimum.
  constexpr auto AsmUmin = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umin %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmUmin(MakeUInt128(0x2888773717663748ULL, 0x6027660634960353ULL),
                    MakeUInt128(0x6983349515101986ULL, 0x4269887847171939ULL)),
            MakeUInt128(0x2888349515101986ULL, 0x4269660634960353ULL));
}
6136
TEST(Arm64InsnTest, UnsignedMaxPairwiseInt16x8) {
  // UMAXP: unsigned maximum of adjacent lane pairs.
  constexpr auto AsmUmaxp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umaxp %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmUmaxp(MakeUInt128(0x1318583584066747ULL, 0x2370297149785084ULL),
                     MakeUInt128(0x4570249413983163ULL, 0x4332378975955680ULL)),
            MakeUInt128(0x2971508458358406ULL, 0x4332759545703163ULL));
}
6143
TEST(Arm64InsnTest, UnsignedMinPairwiseInt16x8) {
  // UMINP: unsigned minimum of adjacent lane pairs.
  constexpr auto AsmUminp = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uminp %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmUminp(MakeUInt128(0x9538121791319145ULL, 0x1350099384631177ULL),
                     MakeUInt128(0x7769055481028850ULL, 0x2080858008781157ULL)),
            MakeUInt128(0x0993117712179131ULL, 0x2080087805548102ULL));
}
6150
TEST(Arm64InsnTest, SignedHalvingAddInt16x8) {
  // SHADD: signed add, then halve, per lane.
  constexpr auto AsmShadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shadd %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmShadd(MakeUInt128(0x1021944719713869ULL, 0x2560841624511239ULL),
                     MakeUInt128(0x8062011318454124ULL, 0x4782050110798760ULL)),
            MakeUInt128(0xc841caad18db3cc6ULL, 0x3671c48b1a65ccccULL));
}
6157
TEST(Arm64InsnTest, SignedHalvingSubInt16x8) {
  // SHSUB: signed subtract, then halve, per lane.
  constexpr auto AsmShsub = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("shsub %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmShsub(MakeUInt128(0x9041210873032402ULL, 0x0106853419472304ULL),
                     MakeUInt128(0x7666672174986986ULL, 0x8547076781205124ULL)),
            MakeUInt128(0x8ceddcf3ff35dd3eULL, 0x3ddfbee64c13e8f0ULL));
}
6164
TEST(Arm64InsnTest, SignedRoundingHalvingAddInt16x8) {
  // SRHADD: signed add with rounding, then halve, per lane.
  constexpr auto AsmSrhadd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srhadd %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmSrhadd(MakeUInt128(0x5871487839890810ULL, 0x7429530941060596ULL),
                      MakeUInt128(0x9443158477539700ULL, 0x9439883949144323ULL)),
            MakeUInt128(0xf65a2efe586ecf88ULL, 0x0431eda1450d245dULL));
}
6171
TEST(Arm64InsnTest, SignedAbsoluteDifferenceInt16x8) {
  // SABD: per-lane signed absolute difference.
  constexpr auto AsmSabd = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabd %0.8h, %1.8h, %2.8h");
  ASSERT_EQ(AsmSabd(MakeUInt128(0x1349607501116498ULL, 0x3278563531614516ULL),
                    MakeUInt128(0x8457695687109002ULL, 0x9997698412632665ULL)),
            MakeUInt128(0x8ef208e17a01d496ULL, 0x98e1134f1efe1eb1ULL));
}
6178
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt16x8) {
  // SABDL: absolute difference of the low halfword lanes, widened to words.
  constexpr auto AsmSabdl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl %0.4s, %1.4h, %2.4h");
  ASSERT_EQ(AsmSabdl(MakeUInt128(0x7419850973346267ULL, 0x9332107268687076ULL),
                     MakeUInt128(0x8062639919361965ULL, 0x0440995421676278ULL)),
            MakeUInt128(0x000059fe00004902ULL, 0x0000f3b70000de90ULL));
}
6185
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongUpperInt16x8) {
  // SABDL2: like SABDL but reads the upper halfword lanes.
  constexpr auto AsmSabdl2 = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sabdl2 %0.4s, %1.8h, %2.8h");
  ASSERT_EQ(AsmSabdl2(MakeUInt128(0x4980559610330799ULL, 0x4145347784574699ULL),
                      MakeUInt128(0x9921285999993996ULL, 0x1228161521931488ULL)),
            MakeUInt128(0x00009d3c00003211ULL, 0x00002f1d00001e62ULL));
}
6192
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt16x8) {
  // SABA: per-lane signed absolute difference added into the destination.
  // The lowest lane (0x7fff vs 0x8000) exercises the int16 difference overflow.
  __uint128_t lhs = MakeUInt128(0x8967'0031'9258'7fffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t rhs = MakeUInt128(0x6560'2339'1796'8000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t acc = MakeUInt128(0x8333'6555'7900'5555ULL, 0x1914'7319'8862'7135ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.8h, %1.8h, %2.8h")(lhs, rhs, acc),
            MakeUInt128(0x5f2c'885d'fe3e'5554ULL, 0xec88'7cbb'c58e'a248ULL));
}
6201
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateInt32x4) {
  // SABA on 32-bit lanes; the lowest lane (0x7fffffff vs 0x80000000) exercises
  // the int32 difference overflow.
  __uint128_t lhs = MakeUInt128(0x8967'0031'7fff'ffffULL, 0x9410'5105'3358'4384ULL);
  __uint128_t rhs = MakeUInt128(0x6560'2339'8000'0000ULL, 0x6784'4763'7084'7497ULL);
  __uint128_t acc = MakeUInt128(0x8333'6555'aaaa'5555ULL, 0x1914'7319'8862'7135ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("saba %0.4s, %1.4s, %2.4s")(lhs, rhs, acc),
            MakeUInt128(0x5f2c'885d'aaaa'5554ULL, 0xec88'6977'c58e'a248ULL));
}
6210
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongInt16x4) {
  // SABAL: widening (16 -> 32 bit) signed absolute difference, accumulated.
  __uint128_t lhs = MakeUInt128(0x078464167452167ULL, 0x719048310967671ULL);
  __uint128_t rhs = MakeUInt128(0x344349481926268ULL, 0x110739948250607ULL);
  __uint128_t acc = MakeUInt128(0x949507350316901ULL, 0x731852119552635ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.4s, %1.4h, %2.4h")(lhs, rhs, acc),
            MakeUInt128(0x094a36265031aa02ULL, 0x073187ed195537e2ULL));
}
6218
TEST(Arm64InsnTest, SignedAbsoluteDifferenceLongInt32x2) {
  // NOTE(review): despite the "Long" name this uses the accumulating SABAL; with a
  // zero accumulator the result equals SABDL. The inputs pin the int32 overflow
  // case |0x7fffffff - (-0x80000000)| = 0xffffffff in the widened 64-bit lane.
  __uint128_t lhs = MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL);
  __uint128_t rhs = MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL);
  __uint128_t acc = MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal %0.2d, %1.2s, %2.2s")(lhs, rhs, acc),
            MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
}
6226
TEST(Arm64InsnTest, SignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  // SABAL2: widening signed absolute difference of the upper 16-bit lanes, accumulated.
  __uint128_t lhs = MakeUInt128(0x690943470482932ULL, 0x414041114654092ULL);
  __uint128_t rhs = MakeUInt128(0x988344435159133ULL, 0x010773944111840ULL);
  __uint128_t acc = MakeUInt128(0x410768498106634ULL, 0x241048239358274ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("sabal2 %0.4s, %1.8h, %2.8h")(lhs, rhs, acc),
            MakeUInt128(0x0410a63098108e86ULL, 0x024108863935f59cULL));
}
6235
TEST(Arm64InsnTest, UnsignedHalvingAddInt16x8) {
  // UHADD: unsigned halving add, (a + b) >> 1, on eight 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x4775379853799732ULL, 0x2344561227858432ULL);
  __uint128_t rhs = MakeUInt128(0x9684664751333657ULL, 0x3692387201464723ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhadd %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x6efc4eef525666c4ULL, 0x2ceb4742146565aaULL));
}
6242
TEST(Arm64InsnTest, UnsignedHalvingSubInt16x8) {
  // UHSUB: unsigned halving subtract, (a - b) >> 1, on eight 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x9926884349592876ULL, 0x1240075587569464ULL);
  __uint128_t rhs = MakeUInt128(0x1370562514001179ULL, 0x7133166207153715ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uhsub %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x42db190f1aac0b7eULL, 0xd086f87940202ea7ULL));
}
6249
TEST(Arm64InsnTest, UnsignedRoundingHalvingAddInt16x8) {
  // URHADD: unsigned rounding halving add, (a + b + 1) >> 1, on eight 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x5066533985738887ULL, 0x8661476294434140ULL);
  __uint128_t rhs = MakeUInt128(0x1049888993160051ULL, 0x2076781035886116ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urhadd %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x30586de18c45446cULL, 0x536c5fb964e6512bULL));
}
6256
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceInt16x8) {
  // UABD: per-lane unsigned absolute difference on eight 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x8574664607722834ULL, 0x1540311441529418ULL);
  __uint128_t rhs = MakeUInt128(0x8047825438761770ULL, 0x7904300015669867ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabd %0.8h, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x052d1c0e310410c4ULL, 0x63c401142bec044fULL));
}
6263
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongInt16x8) {
  // UABDL: unsigned absolute difference of the low four 16-bit lanes, widened to 32 bits.
  __uint128_t lhs = MakeUInt128(0x1614585505839727ULL, 0x4209809097817293ULL);
  __uint128_t rhs = MakeUInt128(0x2393010676638682ULL, 0x4040111304024700ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0x000070e0000010a5ULL, 0x00000d7f0000574fULL));
}
6270
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceLongUpperInt16x8) {
  // UABDL2: like UABDL, but consumes the upper four 16-bit lanes of each source.
  __uint128_t lhs = MakeUInt128(0x0347999588867695ULL, 0x0161249722820403ULL);
  __uint128_t rhs = MakeUInt128(0x0399546327883069ULL, 0x5976249361510102ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uabdl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x00003ecf00000301ULL, 0x0000581500000004ULL));
}
6277
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateInt16x8) {
  // UABA: per-lane unsigned absolute difference added into the destination.
  __uint128_t lhs = MakeUInt128(0x0857466460772283ULL, 0x4154031144152941ULL);
  __uint128_t rhs = MakeUInt128(0x8804782543876177ULL, 0x0790430001566986ULL);
  __uint128_t acc = MakeUInt128(0x7767957609099669ULL, 0x3607559496515273ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uaba %0.8h, %1.8h, %2.8h")(lhs, rhs, acc),
            MakeUInt128(0xf714c73725f9d55dULL, 0x6fcb9583d91092b8ULL));
}
6285
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongInt16x4) {
  // UABAL: widening (16 -> 32 bit) unsigned absolute difference, accumulated.
  __uint128_t lhs = MakeUInt128(0x8343417044157348ULL, 0x2481833301640566ULL);
  __uint128_t rhs = MakeUInt128(0x9596688667695634ULL, 0x9141632842641497ULL);
  __uint128_t acc = MakeUInt128(0x4533349999480002ULL, 0x6699875888159350ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal %0.4s, %1.4h, %2.4h")(lhs, rhs, acc),
            MakeUInt128(0x453357ed99481d16ULL, 0x669999ab8815ba66ULL));
}
6293
TEST(Arm64InsnTest, UnsignedAbsoluteDifferenceAccumulateLongUpperInt16x8) {
  // UABAL2: widening unsigned absolute difference of the upper 16-bit lanes, accumulated.
  __uint128_t lhs = MakeUInt128(0x998685541703188ULL, 0x778867592902607ULL);
  __uint128_t rhs = MakeUInt128(0x043212666179192ULL, 0x352093822787888ULL);
  __uint128_t acc = MakeUInt128(0x988633599116081ULL, 0x235355570464634ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("uabal2 %0.4s, %1.8h, %2.8h")(lhs, rhs, acc),
            MakeUInt128(0x0988d34d9911b302ULL, 0x0235397b7046c371ULL));
}
6302
TEST(Arm64InsnTest, SignedAddLongPairwiseInt8x8) {
  // SADDLP (64-bit source): adjacent signed byte pairs summed into 16-bit lanes.
  __uint128_t src = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.4h, %1.8b")(src),
            MakeUInt128(0x00c50051ffbb0099ULL, 0x0000000000000000ULL));
}
6308
TEST(Arm64InsnTest, SignedAddLongPairwiseInt8x16) {
  // SADDLP (128-bit source): adjacent signed byte pairs summed into 16-bit lanes.
  __uint128_t src = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.8h, %1.16b")(src),
            MakeUInt128(0x00c50051ffbb0099ULL, 0x0078ffd2006a000bULL));
}
6314
TEST(Arm64InsnTest, SignedAddLongPairwiseInt16x4) {
  // SADDLP: adjacent signed 16-bit pairs summed into 32-bit lanes (64-bit source).
  __uint128_t src = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.2s, %1.4h")(src),
            MakeUInt128(0x0000a274fffffc58ULL, 0x0000000000000000ULL));
}
6320
TEST(Arm64InsnTest, SignedAddLongPairwiseInt16x8) {
  // SADDLP: adjacent signed 16-bit pairs summed into 32-bit lanes (128-bit source).
  __uint128_t src = MakeUInt128(0x6164411096256633ULL, 0x7305409219519675ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("saddlp %0.4s, %1.8h")(src),
            MakeUInt128(0xa274fffffc58ULL, 0xb397ffffafc6ULL));
}
6326
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt8x16) {
  // SADALP: pairwise signed byte sums added into the 16-bit accumulator lanes.
  __uint128_t src = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t acc = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.8h, %1.16b")(src, acc),
            MakeUInt128(0x71c183272fe809c2ULL, 0x510924703608905fULL));
}
6333
TEST(Arm64InsnTest, SignedAddAccumulateLongPairwiseInt16x8) {
  // SADALP: pairwise signed 16-bit sums added into the 32-bit accumulator lanes.
  __uint128_t src = MakeUInt128(0x1991646384142707ULL, 0x7988708874229277ULL);
  __uint128_t acc = MakeUInt128(0x7217826030500994ULL, 0x5108247835729056ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("sadalp %0.4s, %1.8h")(src, acc),
            MakeUInt128(0x72180054304fb4afULL, 0x51090e88357296efULL));
}
6340
TEST(Arm64InsnTest, UnsignedAddLongPairwiseInt8x16) {
  // UADDLP: adjacent unsigned byte pairs summed into 16-bit lanes.
  __uint128_t src = MakeUInt128(0x1483287348089574ULL, 0x7777527834422109ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W_ARG("uaddlp %0.8h, %1.16b")(src),
            MakeUInt128(0x0097009b00500109ULL, 0x00ee00ca0076002aULL));
}
6346
TEST(Arm64InsnTest, UnsignedAddAccumulateLongPairwiseInt8x16) {
  // UADALP: pairwise unsigned byte sums added into the 16-bit accumulator lanes.
  __uint128_t src = MakeUInt128(0x9348154691631162ULL, 0x4928873574718824ULL);
  __uint128_t acc = MakeUInt128(0x5207665738825139ULL, 0x6391635767231510ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_W0_ARG("uadalp %0.8h, %1.16b")(src, acc),
            MakeUInt128(0x52e266b2397651acULL, 0x64026413680815bcULL));
}
6353
TEST(Arm64InsnTest, SignedAddLong) {
  // SADDL: signed add of the low four 16-bit lanes, widened to 32 bits.
  __uint128_t lhs = MakeUInt128(0x3478074585067606ULL, 0x3048229409653041ULL);
  __uint128_t rhs = MakeUInt128(0x1183066710818930ULL, 0x3110887172816751ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0xffff9587ffffff36ULL, 0x000045fb00000dacULL));
}
6360
TEST(Arm64InsnTest, SignedAddLongUpper) {
  // SADDL2: signed widening add of the upper four 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x3160683158679946ULL, 0x0165205774052942ULL);
  __uint128_t rhs = MakeUInt128(0x3053601780313357ULL, 0x2632670547903384ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x0000bb9500005cc6ULL, 0x000027970000875cULL));
}
6367
TEST(Arm64InsnTest, SignedSubLong) {
  // SSUBL: signed subtract of the low four 16-bit lanes, widened to 32 bits.
  __uint128_t lhs = MakeUInt128(0x8566746260879482ULL, 0x0186474876727272ULL);
  __uint128_t rhs = MakeUInt128(0x2206267646533809ULL, 0x9801966883680994ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0x00001a34ffff5c79ULL, 0xffff636000004decULL));
}
6374
TEST(Arm64InsnTest, SignedSubLongUpper) {
  // SSUBL2: signed widening subtract of the upper four 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x3011331753305329ULL, 0x8020166888174813ULL);
  __uint128_t rhs = MakeUInt128(0x4298868158557781ULL, 0x0343231753064784ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0xffff35110000008fULL, 0xffff7cddfffff351ULL));
}
6381
TEST(Arm64InsnTest, UnsignedAddLong) {
  // UADDL: unsigned add of the low four 16-bit lanes, widened to 32 bits.
  __uint128_t lhs = MakeUInt128(0x3126059505777727ULL, 0x5424712416483128ULL);
  __uint128_t rhs = MakeUInt128(0x3298207236175057ULL, 0x4673870128209575ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0x00003b8e0000c77eULL, 0x000063be00002607ULL));
}
6388
TEST(Arm64InsnTest, UnsignedAddLongUpper) {
  // UADDL2: unsigned widening add of the upper four 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x3384698499778726ULL, 0x7065551918544686ULL);
  __uint128_t rhs = MakeUInt128(0x9846947849573462ULL, 0x2606294219624557ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x000031b600008bddULL, 0x0000966b00007e5bULL));
}
6395
TEST(Arm64InsnTest, UnsignedSubLong) {
  // USUBL: unsigned subtract of the low four 16-bit lanes, widened to 32 bits.
  __uint128_t lhs = MakeUInt128(0x4378111988556318ULL, 0x7777925372011667ULL);
  __uint128_t rhs = MakeUInt128(0x1853954183598443ULL, 0x8305203762819440ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl %0.4s, %1.4h, %2.4h")(lhs, rhs),
            MakeUInt128(0x000004fcffffded5ULL, 0x00002b25ffff7bd8ULL));
}
6402
TEST(Arm64InsnTest, UnsignedSubLongUpper) {
  // USUBL2: unsigned widening subtract of the upper four 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x5228717440266638ULL, 0x9148817173086436ULL);
  __uint128_t rhs = MakeUInt128(0x1113890694202790ULL, 0x8814311944879941ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubl2 %0.4s, %1.8h, %2.8h")(lhs, rhs),
            MakeUInt128(0x00002e81ffffcaf5ULL, 0x0000093400005058ULL));
}
6409
TEST(Arm64InsnTest, SignedAddWide8x8) {
  // SADDW: 16-bit lanes plus sign-extended low eight bytes of the second source.
  __uint128_t wide = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t narrow = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.8h, %1.8h, %2.8b")(wide, narrow),
            MakeUInt128(0x78a9590683774119ULL, 0x902199e305af1385ULL));
}
6416
TEST(Arm64InsnTest, SignedAddWide16x4) {
  // SADDW: 32-bit lanes plus sign-extended low four halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t narrow = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x7844bf068313a519ULL, 0x9001b9e305982a85ULL));
}
6423
TEST(Arm64InsnTest, SignedAddWide32x2) {
  // SADDW: 64-bit lanes plus sign-extended low two words of the second source.
  __uint128_t wide = MakeUInt128(0x7844598183134112ULL, 0x9001999205981352ULL);
  __uint128_t narrow = MakeUInt128(0x2051173365856407ULL, 0x8264849427644113ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw %0.2d, %1.2d, %2.2s")(wide, narrow),
            MakeUInt128(0x78445981e898a519ULL, 0x9001999225e92a85ULL));
}
6430
TEST(Arm64InsnTest, SignedAddWideUpper) {
  // SADDW2: widening add using the upper four halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x3407092233436577ULL, 0x9160128093179401ULL);
  __uint128_t narrow = MakeUInt128(0x7185985999338492ULL, 0x3549564005709955ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("saddw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x34070e923342feccULL, 0x916047c99317ea41ULL));
}
6437
TEST(Arm64InsnTest, SignedSubWide) {
  // SSUBW: 32-bit lanes minus sign-extended low four halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x2302847007312065ULL, 0x8032626417116165ULL);
  __uint128_t narrow = MakeUInt128(0x9576132723515666ULL, 0x6253667271899853ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x2302611f0730c9ffULL, 0x8032ccee17114e3eULL));
}
6444
TEST(Arm64InsnTest, SignedSubWideUpper) {
  // SSUBW2: widening subtract using the upper four halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x4510824783572905ULL, 0x6919885554678860ULL);
  __uint128_t narrow = MakeUInt128(0x7946280537122704ULL, 0x2466543192145281ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ssubw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x4510f0338356d684ULL, 0x691963ef5467342fULL));
}
6451
TEST(Arm64InsnTest, UnsignedAddWide8x8) {
  // UADDW: 16-bit lanes plus zero-extended low eight bytes of the second source.
  __uint128_t wide = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t narrow = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.8h, %1.8h, %2.8b")(wide, narrow),
            MakeUInt128(0x58d078de51b9836dULL, 0x175d53a8956e8897ULL));
}
6458
TEST(Arm64InsnTest, UnsignedAddWide16x4) {
  // UADDW: 32-bit lanes plus zero-extended low four halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t narrow = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x5870d8de512a136dULL, 0x172987a89537bf97ULL));
}
6465
TEST(Arm64InsnTest, UnsignedAddWide32x2) {
  // UADDW: 64-bit lanes plus zero-extended low two words of the second source.
  __uint128_t wide = MakeUInt128(0x5870785951298344ULL, 0x1729535195378855ULL);
  __uint128_t narrow = MakeUInt128(0x3457374260859029ULL, 0x0817651557803905ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw %0.2d, %1.2d, %2.2s")(wide, narrow),
            MakeUInt128(0x58707859b1af136dULL, 0x17295351c98ebf97ULL));
}
6472
TEST(Arm64InsnTest, UnsignedAddWideUpper) {
  // UADDW2: widening add using the upper four halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x7516493270950493ULL, 0x4639382432227188ULL);
  __uint128_t narrow = MakeUInt128(0x5159740547021482ULL, 0x8971117779237612ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("uaddw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x7516c25570957aa5ULL, 0x4639c195322282ffULL));
}
6479
TEST(Arm64InsnTest, UnsignedSubWide) {
  // USUBW: 32-bit lanes minus zero-extended low four halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x0625247972199786ULL, 0x6854279897799233ULL);
  __uint128_t narrow = MakeUInt128(0x9579057581890622ULL, 0x5254735822052364ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw %0.4s, %1.4s, %2.4h")(wide, narrow),
            MakeUInt128(0x0624a2f072199164ULL, 0x6853921f97798cbeULL));
}
6486
TEST(Arm64InsnTest, UnsignedSubWideUpper) {
  // USUBW2: widening subtract using the upper four halfwords of the second source.
  __uint128_t wide = MakeUInt128(0x8242392192695062ULL, 0x0831838145469839ULL);
  __uint128_t narrow = MakeUInt128(0x2366461363989101ULL, 0x2102177095976704ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("usubw2 %0.4s, %1.4s, %2.8h")(wide, narrow),
            MakeUInt128(0x8241a38a9268e95eULL, 0x0831627f454680c9ULL));
}
6493
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8) {
  // SMULL: signed multiply of the low eight bytes, widened to 16-bit products.
  __uint128_t lhs = MakeUInt128(0x9191791552241718ULL, 0x9585361680594741ULL);
  __uint128_t rhs = MakeUInt128(0x2341933984202187ULL, 0x4564925644346239ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.8h, %1.8b, %2.8b")(lhs, rhs),
            MakeUInt128(0xd848048002f7f4a8ULL, 0xf0d3e3d1cc7b04adULL));
}
6500
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8Upper) {
  // SMULL2: signed widening multiply of the upper eight bytes.
  __uint128_t lhs = MakeUInt128(0x9314052976347574ULL, 0x8119356709110137ULL);
  __uint128_t rhs = MakeUInt128(0x7517210080315590ULL, 0x2485309066920376ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.8h, %1.16b, %2.16b")(lhs, rhs),
            MakeUInt128(0x0396f8b20003195aULL, 0xee24f3fd09f0d2f0ULL));
}
6507
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8) {
  // UMULL: unsigned multiply of the low eight bytes, widened to 16-bit products.
  __uint128_t lhs = MakeUInt128(0x9149055628425039ULL, 0x1275771028402799ULL);
  __uint128_t rhs = MakeUInt128(0x8066365825488926ULL, 0x4880254566101729ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.8h, %1.8b, %2.8b")(lhs, rhs),
            MakeUInt128(0x05c812902ad00876ULL, 0x48801d16010e1d90ULL));
}
6514
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8Upper) {
  // UMULL2: unsigned widening multiply of the upper eight bytes.
  __uint128_t lhs = MakeUInt128(0x9709683408005355ULL, 0x9849175417381883ULL);
  __uint128_t rhs = MakeUInt128(0x9994469748676265ULL, 0x5165827658483588ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.8h, %1.16b, %2.16b")(lhs, rhs),
            MakeUInt128(0x07e80fc004f84598ULL, 0x30181ccd0bae26b8ULL));
}
6521
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElem) {
  // SMULL (by element): low four halfwords times rhs halfword lane 2, widened.
  // NOTE(review): the test name says Int8x8 but the vectors here are 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x9293459588970695ULL, 0x3653494060340216ULL);
  __uint128_t rhs = MakeUInt128(0x6544375589004563ULL, 0x2882250545255640ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull %0.4s, %1.4h, %2.h[2]")(lhs, rhs),
            MakeUInt128(0xe630cb23016c3279ULL, 0xe8593fcf0f0a1d79ULL));
}
6528
TEST(Arm64InsnTest, SignedMultiplyLongInt8x8IndexedElemUpper) {
  // SMULL2 (by element): upper four halfwords times rhs halfword lane 2, widened.
  // NOTE(review): the test name says Int8x8 but the vectors here are 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x9279068212073883ULL, 0x7781423356282360ULL);
  __uint128_t rhs = MakeUInt128(0x8963208068222468ULL, 0x0122482611771858ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("smull2 %0.4s, %1.8h, %2.h[2]")(lhs, rhs),
            MakeUInt128(0x0af01400047db000ULL, 0x0f2be08008677980ULL));
}
6535
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem) {
  // UMULL (by element): low four halfwords times rhs halfword lane 2, widened.
  // NOTE(review): the test name says Int8x8 but the vectors here are 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x9086996033027634ULL, 0x7870810817545011ULL);
  __uint128_t rhs = MakeUInt128(0x9307141223390866ULL, 0x3938339529425786ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[2]")(lhs, rhs),
            MakeUInt128(0x03ffbe2409445fa8ULL, 0x0b54a16c0c0648c0ULL));
}
6542
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElem2) {
  // UMULL (by element): same as above but selects halfword lane 4, which lives in
  // the upper 64 bits of the second source register.
  __uint128_t lhs = MakeUInt128(0x9132710495478599ULL, 0x1801969678353214ULL);
  __uint128_t rhs = MakeUInt128(0x6444118926063152ULL, 0x6618167443193550ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull %0.4s, %1.4h, %2.h[4]")(lhs, rhs),
            MakeUInt128(0x1f1659301bd26cd0ULL, 0x1e3cb9a017892540ULL));
}
6549
TEST(Arm64InsnTest, UnsignedMultiplyLongInt8x8IndexedElemUpper) {
  // UMULL2 (by element): upper four halfwords times rhs halfword lane 2, widened.
  // NOTE(review): the test name says Int8x8 but the vectors here are 16-bit lanes.
  __uint128_t lhs = MakeUInt128(0x9815793678976697ULL, 0x4220575059683440ULL);
  __uint128_t rhs = MakeUInt128(0x8697350201410206ULL, 0x7235850200724522ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("umull2 %0.4s, %1.8h, %2.h[2]")(lhs, rhs),
            MakeUInt128(0x12833ad00ad1a880ULL, 0x0db1244012143ea0ULL));
}
6556
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8) {
  // SMLAL: signed widening multiply of the low bytes, added into the accumulator.
  __uint128_t lhs = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t rhs = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t acc = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.8h, %1.8b, %2.8b")(lhs, rhs, acc),
            MakeUInt128(0x3b5b1ca28ec69893ULL, 0x8b7836c02ef25620ULL));
}
6564
TEST(Arm64InsnTest, SignedMultiplyAddLongInt8x8Upper) {
  // SMLAL2: signed widening multiply of the upper bytes, added into the accumulator.
  __uint128_t lhs = MakeUInt128(0x5514435021828702ULL, 0x6685610665003531ULL);
  __uint128_t rhs = MakeUInt128(0x0502163182060176ULL, 0x0921798468493686ULL);
  __uint128_t acc = MakeUInt128(0x3161293727951873ULL, 0x0789726373537171ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal2 %0.8h, %1.16b, %2.16b")(lhs, rhs, acc),
            MakeUInt128(0x5a69293732c30119ULL, 0x0b1f6288a12c6e89ULL));
}
6573
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8) {
  // SMLSL: signed widening multiply of the low bytes, subtracted from the accumulator.
  __uint128_t lhs = MakeUInt128(0x9662539339538092ULL, 0x2195591918188552ULL);
  __uint128_t rhs = MakeUInt128(0x6780621499231727ULL, 0x6316321833989693ULL);
  __uint128_t acc = MakeUInt128(0x8075616855911752ULL, 0x9984501320671293ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl %0.8h, %1.8b, %2.8b")(lhs, rhs, acc),
            MakeUInt128(0x9764560f61112814ULL, 0xc42a811300a11b17ULL));
}
6581
TEST(Arm64InsnTest, SignedMultiplySubtractLongInt8x8Upper) {
  // SMLSL2: signed widening multiply of the upper bytes, subtracted from the accumulator.
  __uint128_t lhs = MakeUInt128(0x9826903089111856ULL, 0x8798692947051352ULL);
  __uint128_t rhs = MakeUInt128(0x4816091743243015ULL, 0x3836847072928989ULL);
  __uint128_t acc = MakeUInt128(0x8284602223730145ULL, 0x2655679898627767ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlsl2 %0.8h, %1.16b, %2.16b")(lhs, rhs, acc),
            MakeUInt128(0x62e662482c482763ULL, 0x40cd7d88cb3e6577ULL));
}
6590
TEST(Arm64InsnTest, SignedMultiplyAddLongInt16x4) {
  // SMLAL: signed widening multiply of the low halfwords, added into the accumulator.
  __uint128_t lhs = MakeUInt128(0x9779940012601642ULL, 0x2760926082349304ULL);
  __uint128_t rhs = MakeUInt128(0x1180643829138347ULL, 0x3546797253992623ULL);
  __uint128_t acc = MakeUInt128(0x3879158299848645ULL, 0x9271734059225620ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("smlal %0.4s, %1.4h, %2.4h")(lhs, rhs, acc),
            MakeUInt128(0x3b6bd2a28eac7893ULL, 0x8b4c38c02edab620ULL));
}
6598
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8) {
  // UMLAL: unsigned widening multiply of the low bytes, added into the accumulator.
  __uint128_t lhs = MakeUInt128(0x9696920253886503ULL, 0x4577183176686885ULL);
  __uint128_t rhs = MakeUInt128(0x9236814884752764ULL, 0x9846882194973972ULL);
  __uint128_t acc = MakeUInt128(0x9707737187188400ULL, 0x4143231276365048ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal %0.8h, %1.8b, %2.8b")(lhs, rhs, acc),
            MakeUInt128(0xc1d3b199967b852cULL, 0x96cf42b6bfc850d8ULL));
}
6606
TEST(Arm64InsnTest, UnsignedMultiplyAddLongInt8x8Upper) {
  // UMLAL2: unsigned widening multiply of the upper bytes, added into the accumulator.
  __uint128_t lhs = MakeUInt128(0x9055637695252326ULL, 0x5361442478023082ULL);
  __uint128_t rhs = MakeUInt128(0x6811831037735887ULL, 0x0892406130313364ULL);
  __uint128_t acc = MakeUInt128(0x7737101162821461ULL, 0x4661679404090518ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlal2 %0.8h, %1.16b, %2.16b")(lhs, rhs, acc),
            MakeUInt128(0x8db710736c124729ULL, 0x48f99ee6150912bcULL));
}
6615
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8) {
  // UMLSL: unsigned widening multiply of the low bytes, subtracted from the accumulator.
  __uint128_t lhs = MakeUInt128(0x4577772457520386ULL, 0x5437542828256714ULL);
  __uint128_t rhs = MakeUInt128(0x1288583454443513ULL, 0x2562054464241011ULL);
  __uint128_t acc = MakeUInt128(0x0379554641905811ULL, 0x6862305964476958ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl %0.8h, %1.8b, %2.8b")(lhs, rhs, acc),
            MakeUInt128(0xe6ed3f7e40f14e1fULL, 0x6388f1213b5f6208ULL));
}
6623
TEST(Arm64InsnTest, UnsignedMultiplySubtractLongInt8x8Upper) {
  // UMLSL2: unsigned widening multiply of the upper bytes, subtracted from the accumulator.
  __uint128_t lhs = MakeUInt128(0x4739376564336319ULL, 0x7978680367187307ULL);
  __uint128_t rhs = MakeUInt128(0x9693924236321448ULL, 0x4503547763156702ULL);
  __uint128_t acc = MakeUInt128(0x5539006542311792ULL, 0x0153464977929066ULL);
  ASSERT_EQ(ASM_INSN_WRAP_FUNC_W_RES_WW0_ARG("umlsl2 %0.8h, %1.16b, %2.16b")(lhs, rhs, acc),
            MakeUInt128(0x2d64fe6d13ec1784ULL, 0xe0b644e155728f01ULL));
}
6632
TEST(Arm64InsnTest, SignedShiftLeftInt64x1) {
  // SSHL (scalar, 64-bit): negative counts shift right arithmetically.
  // The -65/-64 and 64/65 cases pin the out-of-range count behavior.
  constexpr auto AsmSshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %d0, %d1, %d2");
  const __uint128_t input = MakeUInt128(0x9007497297363549ULL, 0x6453328886984406ULL);
  ASSERT_EQ(AsmSshl(input, -65), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(input, -64), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(input, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(input, -1), MakeUInt128(0xc803a4b94b9b1aa4ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(input, 0), MakeUInt128(0x9007497297363549ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(input, 1), MakeUInt128(0x200e92e52e6c6a92ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(input, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(input, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSshl(input, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6646
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt64x1) {
  // SRSHL (scalar, 64-bit): negative counts shift right with rounding.
  // The -65/-64 and 64/65 cases pin the out-of-range count behavior.
  constexpr auto AsmSrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %d0, %d1, %d2");
  const __uint128_t input = MakeUInt128(0x9276457931065792ULL, 0x2955249887275846ULL);
  ASSERT_EQ(AsmSrshl(input, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(input, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(input, -63), MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(input, -1), MakeUInt128(0xc93b22bc98832bc9ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(input, 0), MakeUInt128(0x9276457931065792ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(input, 1), MakeUInt128(0x24ec8af2620caf24ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(input, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(input, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmSrshl(input, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6660
TEST(Arm64InsnTest, UnsignedShiftLeftInt64x1) {
  // USHL (scalar, 64-bit): negative counts shift right logically.
  // The -65/-64 and 64/65 cases pin the out-of-range count behavior.
  constexpr auto AsmUshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %d0, %d1, %d2");
  const __uint128_t input = MakeUInt128(0x9138296682468185ULL, 0x7103188790652870ULL);
  ASSERT_EQ(AsmUshl(input, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(input, -64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(input, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(input, -1), MakeUInt128(0x489c14b3412340c2ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(input, 0), MakeUInt128(0x9138296682468185ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(input, 1), MakeUInt128(0x227052cd048d030aULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(input, 63), MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(input, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUshl(input, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6674
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt64x1) {
  // URSHL (scalar): like USHL but right shifts round; note that a -64 shift
  // of this input rounds up to 1 instead of truncating to 0.
  constexpr auto AsmUrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %d0, %d1, %d2");
  __uint128_t arg = MakeUInt128(0x9023452924407736ULL, 0x5949563051007421ULL);
  ASSERT_EQ(AsmUrshl(arg, -65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -64), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -63), MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, -1), MakeUInt128(0x4811a29492203b9bULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 0), MakeUInt128(0x9023452924407736ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 1), MakeUInt128(0x20468a524880ee6cULL, 0x0000000000000000ULL));
  // Rounding does not apply to left shifts: 63 and beyond clear this input.
  ASSERT_EQ(AsmUrshl(arg, 63), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 64), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_EQ(AsmUrshl(arg, 65), MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
}
6688
TEST(Arm64InsnTest, SignedShiftLeftInt16x8) {
  // SSHL (vector, 8 x 16-bit lanes): arg2 supplies an independent signed
  // shift count per lane (+16, +15, +2, +1, -1, -15, -16, -17), covering
  // in-range, boundary, and out-of-range counts in one register.
  constexpr auto AsmSshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("sshl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmSshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0xccccffffffffffffULL));
  // A zero shift count in every lane leaves the input unchanged.
  ASSERT_EQ(AsmSshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6696
TEST(Arm64InsnTest, SignedRoundingShiftLeftInt16x8) {
  // SRSHL (vector, 8 x 16-bit lanes): same per-lane shift counts as the
  // SSHL test above; the expected value differs in the right-shifted lanes
  // because negative counts round instead of truncating.
  constexpr auto AsmSrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("srshl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmSrshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0xcccdffff00000000ULL));
  // A zero shift count in every lane leaves the input unchanged.
  ASSERT_EQ(AsmSrshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6704
TEST(Arm64InsnTest, UnsignedShiftLeftInt16x8) {
  // USHL (vector, 8 x 16-bit lanes): per-lane signed shift counts; negative
  // counts shift right logically (zero fill), unlike the signed variant.
  constexpr auto AsmUshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("ushl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmUshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0x4ccc000100000000ULL));
  // A zero shift count in every lane leaves the input unchanged.
  ASSERT_EQ(AsmUshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6712
TEST(Arm64InsnTest, UnsignedRoundingShiftLeftInt16x8) {
  // URSHL (vector, 8 x 16-bit lanes): unsigned per-lane shifts with rounding
  // on negative (right) counts; compare against the USHL test above.
  constexpr auto AsmUrshl = ASM_INSN_WRAP_FUNC_W_RES_WW_ARG("urshl %0.8h, %1.8h, %2.8h");
  __uint128_t arg1 = MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL);
  __uint128_t arg2 = MakeUInt128(0x0010000f00020001ULL, 0xfffffff1fff0ffefULL);
  ASSERT_EQ(AsmUrshl(arg1, arg2), MakeUInt128(0x0000800066643332ULL, 0x4ccd000100010000ULL));
  // A zero shift count in every lane leaves the input unchanged.
  ASSERT_EQ(AsmUrshl(arg1, 0), MakeUInt128(0x9999999999999999ULL, 0x9999999999999999ULL));
}
6720
TEST(Arm64InsnTest, UnsignedReciprocalSquareRootEstimateInt32x4) {
  // URSQRTE: checks the fixed-point reciprocal square root estimate of each
  // 32-bit lane against precomputed expected values.
  __uint128_t arg = MakeUInt128(0x9641122821407533ULL, 0x0265510042410489ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("ursqrte %0.4s, %1.4s")(arg);
  ASSERT_EQ(res, MakeUInt128(0xa7000000ffffffffULL, 0xfffffffffb800000ULL));
}
6726
TEST(Arm64InsnTest, UnsignedReciprocalEstimateInt32x4) {
  // URECPE: checks the fixed-point reciprocal estimate of each 32-bit lane
  // against precomputed expected values.
  __uint128_t arg = MakeUInt128(0x9714864899468611ULL, 0x2476054286734367ULL);
  __uint128_t res = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("urecpe %0.4s, %1.4s")(arg);
  ASSERT_EQ(res, MakeUInt128(0xd8800000d6000000ULL, 0xfffffffff4000000ULL));
}
6732
IsQcBitSet(uint32_t fpsr)6733 bool IsQcBitSet(uint32_t fpsr) {
6734 return (fpsr & kFpsrQcBit) != 0;
6735 }
6736
TEST(Arm64InsnTest, SignedSaturatingAddInt64x1) {
  // SQADD (scalar, 64-bit): the wrapper also captures FPSR so the test can
  // check the QC (cumulative saturation) bit.
  constexpr auto AsmSqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %d0, %d2, %d3");

  // In-range sum: plain addition, QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x4342527753119724ULL, 0x7430873043619511ULL);
  __uint128_t arg2 = MakeUInt128(0x3961190800302558ULL, 0x7838764420608504ULL);
  auto [res1, fpsr1] = AsmSqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x7ca36b7f5341bc7cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing sum: saturates to INT64_MAX and sets QC.
  __uint128_t arg3 = MakeUInt128(0x2557185308919284ULL, 0x4038050710300647ULL);
  __uint128_t arg4 = MakeUInt128(0x7684786324319100ULL, 0x0223929785255372ULL);
  auto [res2, fpsr2] = AsmSqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6752
TEST(Arm64InsnTest, SignedSaturatingAddInt32x4) {
  // SQADD (vector, 4 x 32-bit lanes) with FPSR capture for the QC bit.
  constexpr auto AsmSqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqadd %0.4s, %2.4s, %3.4s");

  // No lane overflows: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x9883554445602495ULL, 0x5666843660292219ULL);
  __uint128_t arg2 = MakeUInt128(0x5124830910605377ULL, 0x2019802183101032ULL);
  auto [res1, fpsr1] = AsmSqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0xe9a7d84d55c0780cULL, 0x76800457e339324bULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Some lanes saturate (to INT32_MIN/INT32_MAX) while others do not; a
  // single saturating lane is enough to set QC.
  __uint128_t arg3 = MakeUInt128(0x9713308844617410ULL, 0x7959162511714864ULL);
  __uint128_t arg4 = MakeUInt128(0x8744686112476054ULL, 0x2867343670904667ULL);
  auto [res2, fpsr2] = AsmSqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x8000000056a8d464ULL, 0x7fffffff7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6768
TEST(Arm64InsnTest, UnsignedSaturatingAddInt8x1) {
  // UQADD (scalar, 8-bit): only the low byte of each operand participates.
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %b0, %b2, %b3");

  // 0x73 + 0x31 = 0xa4: in range, QC clear.
  __uint128_t arg1 = MakeUInt128(0x6017174229960273ULL, 0x5310276871944944ULL);
  __uint128_t arg2 = MakeUInt128(0x4917939785144631ULL, 0x5973144353518504ULL);
  auto [res1, fpsr1] = AsmUqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x00000000000000a4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x90 + 0x99 overflows a byte: saturates to 0xff and sets QC.
  __uint128_t arg3 = MakeUInt128(0x3306263695626490ULL, 0x9108276271159038ULL);
  __uint128_t arg4 = MakeUInt128(0x5699505124652999ULL, 0x6062855443838330ULL);
  auto [res2, fpsr2] = AsmUqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6784
TEST(Arm64InsnTest, UnsignedSaturatingAddInt64x1) {
  // UQADD (scalar, 64-bit) with FPSR capture for the QC bit.
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %d0, %d2, %d3");

  // In-range sum: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0606885137234627ULL, 0x0799732723313469ULL);
  __uint128_t arg2 = MakeUInt128(0x3971456285542615ULL, 0x4676506324656766ULL);
  auto [res1, fpsr1] = AsmUqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x3f77cdb3bc776c3cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Overflowing sum: saturates to UINT64_MAX and sets QC.
  __uint128_t arg3 = MakeUInt128(0x9534957018600154ULL, 0x1262396228641389ULL);
  __uint128_t arg4 = MakeUInt128(0x7796733329070567ULL, 0x3769621564981845ULL);
  auto [res2, fpsr2] = AsmUqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6800
TEST(Arm64InsnTest, UnsignedSaturatingAddInt32x4) {
  // UQADD (vector, 4 x 32-bit lanes) with FPSR capture for the QC bit.
  constexpr auto AsmUqadd = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqadd %0.4s, %2.4s, %3.4s");

  // No lane overflows: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x9737425700735921ULL, 0x0031541508936793ULL);
  __uint128_t arg2 = MakeUInt128(0x0081699805365202ULL, 0x7600727749674584ULL);
  auto [res1, fpsr1] = AsmUqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x97b8abef05a9ab23ULL, 0x7631c68c51faad17ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // One lane saturates to UINT32_MAX, which is enough to set QC.
  __uint128_t arg3 = MakeUInt128(0x9727856471983963ULL, 0x0878154322116691ULL);
  __uint128_t arg4 = MakeUInt128(0x8654522268126887ULL, 0x2684459684424161ULL);
  auto [res2, fpsr2] = AsmUqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffd9aaa1eaULL, 0x2efc5ad9a653a7f2ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6816
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x1) {
  // SQSUB (scalar, 32-bit): covers the in-range case plus saturation in
  // both directions.
  constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %s0, %s2, %s3");

  // In-range difference: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x3178534870760322ULL, 0x1982970579751191ULL);
  __uint128_t arg2 = MakeUInt128(0x4405109942358830ULL, 0x3454635349234982ULL);
  auto [res1, fpsr1] = AsmSqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x2e407af2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negative overflow: saturates to INT32_MIN and sets QC.
  __uint128_t arg3 = MakeUInt128(0x1423696483086410ULL, 0x2592887457999322ULL);
  __uint128_t arg4 = MakeUInt128(0x3749551912219519ULL, 0x0342445230753513ULL);
  auto [res2, fpsr2] = AsmSqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Positive overflow: saturates to INT32_MAX and sets QC.
  __uint128_t arg5 = MakeUInt128(0x3083508879584152ULL, 0x1489912761065137ULL);
  __uint128_t arg6 = MakeUInt128(0x4153943580721139ULL, 0x0328574918769094ULL);
  auto [res3, fpsr3] = AsmSqsub(arg5, arg6);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
6838
TEST(Arm64InsnTest, SignedSaturatingSubtractInt64x1) {
  // SQSUB (scalar, 64-bit) with FPSR capture for the QC bit.
  constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %d0, %d2, %d3");

  // In-range difference: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x4416125223196943ULL, 0x4712064173754912ULL);
  __uint128_t arg2 = MakeUInt128(0x1635700857369439ULL, 0x7305979709719726ULL);
  auto [res1, fpsr1] = AsmSqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x2de0a249cbe2d50aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Positive overflow: saturates to INT64_MAX and sets QC.
  __uint128_t arg3 = MakeUInt128(0x7862766490242516ULL, 0x1990277471090335ULL);
  __uint128_t arg4 = MakeUInt128(0x9333093049483805ULL, 0x9785662884478744ULL);
  auto [res2, fpsr2] = AsmSqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6854
TEST(Arm64InsnTest, SignedSaturatingSubtractInt32x4) {
  // SQSUB (vector, 4 x 32-bit lanes) with FPSR capture for the QC bit.
  constexpr auto AsmSqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqsub %0.4s, %2.4s, %3.4s");

  // No lane overflows: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x4485680977569630ULL, 0x3129588719161129ULL);
  __uint128_t arg2 = MakeUInt128(0x2946818849363386ULL, 0x4739274760122696ULL);
  auto [res1, fpsr1] = AsmSqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x1b3ee6812e2062aaULL, 0xe9f03140b903ea93ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes saturate in both directions (INT32_MIN and INT32_MAX); QC is set.
  __uint128_t arg3 = MakeUInt128(0x9304127100727784ULL, 0x9301555038895360ULL);
  __uint128_t arg4 = MakeUInt128(0x3382619293437970ULL, 0x8187432094991415ULL);
  auto [res2, fpsr2] = AsmSqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x800000006d2efe14ULL, 0x117a12307fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6870
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x1) {
  // UQSUB (scalar, 32-bit) with FPSR capture for the QC bit.
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %s0, %s2, %s3");

  // In-range difference: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x2548156091372812ULL, 0x8406333039373562ULL);
  __uint128_t arg2 = MakeUInt128(0x4200160456645574ULL, 0x1458816605216660ULL);
  auto [res1, fpsr1] = AsmUqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x3ad2d29eULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Unsigned underflow: saturates to 0 and sets QC.
  __uint128_t arg3 = MakeUInt128(0x1259960281839309ULL, 0x5487090590738613ULL);
  __uint128_t arg4 = MakeUInt128(0x5191459181951029ULL, 0x7327875571049729ULL);
  auto [res2, fpsr2] = AsmUqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0U, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6886
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt64x1) {
  // UQSUB (scalar, 64-bit) with FPSR capture for the QC bit.
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %d0, %d2, %d3");

  // In-range difference: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x9691077542576474ULL, 0x8832534141213280ULL);
  __uint128_t arg2 = MakeUInt128(0x0626717094009098ULL, 0x2235296579579978ULL);
  auto [res1, fpsr1] = AsmUqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x906a9604ae56d3dcULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Unsigned underflow: saturates to 0 and sets QC.
  __uint128_t arg3 = MakeUInt128(0x7752929106925043ULL, 0x2614469501098610ULL);
  __uint128_t arg4 = MakeUInt128(0x8889991465855188ULL, 0x1873582528164302ULL);
  auto [res2, fpsr2] = AsmUqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6902
TEST(Arm64InsnTest, UnsignedSaturatingSubtractInt32x4) {
  // UQSUB (vector, 4 x 32-bit lanes) with FPSR capture for the QC bit.
  constexpr auto AsmUqsub = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqsub %0.4s, %2.4s, %3.4s");

  // No lane underflows: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x6884962578665885ULL, 0x9991798675205545ULL);
  __uint128_t arg2 = MakeUInt128(0x5809900455646117ULL, 0x8755249370124553ULL);
  auto [res1, fpsr1] = AsmUqsub(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x107b06212301f76eULL, 0x123c54f3050e0ff2ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // One lane underflows and saturates to 0, which is enough to set QC.
  __uint128_t arg3 = MakeUInt128(0x5032678340586301ULL, 0x9301932429963972ULL);
  __uint128_t arg4 = MakeUInt128(0x0444517928812285ULL, 0x4478211953530898ULL);
  auto [res2, fpsr2] = AsmUqsub(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x4bee160a17d7407cULL, 0x4e89720b00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6918
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt8x1) {
  // SQABS (scalar, 8-bit): absolute value with saturation; only the low
  // byte of the input participates.
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %b0, %b2");

  // abs(0x81 = -127) = 127: in range, QC clear.
  __uint128_t arg1 = MakeUInt128(0x8918016855727981ULL, 0x5642185819119749ULL);
  auto [res1, fpsr1] = AsmSqabs(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // abs(0x80 = INT8_MIN) overflows: saturates to 0x7f and sets QC.
  __uint128_t arg2 = MakeUInt128(0x0000000000000080ULL, 0x6464607287574305ULL);
  auto [res2, fpsr2] = AsmSqabs(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6932
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt64x1) {
  // SQABS (scalar, 64-bit) with FPSR capture for the QC bit.
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %d0, %d2");

  // Absolute value of an in-range negative input: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x9717317281315179ULL, 0x3290443112181587ULL);
  auto [res1, fpsr1] = AsmSqabs(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x68e8ce8d7eceae87ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // abs(INT64_MIN) overflows: saturates to INT64_MAX and sets QC.
  __uint128_t arg2 = MakeUInt128(0x8000000000000000ULL, 0x1001237687219447ULL);
  auto [res2, fpsr2] = AsmSqabs(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6946
TEST(Arm64InsnTest, SignedSaturatingAbsoluteInt32x4) {
  // SQABS (vector, 4 x 32-bit lanes) with FPSR capture for the QC bit.
  constexpr auto AsmSqabs = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqabs %0.4s, %2.4s");

  // Only the one negative lane changes; no lane saturates, so QC is clear.
  __uint128_t arg1 = MakeUInt128(0x9133820578492800ULL, 0x6982551957402018ULL);
  auto [res1, fpsr1] = AsmSqabs(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6ecc7dfb78492800ULL, 0x6982551957402018ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // One lane holds INT32_MIN: it saturates to INT32_MAX and sets QC.
  __uint128_t arg2 = MakeUInt128(0x1810564129725083ULL, 0x6070356880000000ULL);
  auto [res2, fpsr2] = AsmSqabs(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x1810564129725083ULL, 0x607035687fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6960
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x1) {
  // SQNEG (scalar, 32-bit): negation with saturation and FPSR capture.
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %s0, %s2");

  // Negating an in-range value: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x6461582694563802ULL, 0x3950283712168644ULL);
  auto [res1, fpsr1] = AsmSqneg(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x000000006ba9c7feULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negating INT32_MIN overflows: saturates to INT32_MAX and sets QC.
  __uint128_t arg2 = MakeUInt128(0x6561785280000000ULL, 0x1277128269186886ULL);
  auto [res2, fpsr2] = AsmSqneg(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6974
TEST(Arm64InsnTest, SignedSaturatingNegateInt64x1) {
  // SQNEG (scalar, 64-bit): negation with saturation and FPSR capture.
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %d0, %d2");

  // Negating an in-range value: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x9703600795698276ULL, 0x2639234410714658ULL);
  auto [res1, fpsr1] = AsmSqneg(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x68fc9ff86a967d8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Negating INT64_MIN overflows: saturates to INT64_MAX and sets QC.
  __uint128_t arg2 = MakeUInt128(0x8000000000000000ULL, 0x4052295369374997ULL);
  auto [res2, fpsr2] = AsmSqneg(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
6988
TEST(Arm64InsnTest, SignedSaturatingNegateInt32x4) {
  // SQNEG (vector, 4 x 32-bit lanes) with FPSR capture for the QC bit.
  constexpr auto AsmSqneg = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqneg %0.4s, %2.4s");

  // All lanes negate without overflow: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x9172320202822291ULL, 0x4886959399729974ULL);
  auto [res1, fpsr1] = AsmSqneg(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x6e8dcdfefd7ddd6fULL, 0xb7796a6d668d668cULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // One lane holds INT32_MIN: it saturates to INT32_MAX and sets QC.
  __uint128_t arg2 = MakeUInt128(0x2974711553718589ULL, 0x2423849380000000ULL);
  auto [res2, fpsr2] = AsmSqneg(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xd68b8eebac8e7a77ULL, 0xdbdc7b6d7fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7002
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x1) {
  // SQSHL (scalar, 32-bit, immediate shift of 20) with FPSR capture.
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %s0, %s2, #20");

  // 0x181 << 20 fits in 32 bits: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x9724611600000181ULL, 0x0003509892864120ULL);
  auto [res1, fpsr1] = AsmSqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0000000018100000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // The shift overflows: saturates to INT32_MAX and sets QC.
  __uint128_t arg2 = MakeUInt128(0x4195163551108763ULL, 0x2042676129798265ULL);
  auto [res2, fpsr2] = AsmSqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7016
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt64x1) {
  // SQSHL (scalar, 64-bit, immediate shift of 28) with FPSR capture.
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %d0, %d2, #28");

  // The shifted value fits in 64 bits: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0000000774000539ULL, 0x2622760323659751ULL);
  auto [res1, fpsr1] = AsmSqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x7740005390000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // A negative input overflows downward: saturates to INT64_MIN, sets QC.
  __uint128_t arg2 = MakeUInt128(0x9938714995449137ULL, 0x3020518436690767ULL);
  auto [res2, fpsr2] = AsmSqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7030
TEST(Arm64InsnTest, SignedSaturatingShiftLeftImmInt32x4) {
  // SQSHL (vector, 4 x 32-bit lanes, immediate shift of 12) with FPSR
  // capture for the QC bit.
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshl %0.4s, %2.4s, #12");

  // Every lane fits after the shift: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0007256800042011ULL, 0x0000313500033555ULL);
  auto [res1, fpsr1] = AsmSqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x7256800042011000ULL, 0x0313500033555000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lanes saturate in both directions (INT32_MAX and INT32_MIN); QC is set.
  __uint128_t arg2 = MakeUInt128(0x0944031900072034ULL, 0x8651010561049872ULL);
  auto [res2, fpsr2] = AsmSqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff72034000ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7044
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt32x1) {
  // SQSHL (scalar, 32-bit, shift count from a register): sweeps the count
  // across -33..-31, -1, 0, 1, and 31..33. Right shifts (negative counts)
  // never saturate; large left shifts of this positive input saturate to
  // INT32_MAX and set QC.
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;
  __uint128_t arg1 = MakeUInt128(0x7480771811555330ULL, 0x9098870255052076ULL);

  // Right shifts of 31 bits or more clear the positive low word.
  std::tie(res, fpsr) = AsmSqshl(arg1, -33);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, -32);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, -31);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, -1);
  ASSERT_EQ(res, MakeUInt128(0x08aaa998ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Zero count: pass-through.
  std::tie(res, fpsr) = AsmSqshl(arg1, 0);
  ASSERT_EQ(res, MakeUInt128(0x11555330ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, 1);
  ASSERT_EQ(res, MakeUInt128(0x22aaa660ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shifts of 31 bits or more overflow: INT32_MAX, QC set.
  std::tie(res, fpsr) = AsmSqshl(arg1, 31);
  ASSERT_EQ(res, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, 32);
  ASSERT_EQ(res, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqshl(arg1, 33);
  ASSERT_EQ(res, MakeUInt128(0x7fffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
7088
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt64x1) {
  // UQSHL (scalar, 64-bit, immediate shift of 28) with FPSR capture.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %d0, %d2, #28");

  // The shifted value fits in 64 bits: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0000000961573564ULL, 0x8883443185280853ULL);
  auto [res1, fpsr1] = AsmUqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x9615735640000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // The shift overflows: saturates to UINT64_MAX and sets QC.
  __uint128_t arg2 = MakeUInt128(0x9759277344336553ULL, 0x8418834030351782ULL);
  auto [res2, fpsr2] = AsmUqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7102
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftImmInt32x4) {
  // UQSHL (vector, 4 x 32-bit lanes, immediate shift of 12) with FPSR
  // capture for the QC bit.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshl %0.4s, %2.4s, #12");

  // Every lane fits after the shift: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0000326300096218ULL, 0x0004565900066853ULL);
  auto [res1, fpsr1] = AsmUqshl(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x0326300096218000ULL, 0x4565900066853000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Some lanes overflow and saturate to UINT32_MAX; QC is set.
  __uint128_t arg2 = MakeUInt128(0x0009911314010804ULL, 0x0009732335449090ULL);
  auto [res2, fpsr2] = AsmUqshl(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x99113000ffffffffULL, 0x97323000ffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7116
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt32x1) {
  // UQSHL (scalar, 32-bit, shift count from a register): sweeps the count
  // across -33..-31, -1, 0, 1, and 31..33. Right shifts (negative counts)
  // never saturate; large left shifts saturate to UINT32_MAX and set QC.
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;
  __uint128_t arg1 = MakeUInt128(0x9714978507414585ULL, 0x3085781339156270ULL);

  // Right shifts of 31 bits or more clear the low word.
  std::tie(res, fpsr) = AsmUqshl(arg1, -33);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, -32);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, -31);
  ASSERT_EQ(res, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, -1);
  ASSERT_EQ(res, MakeUInt128(0x03a0a2c2ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Zero count: pass-through.
  std::tie(res, fpsr) = AsmUqshl(arg1, 0);
  ASSERT_EQ(res, MakeUInt128(0x07414585ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 1);
  ASSERT_EQ(res, MakeUInt128(0x0e828b0aULL, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shifts of 31 bits or more overflow: UINT32_MAX, QC set.
  std::tie(res, fpsr) = AsmUqshl(arg1, 31);
  ASSERT_EQ(res, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 32);
  ASSERT_EQ(res, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqshl(arg1, 33);
  ASSERT_EQ(res, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
7160
TEST(Arm64InsnTest, SignedSaturatingShiftLeftByRegisterImmInt16x8) {
  // SQSHL (vector, 8 x 16-bit lanes, per-lane shift counts in arg2:
  // -33, -32, -31, -1, +1, +31, +32, +33).
  constexpr auto AsmSqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqshl %0.8h, %2.8h, %3.8h");

  // Shifting zero never saturates, whatever the counts: QC stays clear.
  __uint128_t arg1 = 0U;
  __uint128_t arg2 = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);
  auto [res1, fpsr1] = AsmSqshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Nonzero lanes with large left counts saturate to INT16_MAX; QC is set.
  __uint128_t arg3 = MakeUInt128(0x3333333333333333ULL, 0x3333333333333333ULL);
  auto [res2, fpsr2] = AsmSqshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000001999ULL, 0x66667fff7fff7fffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7175
TEST(Arm64InsnTest, UnsignedSaturatingShiftLeftByRegisterImmInt16x8) {
  // UQSHL (vector, 8 x 16-bit lanes, per-lane shift counts in arg2:
  // -33, -32, -31, -1, +1, +31, +32, +33).
  constexpr auto AsmUqshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqshl %0.8h, %2.8h, %3.8h");

  // Shifting zero never saturates, whatever the counts: QC stays clear.
  __uint128_t arg1 = 0U;
  __uint128_t arg2 = MakeUInt128(0xffdfffe0ffe1ffffULL, 0x0001001f00200021ULL);
  auto [res1, fpsr1] = AsmUqshl(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0U, 0U));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Nonzero lanes with large left counts saturate to UINT16_MAX; QC is set.
  __uint128_t arg3 = MakeUInt128(0x7777777777777777ULL, 0x7777777777777777ULL);
  auto [res2, fpsr2] = AsmUqshl(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000003bbbULL, 0xeeeeffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7190
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  // SQXTN: narrows each 64-bit lane to 32 bits with signed saturation.
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %0.2s, %2.2d");

  // Both lanes are out of int32 range: they saturate to INT32_MAX and
  // INT32_MIN respectively, and the QC bit is set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x800000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in int32: they narrow unchanged and QC stays clear.
  // (Suffix fixed from "LL" to "ULL" for consistency with the rest of the
  // file; the positive value is unchanged.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x7ecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7204
TEST(Arm64InsnTest, SignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  // SQXTN (scalar): narrows a 64-bit value to 32 bits with signed saturation.
  constexpr auto AsmSqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtn %s0, %d2");

  // Value exceeds int32 range: saturates to INT32_MAX and sets QC.
  __uint128_t arg1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [res1, fpsr1] = AsmSqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Value fits in int32: narrows unchanged, QC stays clear.
  __uint128_t arg2 = MakeUInt128(0x0000000012345678ULL, 0x0ULL);
  auto [res2, fpsr2] = AsmSqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000012345678ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7218
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x2ToInt32x2) {
  // UQXTN: narrows each 64-bit lane to 32 bits with unsigned saturation.
  // (Local renamed from the "AsmUqstn" typo to AsmUqxtn, matching the
  // instruction mnemonic and the sibling scalar test.)
  constexpr auto AsmUqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %0.2s, %2.2d");

  // Both lanes exceed uint32 range: saturate to UINT32_MAX and set QC.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmUqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in uint32: they narrow unchanged and QC stays clear.
  // (Suffix fixed from "LL" to "ULL" for consistency; value unchanged.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmUqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7232
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrowInt64x1ToInt32x1) {
  // UQXTN (scalar): narrows a 64-bit value to 32 bits with unsigned
  // saturation.
  constexpr auto AsmUqxtn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqxtn %s0, %d2");

  // Value exceeds uint32 range: saturates to UINT32_MAX and sets QC.
  __uint128_t arg1 = MakeUInt128(0x1234567812345678ULL, 0x0ULL);
  auto [res1, fpsr1] = AsmUqxtn(arg1);
  ASSERT_EQ(res1, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Value fits in uint32 (unsigned, so 0x87654321 is in range): narrows
  // unchanged, QC stays clear.
  __uint128_t arg2 = MakeUInt128(0x0000000087654321ULL, 0x0ULL);
  auto [res2, fpsr2] = AsmUqxtn(arg2);
  ASSERT_EQ(res2, MakeUInt128(0x0000000087654321ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7246
TEST(Arm64InsnTest, SignedSaturatingExtractNarrow2Int64x2ToInt32x2) {
  // SQXTN2: narrows each 64-bit lane into the UPPER half of the destination
  // while the lower half (first argument to the wrapper) is preserved.
  constexpr auto AsmSqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtn2 %0.4s, %2.2d");

  // Both source lanes are out of int32 range: upper half saturates to
  // INT32_MIN/INT32_MAX, lower half keeps arg2's low 64 bits, QC is set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmSqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0x800000007fffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both source lanes fit in int32: narrowed unchanged, QC stays clear.
  // (Suffix fixed from "LL" to "ULL" for consistency; value unchanged.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x000000007ecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmSqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0x7ecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7262
TEST(Arm64InsnTest, UnsignedSaturatingExtractNarrow2Int64x2ToInt32x4) {
  // UQXTN2: narrows each 64-bit lane into the UPPER half of the destination
  // with unsigned saturation; the lower half is preserved.
  constexpr auto AsmUqxtn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqxtn2 %0.4s, %2.2d");

  // Both source lanes exceed uint32 range: upper half saturates to
  // UINT32_MAX, lower half keeps arg2's low 64 bits, QC is set.
  __uint128_t arg1 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res1, fpsr1] = AsmUqxtn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x6121865619673378ULL, 0xffffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both source lanes fit in uint32: narrowed unchanged, QC stays clear.
  // (Suffix fixed from "LL" to "ULL" for consistency; value unchanged.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x6121865619673378ULL, 0x6236256125216320ULL);
  auto [res2, fpsr2] = AsmUqxtn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x6121865619673378ULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7278
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x2ToInt32x2) {
  // SQXTUN: narrows each signed 64-bit lane to an UNSIGNED 32-bit result,
  // saturating to [0, UINT32_MAX].
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %0.2s, %2.2d");

  // 0x1aabbccdd exceeds uint32 range: saturates to UINT32_MAX and sets QC;
  // the in-range lane narrows unchanged.
  __uint128_t arg1 = MakeUInt128(0x0000000044332211ULL, 0x00000001aabbccddULL);
  auto [res1, fpsr1] = AsmSqxtun(arg1);
  ASSERT_EQ(res1, MakeUInt128(0xffffffff44332211ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // Both lanes fit in uint32: narrowed unchanged, QC stays clear.
  // (Suffix fixed from "LL" to "ULL" for consistency; value unchanged.)
  __uint128_t arg2 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  auto [res2, fpsr2] = AsmSqxtun(arg2);
  ASSERT_EQ(res2, MakeUInt128(0xfecdba9801234567ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7292
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrowInt64x1ToInt32x1) {
  // Scalar SQXTUN: a signed 64-bit source narrowed to an unsigned 32-bit
  // result with saturation.
  constexpr auto AsmSqxtun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqxtun %s0, %d2");

  // A source above UINT32_MAX clamps to 0xffffffff and raises QC.
  const __uint128_t big_input = MakeUInt128(0x00000001ff332211ULL, 0x0ULL);
  const auto [clamped, flags1] = AsmSqxtun(big_input);
  ASSERT_EQ(clamped, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags1));

  // A source that already fits in 32 bits passes through; QC stays clear.
  const __uint128_t small_input = MakeUInt128(0x00000000ff332211ULL, 0x0ULL);
  const auto [exact, flags2] = AsmSqxtun(small_input);
  ASSERT_EQ(exact, MakeUInt128(0x00000000ff332211ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags2));
}
7306
TEST(Arm64InsnTest, SignedSaturatingExtractUnsignedNarrow2Int64x2ToInt32x4) {
  // SQXTUN2: signed-to-unsigned saturating narrow into the upper half of the
  // destination, keeping the lower half intact.
  constexpr auto AsmSqxtun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqxtun2 %0.4s, %2.2d");

  // A negative 64-bit lane clamps to zero; QC set.  (Result's upper half
  // shows lane0 narrowed exactly, lane1 from 0xfedcba9876543210 clamped.)
  __uint128_t arg1 = MakeUInt128(0x0000000089abcdefULL, 0xfedcba9876543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqxtun2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0123456789abcdefULL, 0x0000000089abcdefULL));
  ASSERT_TRUE(IsQcBitSet(fpsr1));

  // In-range lanes narrow exactly; QC stays clear.  (Suffix normalized to ULL
  // for consistency with the rest of the file; value unchanged.)
  __uint128_t arg3 = MakeUInt128(0x0000000001234567ULL, 0x00000000fecdba98ULL);
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res2, fpsr2] = AsmSqxtun2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0123456789abcdefULL, 0xfecdba9801234567ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr2));
}
7322
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x1) {
  // Scalar SUQADD: unsigned value accumulated into a signed register with
  // signed saturation of the 32-bit result.
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %s0, %s2");

  // Sum fits in int32: exact result, QC clear.
  const __uint128_t lhs1 = MakeUInt128(0x9392023115638719ULL, 0x5080502467972579ULL);
  const __uint128_t rhs1 = MakeUInt128(0x2497605762625913ULL, 0x3285597263712112ULL);
  const auto [sum1, flags1] = AsmSuqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0x0000000077c5e02cULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Sum overflows: clamped to INT32_MAX and QC raised.
  const __uint128_t lhs2 = MakeUInt128(0x9099791776687477ULL, 0x4481882870632315ULL);
  const __uint128_t rhs2 = MakeUInt128(0x5158650328981642ULL, 0x2828823274686610ULL);
  const auto [sum2, flags2] = AsmSuqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7338
TEST(Arm64InsnTest, SignedSaturatingAccumulateOfUnsignedValueInt32x4) {
  // Vector SUQADD over four 32-bit lanes with per-lane signed saturation.
  constexpr auto AsmSuqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("suqadd %0.4s, %2.4s");

  // Every lane's sum stays within the signed range: QC remains clear.
  const __uint128_t lhs1 = MakeUInt128(0x2590181000350989ULL, 0x2864120419516355ULL);
  const __uint128_t rhs1 = MakeUInt128(0x1108763204267612ULL, 0x9798265294258829ULL);
  const auto [sum1, flags1] = AsmSuqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0x36988e42045b7f9bULL, 0xbffc3856ad76eb7eULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Some lanes overflow and clamp to INT32_MAX (0x7fffffff): QC is set.
  const __uint128_t lhs2 = MakeUInt128(0x9082888934938376ULL, 0x4393992569006040ULL);
  const __uint128_t rhs2 = MakeUInt128(0x6731142209331219ULL, 0x5936202982972351ULL);
  const auto [sum2, flags2] = AsmSuqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0x7fffffff3dc6958fULL, 0x7fffffffeb978391ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7354
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x1) {
  // Scalar USQADD: a signed value is accumulated into an unsigned register,
  // with the 32-bit result saturated to [0, UINT32_MAX]. QC is set whenever
  // the sum is clamped.
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %s0, %s2");

  // Sum stays in range: exact result, QC clear.
  __uint128_t arg1 = MakeUInt128(0x9052523242348615ULL, 0x3152097693846104ULL);
  __uint128_t arg2 = MakeUInt128(0x2582849714963475ULL, 0x3418375620030149ULL);
  auto [res1, fpsr1] = AsmUsqadd(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000056caba8aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Sum underflows below zero: clamped to 0, QC set.
  __uint128_t arg3 = MakeUInt128(0x9887125387801719ULL, 0x6071816407812484ULL);
  __uint128_t arg4 = MakeUInt128(0x7847257912407824ULL, 0x5443616823452395ULL);
  auto [res2, fpsr2] = AsmUsqadd(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Sum overflows above UINT32_MAX: clamped to 0xffffffff, QC set.
  __uint128_t arg5 = MakeUInt128(0x9708583970761645ULL, 0x8229630324424328ULL);
  __uint128_t arg6 = MakeUInt128(0x2377374595170285ULL, 0x6069806788952176ULL);
  auto [res3, fpsr3] = AsmUsqadd(arg5, arg6);
  ASSERT_EQ(res3, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
7376
TEST(Arm64InsnTest, UnsignedSaturatingAccumulateOfSignedValueInt32x4) {
  // Vector USQADD over four 32-bit lanes with per-lane unsigned saturation.
  constexpr auto AsmUsqadd = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("usqadd %0.4s, %2.4s");

  // Every lane stays within [0, UINT32_MAX]: QC remains clear.
  const __uint128_t lhs1 = MakeUInt128(0x4129137074982305ULL, 0x7592909166293919ULL);
  const __uint128_t rhs1 = MakeUInt128(0x5014721157586067ULL, 0x2700925477180257ULL);
  const auto [sum1, flags1] = AsmUsqadd(lhs1, rhs1);
  ASSERT_EQ(sum1, MakeUInt128(0x913d8581cbf0836cULL, 0x9c9322e5dd413b70ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Some lanes clamp to UINT32_MAX or to zero: QC is set.
  const __uint128_t lhs2 = MakeUInt128(0x7816422828823274ULL, 0x6866106592732197ULL);
  const __uint128_t rhs2 = MakeUInt128(0x9071623846421534ULL, 0x8985247621678905ULL);
  const auto [sum2, flags2] = AsmUsqadd(lhs2, rhs2);
  ASSERT_EQ(sum2, MakeUInt128(0xffffffff6ec447a8ULL, 0xf1eb34db00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7392
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt32x1) {
  // Scalar SQRSHL: signed saturating rounding shift left. The shift count is
  // signed — negative counts shift right with rounding. The cases below walk
  // the count from far-right shifts through zero to saturating left shifts.
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;

  __uint128_t arg = MakeUInt128(0x9736705435580445ULL, 0x8657202276378404ULL);
  // Right shifts of 31 bits and beyond reduce this element to zero after
  // rounding; no saturation, QC clear.
  std::tie(res, fpsr) = AsmSqrshl(arg, -33);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, -32);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, -31);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Small right shift: rounded value, QC clear.
  std::tie(res, fpsr) = AsmSqrshl(arg, -1);
  ASSERT_EQ(res, MakeUInt128(0x000000001aac0223ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Zero shift: element passes through unchanged.
  std::tie(res, fpsr) = AsmSqrshl(arg, 0);
  ASSERT_EQ(res, MakeUInt128(0x0000000035580445ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Small left shift that still fits in int32: QC clear.
  std::tie(res, fpsr) = AsmSqrshl(arg, 1);
  ASSERT_EQ(res, MakeUInt128(0x000000006ab0088aULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shifts of 31 bits and beyond overflow: clamped to INT32_MAX, QC set.
  std::tie(res, fpsr) = AsmSqrshl(arg, 31);
  ASSERT_EQ(res, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, 32);
  ASSERT_EQ(res, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmSqrshl(arg, 33);
  ASSERT_EQ(res, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
7436
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftLeftInt16x8) {
  // Vector SQRSHL on eight 16-bit lanes; each lane has its own signed shift
  // count taken from the second operand.
  constexpr auto AsmSqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrshl %0.8h, %2.8h, %3.8h");

  // Per-lane shift counts, positive (left) and negative (right).
  const __uint128_t shifts = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);

  // All lanes stay in range: no saturation, QC clear.
  const __uint128_t small_lanes = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  const auto [out1, flags1] = AsmSqrshl(small_lanes, shifts);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000132ULL, 0xcccd000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Large left shifts overflow several lanes: clamped to INT16_MAX, QC set.
  const __uint128_t big_lanes = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  const auto [out2, flags2] = AsmSqrshl(big_lanes, shifts);
  ASSERT_EQ(out2, MakeUInt128(0x7fff7fff7fff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7451
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt32x1) {
  // Scalar UQRSHL: unsigned saturating rounding shift left; negative counts
  // shift right with rounding. The cases below walk the count from far-right
  // shifts through zero to saturating left shifts.
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %s0, %s2, %s3");

  __uint128_t res;
  uint32_t fpsr;

  __uint128_t arg = MakeUInt128(0x9984124848262367ULL, 0x3771467226061633ULL);
  // Right shifts of 32 bits and beyond flush the element to zero; QC clear.
  std::tie(res, fpsr) = AsmUqrshl(arg, -33);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, -32);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // A 31-bit right shift of this element rounds up to 1.
  std::tie(res, fpsr) = AsmUqrshl(arg, -31);
  ASSERT_EQ(res, MakeUInt128(0x0000000000000001ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Small right shift: rounded value, QC clear.
  std::tie(res, fpsr) = AsmUqrshl(arg, -1);
  ASSERT_EQ(res, MakeUInt128(0x00000000241311b4ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Zero shift: element passes through unchanged.
  std::tie(res, fpsr) = AsmUqrshl(arg, 0);
  ASSERT_EQ(res, MakeUInt128(0x0000000048262367ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shift by one still fits in 32 bits: QC clear.
  std::tie(res, fpsr) = AsmUqrshl(arg, 1);
  ASSERT_EQ(res, MakeUInt128(0x00000000904c46ceULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr));

  // Left shifts of 31 bits and beyond overflow: clamped to UINT32_MAX, QC set.
  std::tie(res, fpsr) = AsmUqrshl(arg, 31);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, 32);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));

  std::tie(res, fpsr) = AsmUqrshl(arg, 33);
  ASSERT_EQ(res, MakeUInt128(0x00000000ffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr));
}
7495
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftLeftInt16x8) {
  // Vector UQRSHL on eight 16-bit lanes; per-lane signed shift counts.
  constexpr auto AsmUqrshl = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("uqrshl %0.8h, %2.8h, %3.8h");

  // Per-lane shift counts, positive (left) and negative (right).
  const __uint128_t shifts = MakeUInt128(0x00110010000f0001ULL, 0xfffffff1fff0ffefULL);

  // All lanes stay in range: no saturation, QC clear.
  const __uint128_t small_lanes = MakeUInt128(0x0000000000000099ULL, 0x9999099999999999ULL);
  const auto [out1, flags1] = AsmUqrshl(small_lanes, shifts);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000132ULL, 0x4ccd000000010000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Large left shifts overflow several lanes: clamped to 0xffff, QC set.
  const __uint128_t big_lanes = MakeUInt128(0x0099009900990099ULL, 0x0099009900990099ULL);
  const auto [out2, flags2] = AsmUqrshl(big_lanes, shifts);
  ASSERT_EQ(out2, MakeUInt128(0xffffffffffff0132ULL, 0x004d000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7510
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x1) {
  // Scalar SQSHRN: shift a signed 16-bit value right by 4, then narrow to a
  // signed 8-bit result with saturation.
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %b0, %h2, #4");

  // Shifted value fits in int8: exact narrow, QC clear.
  const __uint128_t fits = MakeUInt128(0x888786614762f943ULL, 0x4140104988899316ULL);
  const auto [out1, flags1] = AsmSqshrn(fits);
  ASSERT_EQ(out1, MakeUInt128(0x94U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Shifted value exceeds INT8_MAX: clamps to 0x7f and sets QC.
  const __uint128_t overflows = MakeUInt128(0x0051207678103588ULL, 0x6116602029611936ULL);
  const auto [out2, flags2] = AsmSqshrn(overflows);
  ASSERT_EQ(out2, MakeUInt128(0x7fU, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7524
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8) {
  // SQSHRN: each signed 16-bit lane shifted right by 4 and narrowed to a
  // signed 8-bit lane with saturation.
  constexpr auto AsmSqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrn %0.8b, %2.8h, #4");

  // Every shifted lane fits in int8: exact narrow, QC clear.
  const __uint128_t fits = MakeUInt128(0x0625051604340253ULL, 0x0299028602670568ULL);
  const auto [out1, flags1] = AsmSqshrn(fits);
  ASSERT_EQ(out1, MakeUInt128(0x2928265662514325ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Several lanes clamp to 0x7f/0x80: QC set.
  const __uint128_t overflows = MakeUInt128(0x2405806005642114ULL, 0x9386436864224724ULL);
  const auto [out2, flags2] = AsmSqshrn(overflows);
  ASSERT_EQ(out2, MakeUInt128(0x807f7f7f7f80567fULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7538
TEST(Arm64InsnTest, SignedSaturatingShiftRightNarrowInt16x8Upper) {
  // SQSHRN2: narrows into the upper 64 bits of the destination; the lower 64
  // bits are preserved.
  constexpr auto AsmSqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrn2 %0.16b, %2.8h, #4");

  const __uint128_t dst_init = MakeUInt128(0x3494819262681110ULL, 0x7399482506073949ULL);

  // No lane saturates: lower half kept, QC clear.
  const __uint128_t fits = MakeUInt128(0x0367034704100536ULL, 0x0175064803000078ULL);
  const auto [out1, flags1] = AsmSqshrn2(fits, dst_init);
  ASSERT_EQ(out1, MakeUInt128(0x3494819262681110ULL, 0x1764300736344153ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Several lanes clamp to 0x7f: QC set, lower half still kept.
  const __uint128_t overflows = MakeUInt128(0x4641074501673719ULL, 0x0483109676711344ULL);
  const auto [out2, flags2] = AsmSqshrn2(overflows, dst_init);
  ASSERT_EQ(out2, MakeUInt128(0x3494819262681110ULL, 0x487f7f7f7f74167fULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7553
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x1) {
  // Scalar UQSHRN: unsigned 16-bit source shifted right by 4, narrowed to 8
  // bits with unsigned saturation.
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %b0, %h2, #4");

  // Shifted value fits in a byte: exact result, QC clear.
  const __uint128_t fits = MakeUInt128(0x6797172898220360ULL, 0x7028806908776866ULL);
  const auto [out1, flags1] = AsmUqshrn(fits);
  ASSERT_EQ(out1, MakeUInt128(0x36U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Shifted value exceeds 0xff: clamps to 0xff and sets QC.
  const __uint128_t overflows = MakeUInt128(0x0593252746378405ULL, 0x3976918480820410ULL);
  const auto [out2, flags2] = AsmUqshrn(overflows);
  ASSERT_EQ(out2, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7567
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8) {
  // UQSHRN: each unsigned 16-bit lane shifted right by 4 and narrowed to an
  // unsigned 8-bit lane with saturation.
  constexpr auto AsmUqshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqshrn %0.8b, %2.8h, #4");

  // Every shifted lane fits in a byte: exact narrow, QC clear.
  const __uint128_t fits = MakeUInt128(0x0867067907600099ULL, 0x0693007509490515ULL);
  const auto [out1, flags1] = AsmUqshrn(fits);
  ASSERT_EQ(out1, MakeUInt128(0x6907945186677609ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Several lanes clamp to 0xff: QC set.
  const __uint128_t overflows = MakeUInt128(0x2736049811890413ULL, 0x0433116627747123ULL);
  const auto [out2, flags2] = AsmUqshrn(overflows);
  ASSERT_EQ(out2, MakeUInt128(0x43ffffffff49ff41ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7581
// NOTE: test name fixed from "Unigned..." to "Unsigned..." to match the
// naming of every other unsigned-narrow test in this file.
TEST(Arm64InsnTest, UnsignedSaturatingShiftRightNarrowInt16x8Upper) {
  // UQSHRN2: unsigned saturating shift-right-narrow into the upper 64 bits of
  // the destination; the lower 64 bits are preserved.
  constexpr auto AsmUqshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqshrn2 %0.16b, %2.8h, #4");

  // No lane saturates: lower half kept, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0441018407410768ULL, 0x0981066307240048ULL);
  __uint128_t arg2 = MakeUInt128(0x2393582740194493ULL, 0x5665161088463125ULL);
  auto [res1, fpsr1] = AsmUqshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x2393582740194493ULL, 0x9866720444187476ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0xff: QC set, lower half still kept.
  __uint128_t arg3 = MakeUInt128(0x0785297709734684ULL, 0x3030614624180358ULL);
  auto [res2, fpsr2] = AsmUqshrn2(arg3, arg2);
  ASSERT_EQ(res2, MakeUInt128(0x2393582740194493ULL, 0xffffff3578ff97ffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7596
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x1) {
  // Scalar SQRSHRN: rounding shift right by 4, then signed saturating narrow
  // from 16 to 8 bits.
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %b0, %h2, #4");

  // Rounded result fits in int8: QC clear.
  const __uint128_t fits = MakeUInt128(0x9610330799410534ULL, 0x7784574699992128ULL);
  const auto [out1, flags1] = AsmSqrshrn(fits);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000053ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Positive overflow clamps to INT8_MAX (0x7f): QC set.
  const __uint128_t pos_sat = MakeUInt128(0x5999993996122816ULL, 0x1521931488876938ULL);
  const auto [out2, flags2] = AsmSqrshrn(pos_sat);
  ASSERT_EQ(out2, MakeUInt128(0x000000000000007fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Negative overflow clamps to INT8_MIN (0x80): QC set.
  const __uint128_t neg_sat = MakeUInt128(0x8022281083009986ULL, 0x0165494165426169ULL);
  const auto [out3, flags3] = AsmSqrshrn(neg_sat);
  ASSERT_EQ(out3, MakeUInt128(0x0000000000000080ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7615
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8) {
  // SQRSHRN: per-lane rounding shift right by 4, then signed saturating
  // narrow from 16- to 8-bit lanes.
  constexpr auto AsmSqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrn %0.8b, %2.8h, #4");

  // Every rounded lane fits in int8: QC clear.
  const __uint128_t fits = MakeUInt128(0x0666070401700260ULL, 0x0520059204930759ULL);
  const auto [out1, flags1] = AsmSqrshrn(fits);
  ASSERT_EQ(out1, MakeUInt128(0x5259497666701726ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Several lanes clamp to 0x7f/0x80: QC set.
  const __uint128_t overflows = MakeUInt128(0x4143408146852981ULL, 0x5053947178900451ULL);
  const auto [out2, flags2] = AsmSqrshrn(overflows);
  ASSERT_EQ(out2, MakeUInt128(0x7f807f457f7f7f7fULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7629
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  // SQRSHRN2: rounding shift-right-narrow into the upper 64 bits of the
  // destination; the lower 64 bits are preserved.
  constexpr auto AsmSqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrn2 %0.16b, %2.8h, #4");

  // No lane saturates: lower half kept, QC clear.
  const __uint128_t src1 = MakeUInt128(0x0784017103960497ULL, 0x0707072501740336ULL);
  const __uint128_t dst1 = MakeUInt128(0x5662725928440620ULL, 0x4302141137199227ULL);
  const auto [out1, flags1] = AsmSqrshrn2(src1, dst1);
  ASSERT_EQ(out1, MakeUInt128(0x5662725928440620ULL, 0x7072173378173949ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Several lanes clamp to 0x7f/0x80: QC set, lower half still kept.
  const __uint128_t src2 = MakeUInt128(0x2066886512756882ULL, 0x6614973078865701ULL);
  const __uint128_t dst2 = MakeUInt128(0x5685016918647488ULL, 0x5416791545965072ULL);
  const auto [out2, flags2] = AsmSqrshrn2(src2, dst2);
  ASSERT_EQ(out2, MakeUInt128(0x5685016918647488ULL, 0x7f807f7f7f807f7fULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7645
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x1) {
  // Scalar UQRSHRN: rounding shift right by 4, then unsigned saturating
  // narrow from 16 to 8 bits.
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %b0, %h2, #4");

  // Rounded result fits in a byte: QC clear.
  const __uint128_t fits = MakeUInt128(0x9614236585950920ULL, 0x9083073323356034ULL);
  const auto [out1, flags1] = AsmUqrshrn(fits);
  ASSERT_EQ(out1, MakeUInt128(0x0000000000000092ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Rounded result exceeds 0xff: clamps to 0xff, QC set.
  const __uint128_t overflows = MakeUInt128(0x8465318730299026ULL, 0x6596450137183754ULL);
  const auto [out2, flags2] = AsmUqrshrn(overflows);
  ASSERT_EQ(out2, MakeUInt128(0x00000000000000ffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7659
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8) {
  // UQRSHRN: per-lane rounding shift right by 4, then unsigned saturating
  // narrow from 16- to 8-bit lanes.
  constexpr auto AsmUqrshrn = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("uqrshrn %0.8b, %2.8h, #4");

  // Every rounded lane fits in a byte: QC clear.
  const __uint128_t fits = MakeUInt128(0x0301067603860240ULL, 0x0011030402470073ULL);
  const auto [out1, flags1] = AsmUqrshrn(fits);
  ASSERT_EQ(out1, MakeUInt128(0x0130240730673824ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Several lanes clamp to 0xff: QC set.
  const __uint128_t overflows = MakeUInt128(0x5085082872462713ULL, 0x4946368501815469ULL);
  const auto [out2, flags2] = AsmUqrshrn(overflows);
  ASSERT_EQ(out2, MakeUInt128(0xffff18ffff83ffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7673
TEST(Arm64InsnTest, UnsignedSaturatingRoundingShiftRightNarrowInt16x8Upper) {
  // UQRSHRN2: rounding shift-right-narrow into the upper 64 bits of the
  // destination; the lower 64 bits are preserved.  (Wrapper renamed with the
  // "2" suffix for consistency with the other Upper tests in this file.)
  constexpr auto AsmUqrshrn2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("uqrshrn2 %0.16b, %2.8h, #4");

  // No lane saturates: lower half kept, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0388099005730661ULL, 0x0237022304780112ULL);
  __uint128_t arg2 = MakeUInt128(0x0392269110277722ULL, 0x6102544149221576ULL);
  auto [res1, fpsr1] = AsmUqrshrn2(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0392269110277722ULL, 0x2322481139995766ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Several lanes clamp to 0xff: QC set, lower half still kept.
  __uint128_t arg3 = MakeUInt128(0x9254069617600504ULL, 0x7974928060721268ULL);
  __uint128_t arg4 = MakeUInt128(0x8414695726397884ULL, 0x2560084531214065ULL);
  auto [res2, fpsr2] = AsmUqrshrn2(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x8414695726397884ULL, 0xffffffffff69ff50ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7689
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x1) {
  // Scalar SQSHRUN: signed 16-bit source shifted right by 4 and narrowed to
  // an unsigned 8-bit result with saturation.
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %b0, %h2, #4");

  // Shifted value fits in [0, 0xff]: exact result, QC clear.
  const __uint128_t fits = MakeUInt128(0x9143611439920063ULL, 0x8005083214098760ULL);
  const auto [out1, flags1] = AsmSqshrun(fits);
  ASSERT_EQ(out1, MakeUInt128(0x06U, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Negative source clamps to zero: QC set.
  const __uint128_t negative = MakeUInt128(0x3815174571259975ULL, 0x4953580239983146ULL);
  const auto [out2, flags2] = AsmSqshrun(negative);
  ASSERT_EQ(out2, MakeUInt128(0x00U, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Shifted value exceeds 0xff: clamps to 0xff, QC set.
  const __uint128_t overflows = MakeUInt128(0x4599309324851025ULL, 0x1682944672606661ULL);
  const auto [out3, flags3] = AsmSqshrun(overflows);
  ASSERT_EQ(out3, MakeUInt128(0xffU, 0U));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7708
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8) {
  // SQSHRUN: each signed 16-bit lane shifted right by 4 and narrowed to an
  // unsigned 8-bit lane with saturation.
  constexpr auto AsmSqshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshrun %0.8b, %2.8h, #4");

  // Every shifted lane fits in [0, 0xff]: QC clear.
  const __uint128_t fits = MakeUInt128(0x0911066408340874ULL, 0x0800074107250670ULL);
  const auto [out1, flags1] = AsmSqshrun(fits);
  ASSERT_EQ(out1, MakeUInt128(0x8074726791668387ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes clamp to 0xff (overflow) or 0x00 (negative): QC set.
  const __uint128_t saturates = MakeUInt128(0x4792258319129415ULL, 0x7390809143831384ULL);
  const auto [out2, flags2] = AsmSqshrun(saturates);
  ASSERT_EQ(out2, MakeUInt128(0xff00ffffffffff00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7722
TEST(Arm64InsnTest, SignedSaturatingShiftRightUnsignedNarrowInt16x8Upper) {
  // SQSHRUN2: signed-to-unsigned shift-right-narrow into the upper 64 bits of
  // the destination; the lower 64 bits are preserved.
  constexpr auto AsmSqshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqshrun2 %0.16b, %2.8h, #4");

  // No lane saturates: lower half kept, QC clear.
  const __uint128_t src1 = MakeUInt128(0x0625082101740415ULL, 0x0233074903960353ULL);
  const __uint128_t dst1 = MakeUInt128(0x0136178653673760ULL, 0x6421667781377399ULL);
  const auto [out1, flags1] = AsmSqshrun2(src1, dst1);
  ASSERT_EQ(out1, MakeUInt128(0x0136178653673760ULL, 0x2374393562821741ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes clamp to 0xff or 0x00: QC set, lower half still kept.
  const __uint128_t src2 = MakeUInt128(0x4295810545651083ULL, 0x1046297282937584ULL);
  const __uint128_t dst2 = MakeUInt128(0x1611625325625165ULL, 0x7249807849209989ULL);
  const auto [out2, flags2] = AsmSqshrun2(src2, dst2);
  ASSERT_EQ(out2, MakeUInt128(0x1611625325625165ULL, 0xffff00ffff00ffffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7738
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x1) {
  // Scalar SQRSHRUN: rounding shift right by 4, then signed-to-unsigned
  // saturating narrow from 16 to 8 bits.
  constexpr auto AsmSqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %b0, %h2, #4");

  // Rounded result fits in [0, 0xff]: QC clear.
  const __uint128_t fits = MakeUInt128(0x5760186946490886ULL, 0x8154528562134698ULL);
  const auto [out1, flags1] = AsmSqrshrun(fits);
  ASSERT_EQ(out1, MakeUInt128(0x88ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Negative source clamps to zero: QC set.
  const __uint128_t negative = MakeUInt128(0x8355444560249556ULL, 0x6684366029221951ULL);
  const auto [out2, flags2] = AsmSqrshrun(negative);
  ASSERT_EQ(out2, MakeUInt128(0x00ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));

  // Rounded result exceeds 0xff: clamps to 0xff, QC set.
  const __uint128_t overflows = MakeUInt128(0x2483091060537720ULL, 0x1980218310103270ULL);
  const auto [out3, flags3] = AsmSqrshrun(overflows);
  ASSERT_EQ(out3, MakeUInt128(0xffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags3));
}
7757
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8) {
  // SQRSHRUN: per-lane rounding shift right by 4, then signed-to-unsigned
  // saturating narrow from 16- to 8-bit lanes.
  constexpr auto AsmSqrshrun = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqrshrun %0.8b, %2.8h, #4");

  // Every rounded lane fits in [0, 0xff]: QC clear.
  const __uint128_t fits = MakeUInt128(0x0150069001490702ULL, 0x0673033808340550ULL);
  const auto [out1, flags1] = AsmSqrshrun(fits);
  ASSERT_EQ(out1, MakeUInt128(0x6734835515691570ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes clamp to 0xff or 0x00: QC set.
  const __uint128_t saturates = MakeUInt128(0x8363660178487710ULL, 0x6080980426924713ULL);
  const auto [out2, flags2] = AsmSqrshrun(saturates);
  ASSERT_EQ(out2, MakeUInt128(0xff00ffff00ffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7771
TEST(Arm64InsnTest, SignedSaturatingRoundingShiftRightUnsignedNarrowInt16x8Upper) {
  // SQRSHRUN2: rounding signed-to-unsigned shift-right-narrow into the upper
  // 64 bits of the destination; the lower 64 bits are preserved.
  constexpr auto AsmSqrshrun2 = ASM_INSN_WRAP_FUNC_WQ_RES_W0_ARG("sqrshrun2 %0.16b, %2.8h, #4");

  // No lane saturates: lower half kept, QC clear.
  const __uint128_t src1 = MakeUInt128(0x0733049502080757ULL, 0x0651018705990498ULL);
  const __uint128_t dst1 = MakeUInt128(0x5693795623875551ULL, 0x6175754380917805ULL);
  const auto [out1, flags1] = AsmSqrshrun2(src1, dst1);
  ASSERT_EQ(out1, MakeUInt128(0x5693795623875551ULL, 0x65185a4a73492175ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Lanes clamp to 0xff or 0x00: QC set, lower half still kept.
  const __uint128_t src2 = MakeUInt128(0x1444671298615527ULL, 0x5982014514102756ULL);
  const __uint128_t dst2 = MakeUInt128(0x0068929750246304ULL, 0x0173514891945763ULL);
  const auto [out2, flags2] = AsmSqrshrun2(src2, dst2);
  ASSERT_EQ(out2, MakeUInt128(0x0068929750246304ULL, 0xff14ffffffff00ffULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7787
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x1) {
  // Scalar SQSHLU: a signed input shifted left by 4 and saturated to the
  // unsigned 32-bit range.
  constexpr auto AsmSqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %s0, %s2, #4");

  // Positive input whose shifted value fits: exact result, QC clear.
  const __uint128_t exact_in = MakeUInt128(0x9704033001862556ULL, 0x1473321177711744ULL);
  const auto [exact_out, exact_fpsr] = AsmSqshlu(exact_in);
  ASSERT_EQ(exact_out, MakeUInt128(0x18625560ULL, 0U));
  ASSERT_FALSE(IsQcBitSet(exact_fpsr));

  // Negative input clamps to zero: QC set.
  const __uint128_t neg_in = MakeUInt128(0x3095760196946490ULL, 0x8868154528562134ULL);
  const auto [neg_out, neg_fpsr] = AsmSqshlu(neg_in);
  ASSERT_EQ(neg_out, MakeUInt128(0x00000000ULL, 0U));
  ASSERT_TRUE(IsQcBitSet(neg_fpsr));

  // Shifted value overflows 32 bits: clamps to UINT32_MAX, QC set.
  const __uint128_t big_in = MakeUInt128(0x1335028160884035ULL, 0x1781452541964320ULL);
  const auto [big_out, big_fpsr] = AsmSqshlu(big_in);
  ASSERT_EQ(big_out, MakeUInt128(0xffffffffULL, 0U));
  ASSERT_TRUE(IsQcBitSet(big_fpsr));
}
7806
TEST(Arm64InsnTest, SignedSaturatingShiftLeftUnsignedImmInt32x4) {
  // Vector SQSHLU: each signed 32-bit lane shifted left by 4 and saturated to
  // the unsigned 32-bit range.
  constexpr auto AsmSqshlu = ASM_INSN_WRAP_FUNC_WQ_RES_W_ARG("sqshlu %0.4s, %2.4s, #4");

  // Every lane shifts without clamping: QC clear.
  const __uint128_t fits = MakeUInt128(0x0865174507877133ULL, 0x0813875205980941ULL);
  const auto [out1, flags1] = AsmSqshlu(fits);
  ASSERT_EQ(out1, MakeUInt128(0x8651745078771330ULL, 0x8138752059809410ULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // Overflowing lanes clamp to UINT32_MAX, negative lanes to zero: QC set.
  const __uint128_t saturates = MakeUInt128(0x2174227300352296ULL, 0x0080891797050682ULL);
  const auto [out2, flags2] = AsmSqshlu(saturates);
  ASSERT_EQ(out2, MakeUInt128(0xffffffff03522960ULL, 0x0808917000000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7820
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x2) {
  // SQDMULL (lower halves): two signed 32-bit products, each doubled and
  // widened to 64 bits. Only the low 64 bits of each source are read.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.2s");

  // Small lanes: 2*(4*2) = 0x10 and 2*(2*3) = 0xc; QC clear.
  __uint128_t arg1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  // NOTE(review): this literal has only 15 hex digits and reads as
  // 0x0feed00040000002 — possibly a missing digit. It sits in the upper half,
  // which the lower-half form does not read, so the expected values hold
  // either way; confirm the intended constant.
  __uint128_t arg2 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000010ULL, 0x000000000000000cULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows: saturates to INT64_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0xfeed00040000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000010ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7836
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong16x4) {
  // SQDMULL (lower halves): four signed 16-bit products, each doubled and
  // widened to 32 bits.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.4h");

  // All products stay in range: QC clear.
  const __uint128_t lhs1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  const __uint128_t rhs1 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  const auto [prod1, flags1] = AsmSqdmull(lhs1, rhs1);
  ASSERT_EQ(prod1, MakeUInt128(0x0000f00000000010ULL, 0x000000400000000cULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT16_MIN * INT16_MIN doubled saturates to INT32_MAX: QC set.
  const __uint128_t lhs2 = MakeUInt128(0x8000000200f00004ULL, 0xfeedfeedfeedfeedULL);
  const __uint128_t rhs2 = MakeUInt128(0x8000000300800002ULL, 0xabcd0123ffff4567ULL);
  const auto [prod2, flags2] = AsmSqdmull(lhs2, rhs2);
  ASSERT_EQ(prod2, MakeUInt128(0x0000f00000000010ULL, 0x7fffffff0000000cULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7852
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x2) {
  // SQDMULL2: signed saturating doubling multiply long on the upper 32-bit
  // lanes of both sources, producing two 64-bit results.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.4s");

  // Upper-lane products stay in range: QC clear.
  __uint128_t arg1 = MakeUInt128(0x0000000200000004ULL, 0xfeed000300000010ULL);
  // NOTE(review): this literal has only 15 hex digits and reads as
  // 0x0feed00040000002 — possibly a missing digit. The expected values below
  // match the literal as written; confirm the intended constant.
  __uint128_t arg2 = MakeUInt128(0x0000000300000002ULL, 0xfeed00040000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000800000040ULL, 0xffddc4ed7f98e000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled saturates to INT64_MAX: QC set.
  __uint128_t arg3 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000000040ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7868
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper16x4) {
  // SQDMULL2: doubling multiply long on the upper 16-bit lanes of both
  // sources, producing four 32-bit results.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.8h");

  // Upper-lane products stay in range: QC clear.
  const __uint128_t lhs1 = MakeUInt128(0x0004000200f00004ULL, 0xfeedfeedfeedfeedULL);
  const __uint128_t rhs1 = MakeUInt128(0x0008000300800002ULL, 0xabcd0123ffff4567ULL);
  const auto [prod1, flags1] = AsmSqdmull(lhs1, rhs1);
  ASSERT_EQ(prod1, MakeUInt128(0x00000226ff6ae4b6ULL, 0x00b4e592fffd8eceULL));
  ASSERT_FALSE(IsQcBitSet(flags1));

  // INT16_MIN * INT16_MIN doubled saturates to INT32_MAX: QC set.
  const __uint128_t lhs2 = MakeUInt128(0x8000000000000004ULL, 0x8000000000000010ULL);
  const __uint128_t rhs2 = MakeUInt128(0x8000000000000002ULL, 0x8000000000000002ULL);
  const auto [prod2, flags2] = AsmSqdmull(lhs2, rhs2);
  ASSERT_EQ(prod2, MakeUInt128(0x0000000000000040ULL, 0x7fffffff00000000ULL));
  ASSERT_TRUE(IsQcBitSet(flags2));
}
7884
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x2IndexedElem) {
  // SQDMULL (by element): every 32-bit lane of the first operand multiplied
  // by element s[1] of the second operand, doubled, widened to 64 bits.
  // (Literal suffixes normalized to ULL and zero literals padded to 16 hex
  // digits for consistency; all values unchanged.)
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.2d, %2.2s, %3.s[1]");

  // s[1] = 2: products stay in range, QC clear.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000200000000ULL, 0x0000000000000000ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000000004488cd10ULL, 0x0000000000880088ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // s[1] = INT32_MIN times an INT32_MIN lane, doubled: saturates to
  // INT64_MAX, QC set.
  __uint128_t arg3 = MakeUInt128(0x0022002280000000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xffddffde00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7900
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x4IndexedElem) {
  // SQDMULL (by element): multiplies the four low 16-bit lanes of the first
  // source by element h[4] of the second, doubles, and widens to four 32-bit
  // lanes.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %0.4s, %2.4h, %3.h[4]");

  // No saturation: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011LL);
  __uint128_t arg2 = MakeUInt128(0x000f000f000f000fULL, 0x000f000f000f0002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000044880000cd10ULL, 0x0000008800000088ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled overflows int32: lane saturates to 0x7fffffff and
  // QC must be set.
  __uint128_t arg3 = MakeUInt128(0x0022002280000000ULL, 0x1122334400118000ULL);
  __uint128_t arg4 = MakeUInt128(0x1111111122222222ULL, 0x1122334411228000ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00000000ULL, 0xffde0000ffde0000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7916
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper64x2IndexedElem) {
  // SQDMULL2 (by element): multiplies the upper two 32-bit lanes of the first
  // source by element s[3] of the second, doubles, and widens to two 64-bit
  // lanes.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.2d, %2.4s, %3.s[3]");

  // No saturation: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x00000002ffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000440044ULL, 0x000000004488cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x80000000 * 0x80000000 doubled overflows int64: lane saturates to
  // 0x7fffffffffffffff and QC must be set.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x1122334480000000ULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x80000000ffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xeeddccbc00000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7932
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLongUpper32x4IndexedElem) {
  // SQDMULL2 (by element): multiplies the upper four 16-bit lanes of the first
  // source by element h[7] of the second, doubles each product, and widens the
  // results to four 32-bit lanes.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull2 %0.4s, %2.8h, %3.h[7]");

  // No lane overflows after doubling: QC (cumulative saturation) stays clear.
  __uint128_t arg1 = MakeUInt128(0x0022002211223344ULL, 0x1122334400110011ULL);
  __uint128_t arg2 = MakeUInt128(0xffffffffffffffffULL, 0x0002ffffffffffffULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000004400000044ULL, 0x000044880000cd10ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled overflows int32: the affected lane saturates to
  // 0x7fffffff and QC must be set.
  __uint128_t arg3 = MakeUInt128(0x80000000ffffffffULL, 0x112233448000ffffULL);
  __uint128_t arg4 = MakeUInt128(0x1122334411223344ULL, 0x8000ffffffffffffULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffff00010000ULL, 0xeede0000ccbc0000ULL));
  // Fix: this assertion was missing, leaving fpsr2 unchecked even though the
  // expected result contains a saturated lane. Every sibling test verifies QC.
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7947
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1) {
  // SQDMULL (scalar): multiplies the low 32-bit elements, doubles, and
  // produces a single 64-bit result; upper bits of the output are zero.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %s3");
  // No saturation: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0222244440000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x80000000 * 0x80000000 doubled overflows int64: result saturates to
  // 0x7fffffffffffffff and QC must be set.
  __uint128_t arg3 = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff1180000000ULL, 0xffffffff11223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7962
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1) {
  // SQDMULL (scalar): multiplies the low 16-bit elements, doubles, and
  // produces a single 32-bit result; upper bits of the output are zero.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %h3");
  // No saturation: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x1111111811112222ULL, 0xf000000700080006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510004444ULL, 0xf000000300080002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000012343210ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled overflows int32: result saturates to 0x7fffffff
  // and QC must be set.
  __uint128_t arg3 = MakeUInt128(0xaabbccdd00008000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff1100008000ULL, 0xffffffff11223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7977
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong32x1IndexedElem) {
  // SQDMULL (scalar, by element): multiplies the low 16-bit element of the
  // first source by element h[7] of the second, doubles, and produces a single
  // 32-bit result.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %s0, %h2, %3.h[7]");
  // No saturation: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510000000ULL, 0x1111000300000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x00000000048d0c84ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x8000 * 0x8000 doubled overflows int32: result saturates to 0x7fffffff
  // and QC must be set.
  __uint128_t arg3 = MakeUInt128(0xaabbccddaabb8000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000aabb11223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
7992
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyLong64x1IndexedElem) {
  // SQDMULL (scalar, by element): multiplies the low 32-bit element of the
  // first source by element s[3] of the second, doubles, and produces a single
  // 64-bit result.
  constexpr auto AsmSqdmull = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmull %d0, %s2, %3.s[3]");
  // No saturation: QC stays clear.
  __uint128_t arg1 = MakeUInt128(0x0000000811112222ULL, 0x0000000700000006ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000510000000ULL, 0x0000000300000002ULL);
  auto [res1, fpsr1] = AsmSqdmull(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000000006666ccccULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // 0x80000000 * 0x80000000 doubled overflows int64: result saturates to
  // 0x7fffffffffffffff and QC must be set.
  __uint128_t arg3 = MakeUInt128(0xaabbccdd80000000ULL, 0x1122334400110011ULL);
  __uint128_t arg4 = MakeUInt128(0xff11ff11ff000ff0ULL, 0x8000000011223344ULL);
  auto [res2, fpsr2] = AsmSqdmull(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8007
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x2) {
  // SQDMLAL (vector): doubling-multiplies the two low 32-bit lanes, widens to
  // 64 bits, and accumulates into the destination. Saturation can happen in
  // either the multiply or the accumulate step; both must raise QC.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.2s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000080000000910ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator already at int64 max).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8035
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong16x4) {
  // SQDMLAL (vector): doubling-multiplies the four low 16-bit lanes, widens to
  // 32 bits, and accumulates into the destination.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.4h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010001011100ULL, 0x03f0040004024600ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0369cba90369cba9ULL, 0x7fffffff0369cba9ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane already at int32 max).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffff12345678ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffff12356678ULL, 0x00000a0000013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8063
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x2) {
  // SQDMLAL2 (vector): doubling-multiplies the upper two 32-bit lanes of each
  // source, widens to 64 bits, and accumulates into the destination.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.4s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t arg5 = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x00024a0066000d00ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane already at int64 max).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x7fffffffffffffffULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x13419a0a7d513f58ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8091
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper16x4) {
  // SQDMLAL2 (vector): doubling-multiplies the upper four 16-bit lanes of each
  // source, widens to 32 bits, and accumulates into the destination.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.8h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x020d03f81c24e9e0ULL, 0x050d06f81f24ece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x03b9fa8703b9fa87ULL, 0x7fffffff03b9fa87ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane already at int32 max).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x7fffffff0000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x134159702d593f58ULL, 0x7fffffff1b2598e0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8119
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1) {
  // SQDMLAL (scalar): doubling-multiplies the low 32-bit elements, widens to
  // 64 bits, and accumulates into the destination scalar.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %s3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x12345678000000FFULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x167ce349000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator already at int64 max).
  __uint128_t arg7 = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8147
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1) {
  // SQDMLAL (scalar): doubling-multiplies the low 16-bit elements, widens to
  // 32 bits, and accumulates into the destination scalar.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %h3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0000000001011100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator already at int32 max).
  __uint128_t arg7 = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd7fffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8175
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x2IndexedElem) {
  // SQDMLAL (by element): doubling-multiplies the two low 32-bit lanes by
  // element s[1], widens to 64 bits, and accumulates.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.2d, %2.2s, %3.s[1]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010111011100ULL, 0x040004008c008c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x000007fc00000900ULL, 0x7fffffffffffffffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane already at int64 max).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x00000a0088013800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8203
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x4IndexedElem) {
  // SQDMLAL (by element): doubling-multiplies the four low 16-bit lanes by
  // element h[7], widens to 32 bits, and accumulates.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %0.4s, %2.4h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x012eb10b89bbca1fULL, 0xfedf0524765b0d28ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xbbbc4567777f4567ULL, 0x7fffffff00004567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane already at int32 max).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffff004d4bffULL, 0x0026b00000275600ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8231
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper64x2IndexedElem) {
  // SQDMLAL2 (by element): doubling-multiplies the upper two 32-bit lanes by
  // element s[3], widens to 64 bits, and accumulates.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.2d, %2.4s, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x020d44926c1ce9e0ULL, 0x050d47926f1cece0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  __uint128_t arg6 = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0xf1e0cfbf04040404ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane already at int64 max).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x010d4d926b1d98e0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8259
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLongUpper32x4IndexedElem) {
  // SQDMLAL2 (by element): doubling-multiplies the upper four 16-bit lanes by
  // element h[7], widens to 32 bits, and accumulates.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal2 %0.4s, %2.8h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0230485f8a1d9e4fULL, 0xffe9bd9076c60270ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x023645677fffffffULL, 0x0236456702364567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator lane already at int32 max).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffff0071d05fULL, 0x010d0cf800728060ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8287
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong64x1IndexedElem) {
  // SQDMLAL (scalar, by element): doubling-multiplies the low 32-bit element
  // by element s[3], widens to 64 bits, and accumulates into the scalar
  // destination.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x012eb3d4d07fc65fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator already at int64 max).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x7fffffffffffffffULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x7fffffffffffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8315
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyAddLong32x1IndexedElem) {
  // SQDMLAL (scalar, by element): doubling-multiplies the low 16-bit element
  // by element h[7], widens to 32 bits, and accumulates into the scalar
  // destination.
  constexpr auto AsmSqdmlal = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlal %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);
  auto [res1, fpsr1] = AsmSqdmlal(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0000000089bbca1fULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlal(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the addition (accumulator already at int32 max).
  __uint128_t arg7 = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd7fffffffULL, 0x0011223344556677ULL);
  auto [res3, fpsr3] = AsmSqdmlal(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x000000007fffffffULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8343
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x2) {
  // SQDMLSL (vector): doubling-multiplies the two low 32-bit lanes, widens to
  // 64 bits, and subtracts from the destination. Saturation can happen in
  // either the multiply or the subtract step; both must raise QC.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.2s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0000000080000001ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0000100000000001ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x00001003fffffff9ULL, 0x0400040004000400ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000000000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x00000000000008f0ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator already at int64 min).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8371
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong16x4) {
  // SQDMLSL (vector): doubling-multiplies the four low 16-bit lanes, widens to
  // 32 bits, and subtracts from the destination.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.4h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0100010000fef100ULL, 0x0410040003fdc200ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x8000111111111111ULL, 0x1234123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfedcbf25fedcbf25ULL, 0x81234568fedcbf25ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane already at int32 min).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000012345678ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000012334678ULL, 0x00000a0000002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8399
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x2) {
  // SQDMLSL2 (vector): doubling-multiplies the upper two 32-bit lanes of each
  // source, widens to 64 bits, and subtracts from the destination.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.4s");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 * 0x80000000 doubled).
  __uint128_t arg4 = MakeUInt128(0x1234567800000004ULL, 0x8000000001100010ULL);
  __uint128_t arg5 = MakeUInt128(0x1234567800000002ULL, 0x8000000001100020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfffdc5ff9a000500ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane already at int64 min).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x8000000000000000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x112712e5a7176d98ULL, 0x8000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8427
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper16x4) {
  // SQDMLSL2 (vector): doubling-multiplies the upper four 16-bit lanes of each
  // source, widens to 32 bits, and subtracts from the destination.
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.8h");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x8000110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0010001100000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2fe08e5db1820ULL, 0x02f30108e8db1b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 * 0x8000 doubled).
  __uint128_t arg4 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg5 = MakeUInt128(0x1111111111111111ULL, 0x8000123412341234ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0xfe8c9047fe8c9047ULL, 0x81234568fe8c9047ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane already at int32 min).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400010004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x1234567812345678ULL, 0x800000000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x11275380f70f6d98ULL, 0x80000000e4dbc720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8455
// SQDMLSL (scalar, Dd <- Sn, Sm): only the low 32-bit source lanes participate;
// the upper 64 bits of the result register must read back as zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %s3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110011223344ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000020000000ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x12345678000000FFULL, 0x0400040004000400ULL);  // accumulator
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0debc9a7000000ffULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000: 2 * INT32_MIN^2 > INT64_MAX).
  __uint128_t arg4 = MakeUInt128(0x1122334480000000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccdd80000000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x9122334411111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator 0x8000000000000000 = INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1122334400111111ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccdd00222222ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8483
// SQDMLSL (scalar, Sd <- Hn, Hm): only the low 16-bit source lanes participate;
// all result bits above bit 31 must read back as zero.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %h3");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000000000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);  // accumulator
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000fef100ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000: 2 * INT16_MIN^2 > INT32_MAX).
  __uint128_t arg4 = MakeUInt128(0x1122334411228000ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0xaabbccddaabb8000ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x1122334411111111ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000000091111112ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator low word 0x80000000 = INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0x1122334411220123ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0xaabbccddaabb0044ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd80000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x0000000080000000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8511
// SQDMLSL (vector, by-element): every low 32-bit lane of arg1 is multiplied by
// the single selected element arg2.s[1].  QC iff any lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x2IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.2d, %2.2s, %3.s[1]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);  // accumulator
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x010000fef0fef100ULL, 0x040003ff7bff7c00ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (both operands 0x80000000 in the indexed lane).
  __uint128_t arg4 = MakeUInt128(0x8000000000000004ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x8000000000000002ULL, 0xfeed000400000020ULL);
  __uint128_t arg6 = MakeUInt128(0x0000080000000900ULL, 0x00000a000000b000ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000080400000900ULL, 0x80000a000000b001ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane 0x8000000000000000 = INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x000009ff78002800ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8539
// SQDMLSL (vector, by-element): low 16-bit lanes of arg1 multiplied by the
// selected element arg2.h[7].  QC iff any lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x4IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %0.4s, %2.4h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);  // accumulator
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0117d9c3899bd1bfULL, 0xfeda700c764d56f8ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (arg4 lane 0x8000 times arg5.h[7] 0x8000).
  __uint128_t arg4 = MakeUInt128(0x80000123456789a4ULL, 0xfeed000300000010ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x468a45678ac74567ULL, 0x8123456802464567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane 0x80000000 = INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x80000000ffb2b400ULL, 0xffd96400ffda0a00ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8567
// SQDMLSL2 (vector, by-element, upper half): upper 32-bit lanes of arg1
// multiplied by the selected element arg2.s[3].  QC iff any lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper64x2IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.2d, %2.4s, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x0000000400000004ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0100010001000100ULL, 0x0400040004000400ULL);  // accumulator
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0xfff2bd6d95e31820ULL, 0x02f2c06d98e31b20ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (arg4 upper lane 0x80000000 times arg5.s[3] 0x80000000).
  __uint128_t arg4 = MakeUInt128(0x0123456789abcdefULL, 0x1122334480000000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000000011223344ULL);
  __uint128_t arg6 = MakeUInt128(0x0101010102020202ULL, 0x0303030304040404ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x8101010102020203ULL, 0x1425364704040404ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane 0x8000000000000000 = INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x1122334444332211ULL, 0x0123456701234567ULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0xfef2c66d94e3c720ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8595
// SQDMLSL2 (vector, by-element, upper half): upper 16-bit lanes of arg1
// multiplied by the selected element arg2.h[7].  QC iff any lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLongUpper32x4IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl2 %0.4s, %2.8h, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);  // accumulator
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0016426f8939fd8fULL, 0xfdcfb7a075e261b0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (arg4 upper lane 0x8000 times arg5.h[7] 0x8000).
  __uint128_t arg4 = MakeUInt128(0x0011223344556677ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0010456781234568ULL, 0x0010456700104567ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator lane 0x80000000 = INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x80000000ff8e2fa0ULL, 0xfef30708ff8edfa0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8623
// SQDMLSL (scalar, by-element, Dd <- Sn * Vm.s[3]): upper 64 result bits must
// read back as zero.  QC iff the multiply or the subtraction saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong64x1IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %d0, %s2, %3.s[3]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);  // accumulator
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x0117d6fa42d7d57fULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x80000000 source times 0x80000000 indexed element).
  __uint128_t arg4 = MakeUInt128(0x0011223380000000ULL, 0xfeedfeedfeed8000ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x80000000ba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x8123456701234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator 0x8000000000000000 = INT64_MIN).
  __uint128_t arg7 = MakeUInt128(0x1100110022002200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x01234567ffffeeeeULL);
  __uint128_t arg9 = MakeUInt128(0x8000000000000000ULL, 0x00000a000000b000ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x8000000000000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8651
// SQDMLSL (scalar, by-element, Sd <- Hn * Vm.h[7]): all result bits above
// bit 31 must read back as zero.  QC iff any step saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplySubtractLong32x1IndexedElem) {
  constexpr auto AsmSqdmlsl = ASM_INSN_WRAP_FUNC_WQ_RES_WW0_ARG("sqdmlsl %s0, %h2, %3.h[7]");

  // No saturation.
  __uint128_t arg1 = MakeUInt128(0x0102030405060708ULL, 0x7654321076543210ULL);
  __uint128_t arg2 = MakeUInt128(0x1122334488776655ULL, 0x0123456701234567ULL);
  __uint128_t arg3 = MakeUInt128(0x0123456789abcdefULL, 0xfedcba9876543210ULL);  // accumulator
  auto [res1, fpsr1] = AsmSqdmlsl(arg1, arg2, arg3);
  ASSERT_EQ(res1, MakeUInt128(0x00000000899bd1bfULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Saturates in the multiplication (0x8000 source times 0x8000 indexed element).
  __uint128_t arg4 = MakeUInt128(0x0011223344558000ULL, 0xfeedfeedfeed1234ULL);
  __uint128_t arg5 = MakeUInt128(0x0123456789abcdefULL, 0x8000fedcba123456ULL);
  __uint128_t arg6 = MakeUInt128(0x0123456701234567ULL, 0x0123456701234567ULL);
  auto [res2, fpsr2] = AsmSqdmlsl(arg4, arg5, arg6);
  ASSERT_EQ(res2, MakeUInt128(0x0000000081234568ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));

  // Saturates in the subtraction (accumulator low word 0x80000000 = INT32_MIN).
  __uint128_t arg7 = MakeUInt128(0xaabbccddeeff2200ULL, 0x7654321076543210ULL);
  __uint128_t arg8 = MakeUInt128(0x8888111122223333ULL, 0x0123aabbccddeeffULL);
  __uint128_t arg9 = MakeUInt128(0xaabbccdd80000000ULL, 0x0011223344556677ULL);
  auto [res3, fpsr3] = AsmSqdmlsl(arg7, arg8, arg9);
  ASSERT_EQ(res3, MakeUInt128(0x0000000080000000ULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr3));
}
8679
// SQRDMULH (vector, 4x32): rounded high half of the doubled signed product per lane.
// QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.4s");

  // No saturation in any lane.
  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed9UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is INT32_MIN * INT32_MIN: doubled product overflows, clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed2UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8695
// SQRDMULH (vector, 2x32): rounded high half of the doubled signed product in
// the low two lanes; the upper 64 bits of the destination must read back as
// zero.  QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.2s");

  // No saturation; upper lanes (0xdeadc0de filler) are ignored and zeroed.
  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Fixed literal suffix for consistency with the rest of the file (was 0x3).
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is INT32_MIN * INT32_MIN: doubled product overflows, clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8711
// SQRDMULH (vector, 8x16): rounded high half of the doubled signed product per lane.
// QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.8h");

  // No saturation in any lane.
  __uint128_t arg1 = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0002000040000000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane is INT16_MIN * INT16_MIN: doubled product overflows, clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000100020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8727
// SQRDMULH (vector, 4x16): operates on the low four lanes only; the upper 64
// bits of the destination must read back as zero.  QC iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.4h");

  // No saturation; upper lanes (0xdeadc0de filler) are ignored and zeroed.
  __uint128_t arg1 = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0003000040000000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top active lane is INT16_MIN * INT16_MIN: clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000100020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8743
// SQRDMULH (vector, by-element, 4x32): every lane of arg1 is multiplied by the
// single selected element arg2.s[0].  QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x4IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4s, %2.4s, %3.s[0]");

  // No saturation.  (Added UL suffixes for consistency with the file's literal style.)
  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 7 instead of 8.
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is INT32_MIN * INT32_MIN (indexed element 0x80000000): clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8760
// SQRDMULH (vector, by-element, 2x32): low two lanes of arg1 multiplied by
// arg2.s[0]; upper destination bits must read back as zero.  QC iff saturation.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x2IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.2s, %2.2s, %3.s[0]");

  // No saturation; upper lanes (0xdeadc0de filler) are ignored and zeroed.
  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is INT32_MIN * INT32_MIN (indexed element 0x80000000): clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8776
// SQRDMULH (vector, by-element, 8x16): every lane of arg1 multiplied by the
// selected element arg2.h[7].  QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x8IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.8h, %2.8h, %3.h[7]");

  // No saturation (indexed element is the small positive 0x0008).
  __uint128_t arg1 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0008fff800040000ULL, 0x0000000800020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Indexed element is 0x8000 = INT16_MIN; the 0x8000 lane of arg3 saturates to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8792
// SQRDMULH (vector, by-element, 4x16): low four lanes multiplied by arg2.h[7];
// upper destination bits must read back as zero.  QC iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x4IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %0.4h, %2.4h, %3.h[7]");

  // No saturation (indexed element is the small positive 0x0004).
  __uint128_t arg1 = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0004fffc00030000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Indexed element is 0x8000 = INT16_MIN; the 0x8000 lane of arg3 saturates to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8808
// SQRDMULH (scalar, Sd <- Sn, Sm): only the low 32-bit lanes participate; all
// other destination bits must read back as zero.  QC iff the result saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %s3");

  // No saturation; 0xfeedfeed filler lanes must be ignored.
  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN: doubled product overflows, clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8825
// SQRDMULH (scalar, Hd <- Hn, Hm): only the low 16-bit lanes participate; all
// other destination bits must read back as zero.  QC iff the result saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %h3");

  // No saturation; 0xfeed filler lanes must be ignored.
  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN: doubled product overflows, clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8841
// SQRDMULH (scalar, by-element, Sd <- Sn * Vm.s[2]): result above bit 31 must
// read back as zero.  QC iff the result saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf32x1IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %s0, %s2, %3.s[2]");

  // No saturation; only element [2] of arg2 is used, the rest is filler.
  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeU32x4(0x3UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN (indexed element 0x80000000): clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8858
// SQRDMULH (scalar, by-element, Hd <- Hn * Vm.h[7]): result above bit 15 must
// read back as zero.  QC iff the result saturates.
TEST(Arm64InsnTest, SignedSaturatingRoundingDoublingMultiplyHighHalf16x1IndexedElem) {
  constexpr auto AsmSqrdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqrdmulh %h0, %h2, %3.h[7]");

  // No saturation; only element [7] of arg2 is used, the rest is filler.
  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqrdmulh(arg1, arg2);
  // Without rounding, result should be 2 instead of 3.
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000003ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN (indexed element 0x8000): clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqrdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8875
// SQDMULH (vector, 4x32): truncating (non-rounding) high half of the doubled
// signed product per lane.  QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.4s");

  // No saturation; lane 2 result is one less than the SQRDMULH test's (no rounding).
  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0x00000002UL, 0x7eed0004UL, 0x00000002UL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7ddc4ed8UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is INT32_MIN * INT32_MIN: doubled product overflows, clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xfeed0004UL, 0x00000002UL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x00024ed1UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8891
// SQDMULH (vector, 2x32): truncating high half of the doubled signed product
// in the low two lanes; upper destination bits must read back as zero.
// QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.2s");

  // No saturation; upper lanes (0xdeadc0de filler) are ignored and zeroed.
  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is INT32_MIN * INT32_MIN: doubled product overflows, clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0x00000002UL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  // Fixed literal suffix for consistency with the rest of the file (was 0x7fffffff).
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8907
// SQDMULH (vector, 8x16): truncating (non-rounding) high half of the doubled
// signed product per lane.  QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.8h");

  // No saturation in any lane.
  __uint128_t arg1 = MakeUInt128(0x200000017fff1111ULL, 0x7eed000300000010ULL);
  __uint128_t arg2 = MakeUInt128(0x0008000840000000ULL, 0x7eed000400000002ULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000200003fff0000ULL, 0x7ddc000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top lane is INT16_MIN * INT16_MIN: doubled product overflows, clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xfeed0003ffff0010ULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xfeed0004ffff0002ULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000000020000ULL, 0x0002000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8923
// SQDMULH (vector, 4x16): operates on the low four lanes only; upper
// destination bits must read back as zero.  QC iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.4h");

  // No saturation; upper lanes (0xdeadc0de filler) are ignored and zeroed.
  __uint128_t arg1 = MakeUInt128(0x555500017fff1111ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0x0004000840000000ULL, 0xdeadc0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x000200003fff0000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Top active lane is INT16_MIN * INT16_MIN: clamps to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x8000700040010000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0x8000000100040000ULL, 0xdeadc0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x7fff000000020000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8939
// SQDMULH (vector, by-element, 4x32): every lane of arg1 is multiplied by the
// single selected element arg2.s[0].  QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x4IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4s, %2.4s, %3.s[0]");

  // No saturation; lane 2 gives 0x7 here vs 0x8 in the rounding (SQRDMULH) variant.
  __uint128_t arg1 = MakeU32x4(0x20000001UL, 0x00000004UL, 0x7eed0003UL, 0x00000010UL);
  __uint128_t arg2 = MakeU32x4(0x00000008UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x7UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is INT32_MIN * INT32_MIN (indexed element 0x80000000): clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xfeed0003UL, 0x00000010UL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0112fffdUL, 0xfffffff0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8955
// SQDMULH (vector, by-element, 2x32): low two lanes multiplied by arg2.s[0];
// upper destination bits must read back as zero.  QC iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x2IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.2s, %2.2s, %3.s[0]");

  // No saturation; upper lanes (0xdeadc0de filler) are ignored and zeroed.
  __uint128_t arg1 = MakeU32x4(0x55555555UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Lane 0 is INT32_MIN * INT32_MIN (indexed element 0x80000000): clamps to INT32_MAX.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0x00000004UL, 0xdeadc0deUL, 0xdeadc0deUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xdeadc0deUL, 0xdeadc0deUL, 0xdeadc0deUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0xfffffffcUL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8971
// SQDMULH (vector, by-element, 8x16): every lane of arg1 multiplied by the
// selected element arg2.h[7].  QC must be set iff some lane saturates.
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x8IndexedElem) {
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.8h, %2.8h, %3.h[7]");

  // No saturation (indexed element is the small positive 0x0008).
  __uint128_t arg1 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0008feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0007fff800040000ULL, 0xffff000700020004ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Indexed element is 0x8000 = INT16_MIN; the 0x8000 lane of arg3 saturates to INT16_MAX.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xfe00780020004001ULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x02008800e000bfffULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
8987
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x4IndexedElem) {
  // SQDMULH (vector, by element), 4H form: each 16-bit lane of the low half
  // is saturate((2 * lane * element) >> 16), with the multiplier element
  // taken from lane 7 of the second operand.  The 64-bit vector form zeroes
  // the upper half of the destination, hence the zero upper words below.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %0.4h, %2.4h, %3.h[7]");

  // Multiplier element h[7] is 0x0004; 0xdeadc0de lanes are filler that must
  // not affect the result.
  __uint128_t arg1 = MakeUInt128(0x7fff800055550000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg2 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x0004c0dedeadc0deULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0003fffc00020000ULL, 0x0000000000000000ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // Multiplier element h[7] is 0x8000 (INT16_MIN): the 0x8000 lane saturates
  // to INT16_MAX (0x7fff) and sets the FPSR.QC bit.
  __uint128_t arg3 = MakeUInt128(0x7fff800045670000ULL, 0xdeadc0dedeadc0deULL);
  __uint128_t arg4 = MakeUInt128(0xdeadc0dedeadc0deULL, 0x8000c0dedeadc0deULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x80017fffba990000ULL, 0x0000000000000000ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9003
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1) {
  // SQDMULH (scalar, 32-bit): Sd = saturate((2 * Sn * Sm) >> 32).  The rest
  // of the destination vector register is zeroed, hence the zero upper lanes
  // in the expected values.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %s3");

  // 0x556789ab * 4, doubled, is 0x2'ab3c'4d58; its high 32 bits are 2.
  // 0xfeedfeed lanes are filler that must not leak into the result.
  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0x00000004UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  // Use the UL suffix consistently for 32-bit lane literals (was 0x0ULL).
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows the signed 64-bit intermediate,
  // so the result saturates to INT32_MAX (0x7fffffff) and the FPSR.QC
  // (cumulative saturation) bit is set.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9019
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1) {
  // SQDMULH (scalar, 16-bit): Hd = saturate((2 * Hn * Hm) >> 16).  The rest
  // of the destination vector register is zeroed.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %h3");

  // 0x5567 * 4, doubled, is 0x2'ab38; its high 16 bits are 2.  The 0xfeed
  // filler outside the low halfword must be ignored by the scalar form.
  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeed0004ULL, 0xfeedfeedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN doubled overflows the signed 32-bit intermediate,
  // so the result saturates to INT16_MAX (0x7fff) and FPSR.QC is set.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9035
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf32x1IndexedElem) {
  // SQDMULH (scalar, by element): Sd = saturate((2 * Sn * element) >> 32),
  // with the multiplier taken from lane 2 of the second operand.  The rest
  // of the destination vector register is zeroed.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %s0, %s2, %3.s[2]");

  // 0x556789ab * 4 (from s[2]), doubled, has high 32 bits equal to 2.
  // 0xfeedfeed lanes are filler that must not leak into the result.
  __uint128_t arg1 = MakeU32x4(0x556789abUL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg2 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x00000004UL, 0xfeedfeedUL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeU32x4(0x2UL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT32_MIN * INT32_MIN doubled overflows, so the result saturates to
  // INT32_MAX (0x7fffffff) and the FPSR.QC bit is set.
  __uint128_t arg3 = MakeU32x4(0x80000000UL, 0xfeedfeedUL, 0xfeedfeedUL, 0xfeedfeedUL);
  __uint128_t arg4 = MakeU32x4(0xfeedfeedUL, 0xfeedfeedUL, 0x80000000UL, 0xfeedfeedUL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeU32x4(0x7fffffffUL, 0x0UL, 0x0UL, 0x0UL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9051
TEST(Arm64InsnTest, SignedSaturatingDoublingMultiplyHighHalf16x1IndexedElem) {
  // SQDMULH (scalar, by element): Hd = saturate((2 * Hn * element) >> 16),
  // with the multiplier taken from lane 7 of the second operand.  The rest
  // of the destination vector register is zeroed.
  constexpr auto AsmSqdmulh = ASM_INSN_WRAP_FUNC_WQ_RES_WW_ARG("sqdmulh %h0, %h2, %3.h[7]");

  // 0x5567 * 4 (from h[7]), doubled, has high 16 bits equal to 2.
  __uint128_t arg1 = MakeUInt128(0xfeedfeedfeed5567ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg2 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x0004feedfeedfeedULL);
  auto [res1, fpsr1] = AsmSqdmulh(arg1, arg2);
  ASSERT_EQ(res1, MakeUInt128(0x0000000000000002ULL, 0x0ULL));
  ASSERT_FALSE(IsQcBitSet(fpsr1));

  // INT16_MIN * INT16_MIN doubled overflows the signed 32-bit intermediate,
  // so the result saturates to INT16_MAX (0x7fff) and FPSR.QC is set.
  __uint128_t arg3 = MakeUInt128(0xfeedfeedfeed8000ULL, 0xfeedfeedfeedfeedULL);
  __uint128_t arg4 = MakeUInt128(0xfeedfeedfeedfeedULL, 0x8000feedfeedfeedULL);
  auto [res2, fpsr2] = AsmSqdmulh(arg3, arg4);
  ASSERT_EQ(res2, MakeUInt128(0x0000000000007fffULL, 0x0ULL));
  ASSERT_TRUE(IsQcBitSet(fpsr2));
}
9067
9068 class FpcrBitSupport : public testing::TestWithParam<uint64_t> {};
9069
// Writes the parameter into FPCR, reads FPCR back, and verifies that the bit
// pattern round-trips, i.e. the bit is implemented and writable.
TEST_P(FpcrBitSupport, SupportsBit) {
  uint64_t fpcr1;
  // GetParam() is already uint64_t (TestWithParam<uint64_t>), so the previous
  // static_cast<uint64_t> was redundant and has been dropped.
  asm("msr fpcr, %x1\n\t"
      "mrs %x0, fpcr"
      : "=r"(fpcr1)
      : "r"(GetParam()));
  ASSERT_EQ(fpcr1, GetParam()) << "Should be able to set then get FPCR bit: " << GetParam();
}
9078
9079 // Note: The exception enablement flags (such as IOE) are not checked, because when tested on actual
9080 // ARM64 device we find that the tests fail either because they cannot be written or are RAZ (read
9081 // as zero).
// Each value is one FPCR control-field encoding: the four rounding modes
// (RMode) plus the FZ (flush-to-zero) and DN (default NaN) bits, with 0 as a
// baseline.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpcrBitSupport,
                         testing::Values(kFpcrRModeTieEven,
                                         kFpcrRModeZero,
                                         kFpcrRModeNegInf,
                                         kFpcrRModePosInf,
                                         kFpcrFzBit,
                                         kFpcrDnBit,
                                         0));
9091
9092 class FpsrBitSupport : public testing::TestWithParam<uint64_t> {};
9093
// Writes the parameter into FPSR, reads FPSR back, and verifies that the bit
// pattern round-trips, i.e. the bit is implemented and writable.
TEST_P(FpsrBitSupport, SupportsBit) {
  uint64_t fpsr1;
  // GetParam() is already uint64_t (TestWithParam<uint64_t>), so the previous
  // static_cast<uint64_t> was redundant and has been dropped.
  asm("msr fpsr, %1\n\t"
      "mrs %0, fpsr"
      : "=r"(fpsr1)
      : "r"(GetParam()));
  // Include the failing bit value in the message, matching the FPCR test.
  ASSERT_EQ(fpsr1, GetParam()) << "Should be able to set then get FPSR bit: " << GetParam();
}
9102
// Each value is one FPSR flag bit: the cumulative floating-point exception
// flags (IOC, DZC, OFC, UFC, IXC, IDC) and the QC saturation bit.
INSTANTIATE_TEST_SUITE_P(Arm64InsnTest,
                         FpsrBitSupport,
                         testing::Values(kFpsrIocBit,
                                         kFpsrDzcBit,
                                         kFpsrOfcBit,
                                         kFpsrUfcBit,
                                         kFpsrIxcBit,
                                         kFpsrIdcBit,
                                         kFpsrQcBit));
9112
TEST(Arm64InsnTest, UnsignedDivide64) {
  // UDIV (64-bit): unsigned division; ARM defines division by zero to
  // produce zero rather than trap.
  auto do_udiv = [](uint64_t dividend, uint64_t divisor) {
    uint64_t quotient;
    asm("udiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };
  ASSERT_EQ(do_udiv(0x8'0000'0000ULL, 2ULL), 0x4'0000'0000ULL) << "Division should be 64-bit.";
  ASSERT_EQ(do_udiv(123ULL, 0ULL), 0ULL) << "Div by 0 should result in 0.";
}
9122
TEST(Arm64InsnTest, SignedDivide64) {
  // SDIV (64-bit): signed division; the INT64_MIN / -1 overflow case must
  // wrap back to INT64_MIN instead of trapping.
  auto do_sdiv = [](int64_t dividend, int64_t divisor) {
    int64_t quotient;
    asm("sdiv %0, %1, %2" : "=r"(quotient) : "r"(dividend), "r"(divisor));
    return quotient;
  };
  ASSERT_EQ(do_sdiv(67802402LL, -1LL), -67802402LL)
      << "Division by -1 should flip sign if dividend is not numeric_limits::min.";
  ASSERT_EQ(do_sdiv(-531675317891LL, -1LL), 531675317891LL)
      << "Division by -1 should flip sign if dividend is not numeric_limits::min.";
  ASSERT_EQ(do_sdiv(std::numeric_limits<int64_t>::min(), -1LL), std::numeric_limits<int64_t>::min())
      << "Div of numeric_limits::min by -1 should result in numeric_limits::min.";
}
9136
TEST(Arm64InsnTest, AesEncode) {
  // AESE: one AES encryption round step (AddRoundKey, SubBytes, ShiftRows)
  // on the 128-bit state.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t encoded;
  asm("aese %0.16b, %2.16b" : "=w"(encoded) : "0"(state), "w"(round_key));
  ASSERT_EQ(encoded, MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL));
}
9144
TEST(Arm64InsnTest, AesMixColumns) {
  // AESMC: AES MixColumns transformation of the 128-bit state.
  __uint128_t state = MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL);
  __uint128_t mixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesmc %0.16b, %1.16b")(state);
  ASSERT_EQ(mixed, MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL));
}
9150
TEST(Arm64InsnTest, AesDecode) {
  // Check that it's opposite to AesEncode with extra XORs.
  // AESD applies AddRoundKey first, so the encoded state is XORed with the
  // key on the way in and the result un-XORed on the way out.
  __uint128_t encoded = MakeUInt128(0x16ea'82ee'eaf5'eeeeULL, 0xf5ea'eeee'ea16'ee82ULL);
  __uint128_t round_key = MakeUInt128(0xaaaa'bbbb'cccc'ddddULL, 0xeeee'ffff'0000'9999ULL);
  __uint128_t decoded;
  asm("aesd %0.16b, %2.16b" : "=w"(decoded) : "0"(encoded ^ round_key), "w"(round_key));
  ASSERT_EQ(decoded ^ round_key, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
9160
TEST(Arm64InsnTest, AesInverseMixColumns) {
  // AESIMC: inverse MixColumns — must undo the AESMC output checked above.
  __uint128_t mixed = MakeUInt128(0x77114422dd33aa44ULL, 0x3355006692776d88ULL);
  __uint128_t unmixed = ASM_INSN_WRAP_FUNC_W_RES_W_ARG("aesimc %0.16b, %1.16b")(mixed);
  ASSERT_EQ(unmixed, MakeUInt128(0x1111'2222'3333'4444ULL, 0x5555'6666'7777'8888ULL));
}
9166
9167 } // namespace
9168