1 // SPDX-License-Identifier: Apache-2.0
2 // ----------------------------------------------------------------------------
3 // Copyright 2020-2022 Arm Limited
4 //
5 // Licensed under the Apache License, Version 2.0 (the "License"); you may not
6 // use this file except in compliance with the License. You may obtain a copy
7 // of the License at:
8 //
9 // http://www.apache.org/licenses/LICENSE-2.0
10 //
11 // Unless required by applicable law or agreed to in writing, software
12 // distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
13 // WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
14 // License for the specific language governing permissions and limitations
15 // under the License.
16 // ----------------------------------------------------------------------------
17
18 /**
19 * @brief Unit tests for the vectorized SIMD functionality.
20 */
21
22 #include <limits>
23
24 #include "gtest/gtest.h"
25
26 #include "../astcenc_internal.h"
27 #include "../astcenc_vecmathlib.h"
28
29 namespace astcenc
30 {
31
32 // Misc utility tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
33
round_down(int x)34 static int round_down(int x)
35 {
36 int remainder = x % ASTCENC_SIMD_WIDTH;
37 return x - remainder;
38 }
39
round_up(int x)40 static int round_up(int x)
41 {
42 int remainder = x % ASTCENC_SIMD_WIDTH;
43 if (!remainder)
44 {
45 return x;
46 }
47
48 return x - remainder + ASTCENC_SIMD_WIDTH;
49 }
50
51 /** @brief Test VLA loop limit round down. */
TEST(misc, RoundDownVLA)
{
    // Inputs whose expected result is the input itself for all VLA widths
    const int fixed_points[] = { 0, 8, 16 };
    for (int value : fixed_points)
    {
        EXPECT_EQ(round_down_to_simd_multiple_vla(value), value);
    }

    // Inputs whose expected result depends on the VLA width, so compare
    // against the local reference implementation
    const int width_dependent[] = { 3, 5, 7, 231 };
    for (int value : width_dependent)
    {
        EXPECT_EQ(round_down_to_simd_multiple_vla(value), round_down(value));
    }
}
65
66 /** @brief Test VLA loop limit round up. */
TEST(misc, RoundUpVLA)
{
    // Inputs whose expected result is the input itself for all VLA widths
    const int fixed_points[] = { 0, 8, 16 };
    for (int value : fixed_points)
    {
        EXPECT_EQ(round_up_to_simd_multiple_vla(value), value);
    }

    // Inputs whose expected result depends on the VLA width, so compare
    // against the local reference implementation
    const int width_dependent[] = { 3, 5, 7, 231 };
    for (int value : width_dependent)
    {
        EXPECT_EQ(round_up_to_simd_multiple_vla(value), round_up(value));
    }
}
80
81 #if ASTCENC_SIMD_WIDTH == 1
82
83 // VLA (1-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
84
85 /** @brief Test VLA change_sign. */
TEST(vfloat, ChangeSign)
{
    // Negative value, negative sign source: expect a positive result
    vfloat val0(-1.0f);
    vfloat sgn0(-1.0f);
    vfloat res0 = change_sign(val0, sgn0);
    EXPECT_EQ(res0.lane<0>(), 1.0f);

    // Positive value, negative sign source: expect a negative result
    vfloat val1( 1.0f);
    vfloat sgn1(-1.0f);
    vfloat res1 = change_sign(val1, sgn1);
    EXPECT_EQ(res1.lane<0>(), -1.0f);

    // Negative value, positive sign source: expect the value unchanged
    vfloat val2(-3.12f);
    vfloat sgn2( 3.12f);
    vfloat res2 = change_sign(val2, sgn2);
    EXPECT_EQ(res2.lane<0>(), -3.12f);

    // Positive value, positive sign source: expect the value unchanged
    vfloat val3( 3.12f);
    vfloat sgn3( 3.12f);
    vfloat res3 = change_sign(val3, sgn3);
    EXPECT_EQ(res3.lane<0>(), 3.12f);
}
108
109 /** @brief Test VLA atan. */
TEST(vfloat, Atan)
{
    // Each case compares against a reference angle with a 0.005 tolerance
    vfloat in0(-0.15f);
    vfloat out0 = atan(in0);
    EXPECT_NEAR(out0.lane<0>(), -0.149061f, 0.005f);

    vfloat in1(0.0f);
    vfloat out1 = atan(in1);
    EXPECT_NEAR(out1.lane<0>(), 0.000000f, 0.005f);

    vfloat in2(0.9f);
    vfloat out2 = atan(in2);
    EXPECT_NEAR(out2.lane<0>(), 0.733616f, 0.005f);

    vfloat in3(2.1f);
    vfloat out3 = atan(in3);
    EXPECT_NEAR(out3.lane<0>(), 1.123040f, 0.005f);
}
128
129 /** @brief Test VLA atan2. */
TEST(vfloat, Atan2)
{
    // Each case compares against a reference angle with a 0.005 tolerance
    vfloat y0(-0.15f);
    vfloat x0( 1.15f);
    vfloat out0 = atan2(y0, x0);
    EXPECT_NEAR(out0.lane<0>(), -0.129816f, 0.005f);

    vfloat y1( 0.0f);
    vfloat x1(-3.0f);
    vfloat out1 = atan2(y1, x1);
    EXPECT_NEAR(out1.lane<0>(), 3.141592f, 0.005f);

    vfloat y2( 0.9f);
    vfloat x2(-0.9f);
    vfloat out2 = atan2(y2, x2);
    EXPECT_NEAR(out2.lane<0>(), 2.360342f, 0.005f);

    vfloat y3( 2.1f);
    vfloat x3( 1.1f);
    vfloat out3 = atan2(y3, x3);
    EXPECT_NEAR(out3.lane<0>(), 1.084357f, 0.005f);
}
152
153 #elif ASTCENC_SIMD_WIDTH == 4
154
155 // VLA (4-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
156
157 /** @brief Test VLA change_sign. */
TEST(vfloat, ChangeSign)
{
    // One lane per combination of value sign and sign-source sign
    vfloat values(-1.0f,  1.0f, -3.12f, 3.12f);
    vfloat signs (-1.0f, -1.0f,  3.12f, 3.12f);
    vfloat result = change_sign(values, signs);

    EXPECT_EQ(result.lane<0>(),  1.0f);
    EXPECT_EQ(result.lane<1>(), -1.0f);
    EXPECT_EQ(result.lane<2>(), -3.12f);
    EXPECT_EQ(result.lane<3>(),  3.12f);
}
168
169 /** @brief Test VLA atan. */
TEST(vfloat, Atan)
{
    // Compare each lane against a reference angle with a 0.005 tolerance
    vfloat input(-0.15f, 0.0f, 0.9f, 2.1f);
    vfloat result = atan(input);

    EXPECT_NEAR(result.lane<0>(), -0.149061f, 0.005f);
    EXPECT_NEAR(result.lane<1>(),  0.000000f, 0.005f);
    EXPECT_NEAR(result.lane<2>(),  0.733616f, 0.005f);
    EXPECT_NEAR(result.lane<3>(),  1.123040f, 0.005f);
}
179
180 /** @brief Test VLA atan2. */
TEST(vfloat, Atan2)
{
    // Compare each lane against a reference angle with a 0.005 tolerance
    vfloat y(-0.15f,  0.0f,  0.9f, 2.1f);
    vfloat x( 1.15f, -3.0f, -0.9f, 1.1f);
    vfloat result = atan2(y, x);

    EXPECT_NEAR(result.lane<0>(), -0.129816f, 0.005f);
    EXPECT_NEAR(result.lane<1>(),  3.141592f, 0.005f);
    EXPECT_NEAR(result.lane<2>(),  2.360342f, 0.005f);
    EXPECT_NEAR(result.lane<3>(),  1.084357f, 0.005f);
}
191
192 #elif ASTCENC_SIMD_WIDTH == 8
193
194 // VLA (8-wide) tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
195
196 /** @brief Test VLA change_sign. */
TEST(vfloat, ChangeSign)
{
    // The upper four lanes repeat the pattern used in the lower four
    vfloat values(-1.0f,  1.0f, -3.12f, 3.12f, -1.0f,  1.0f, -3.12f, 3.12f);
    vfloat signs (-1.0f, -1.0f,  3.12f, 3.12f, -1.0f, -1.0f,  3.12f, 3.12f);
    vfloat result = change_sign(values, signs);

    EXPECT_EQ(result.lane<0>(),  1.0f);
    EXPECT_EQ(result.lane<1>(), -1.0f);
    EXPECT_EQ(result.lane<2>(), -3.12f);
    EXPECT_EQ(result.lane<3>(),  3.12f);
    EXPECT_EQ(result.lane<4>(),  1.0f);
    EXPECT_EQ(result.lane<5>(), -1.0f);
    EXPECT_EQ(result.lane<6>(), -3.12f);
    EXPECT_EQ(result.lane<7>(),  3.12f);
}
211
212 /** @brief Test VLA atan. */
TEST(vfloat, Atan)
{
    // The upper four lanes repeat the inputs used in the lower four
    vfloat input(-0.15f, 0.0f, 0.9f, 2.1f, -0.15f, 0.0f, 0.9f, 2.1f);
    vfloat result = atan(input);

    EXPECT_NEAR(result.lane<0>(), -0.149061f, 0.005f);
    EXPECT_NEAR(result.lane<1>(),  0.000000f, 0.005f);
    EXPECT_NEAR(result.lane<2>(),  0.733616f, 0.005f);
    EXPECT_NEAR(result.lane<3>(),  1.123040f, 0.005f);
    EXPECT_NEAR(result.lane<4>(), -0.149061f, 0.005f);
    EXPECT_NEAR(result.lane<5>(),  0.000000f, 0.005f);
    EXPECT_NEAR(result.lane<6>(),  0.733616f, 0.005f);
    EXPECT_NEAR(result.lane<7>(),  1.123040f, 0.005f);
}
226
227 /** @brief Test VLA atan2. */
TEST(vfloat, Atan2)
{
    // The upper four lanes repeat the inputs used in the lower four
    vfloat y(-0.15f,  0.0f,  0.9f, 2.1f, -0.15f,  0.0f,  0.9f, 2.1f);
    vfloat x( 1.15f, -3.0f, -0.9f, 1.1f,  1.15f, -3.0f, -0.9f, 1.1f);
    vfloat result = atan2(y, x);

    EXPECT_NEAR(result.lane<0>(), -0.129816f, 0.005f);
    EXPECT_NEAR(result.lane<1>(),  3.141592f, 0.005f);
    EXPECT_NEAR(result.lane<2>(),  2.360342f, 0.005f);
    EXPECT_NEAR(result.lane<3>(),  1.084357f, 0.005f);
    EXPECT_NEAR(result.lane<4>(), -0.129816f, 0.005f);
    EXPECT_NEAR(result.lane<5>(),  3.141592f, 0.005f);
    EXPECT_NEAR(result.lane<6>(),  2.360342f, 0.005f);
    EXPECT_NEAR(result.lane<7>(),  1.084357f, 0.005f);
}
242
243 #endif
244
// Quiet NaN fed into the clamp and fp16 conversion tests.
static const float qnan { std::numeric_limits<float>::quiet_NaN() };

// 32-byte aligned float data; element i holds the value i.
alignas(32) static const float f32_data[9] = {
    0.0f, 1.0f, 2.0f,
    3.0f, 4.0f, 5.0f,
    6.0f, 7.0f, 8.0f
};

// 32-byte aligned signed integer data; element i holds the value i.
alignas(32) static const int s32_data[9] = {
    0, 1, 2,
    3, 4, 5,
    6, 7, 8
};

// 32-byte aligned unsigned byte data; element i holds the value i.
alignas(32) static const uint8_t u8_data[9] = {
    0, 1, 2,
    3, 4, 5,
    6, 7, 8
};
258
259 // VFLOAT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
260
261 /** @brief Test unaligned vfloat4 data load. */
TEST(vfloat4, UnalignedLoad)
{
    // Start one element into the 32-byte aligned array
    const float* src = &(f32_data[1]);
    vfloat4 loaded(src);

    EXPECT_EQ(loaded.lane<0>(), 1.0f);
    EXPECT_EQ(loaded.lane<1>(), 2.0f);
    EXPECT_EQ(loaded.lane<2>(), 3.0f);
    EXPECT_EQ(loaded.lane<3>(), 4.0f);
}
270
271 /** @brief Test scalar duplicated vfloat4 load. */
TEST(vfloat4, ScalarDupLoad)
{
    // A single scalar is expected to appear in every lane
    vfloat4 splat(1.1f);

    EXPECT_EQ(splat.lane<0>(), 1.1f);
    EXPECT_EQ(splat.lane<1>(), 1.1f);
    EXPECT_EQ(splat.lane<2>(), 1.1f);
    EXPECT_EQ(splat.lane<3>(), 1.1f);
}
280
281 /** @brief Test scalar vfloat4 load. */
TEST(vfloat4, ScalarLoad)
{
    // Four scalars are expected to land in lanes 0..3 in argument order
    vfloat4 vec(1.1f, 2.2f, 3.3f, 4.4f);

    EXPECT_EQ(vec.lane<0>(), 1.1f);
    EXPECT_EQ(vec.lane<1>(), 2.2f);
    EXPECT_EQ(vec.lane<2>(), 3.3f);
    EXPECT_EQ(vec.lane<3>(), 4.4f);
}
290
291 /** @brief Test copy vfloat4 load. */
TEST(vfloat4, CopyLoad)
{
    // Build a second vector from the raw member of the first
    vfloat4 source(1.1f, 2.2f, 3.3f, 4.4f);
    vfloat4 copy(source.m);

    EXPECT_EQ(copy.lane<0>(), 1.1f);
    EXPECT_EQ(copy.lane<1>(), 2.2f);
    EXPECT_EQ(copy.lane<2>(), 3.3f);
    EXPECT_EQ(copy.lane<3>(), 4.4f);
}
301
302 /** @brief Test vfloat4 scalar lane set. */
TEST(vfloat4, SetLane)
{
    vfloat4 vec(0.0f);

    // After each write, only the written lane may differ from before
    vec.set_lane<0>(1.0f);
    EXPECT_EQ(vec.lane<0>(), 1.0f);
    EXPECT_EQ(vec.lane<1>(), 0.0f);
    EXPECT_EQ(vec.lane<2>(), 0.0f);
    EXPECT_EQ(vec.lane<3>(), 0.0f);

    vec.set_lane<1>(2.0f);
    EXPECT_EQ(vec.lane<0>(), 1.0f);
    EXPECT_EQ(vec.lane<1>(), 2.0f);
    EXPECT_EQ(vec.lane<2>(), 0.0f);
    EXPECT_EQ(vec.lane<3>(), 0.0f);

    vec.set_lane<2>(3.0f);
    EXPECT_EQ(vec.lane<0>(), 1.0f);
    EXPECT_EQ(vec.lane<1>(), 2.0f);
    EXPECT_EQ(vec.lane<2>(), 3.0f);
    EXPECT_EQ(vec.lane<3>(), 0.0f);

    vec.set_lane<3>(4.0f);
    EXPECT_EQ(vec.lane<0>(), 1.0f);
    EXPECT_EQ(vec.lane<1>(), 2.0f);
    EXPECT_EQ(vec.lane<2>(), 3.0f);
    EXPECT_EQ(vec.lane<3>(), 4.0f);
}
331
332 /** @brief Test vfloat4 zero. */
TEST(vfloat4, Zero)
{
    // The factory is expected to produce 0.0f in every lane
    vfloat4 zeros = vfloat4::zero();

    EXPECT_EQ(zeros.lane<0>(), 0.0f);
    EXPECT_EQ(zeros.lane<1>(), 0.0f);
    EXPECT_EQ(zeros.lane<2>(), 0.0f);
    EXPECT_EQ(zeros.lane<3>(), 0.0f);
}
341
342 /** @brief Test vfloat4 load1. */
TEST(vfloat4, Load1)
{
    // One scalar read through a pointer is expected in every lane
    float scalar = 3.14f;
    vfloat4 splat = vfloat4::load1(&scalar);

    EXPECT_EQ(splat.lane<0>(), 3.14f);
    EXPECT_EQ(splat.lane<1>(), 3.14f);
    EXPECT_EQ(splat.lane<2>(), 3.14f);
    EXPECT_EQ(splat.lane<3>(), 3.14f);
}
352
353 /** @brief Test vfloat4 loada. */
TEST(vfloat4, Loada)
{
    // Load from the start of the 32-byte aligned array
    const float* src = &(f32_data[0]);
    vfloat4 loaded = vfloat4::loada(src);

    EXPECT_EQ(loaded.lane<0>(), 0.0f);
    EXPECT_EQ(loaded.lane<1>(), 1.0f);
    EXPECT_EQ(loaded.lane<2>(), 2.0f);
    EXPECT_EQ(loaded.lane<3>(), 3.0f);
}
362
363 /** @brief Test vfloat4 lane_id. */
TEST(vfloat4, LaneID)
{
    // Each lane is expected to hold its own index as a float
    vfloat4 ids = vfloat4::lane_id();

    EXPECT_EQ(ids.lane<0>(), 0.0f);
    EXPECT_EQ(ids.lane<1>(), 1.0f);
    EXPECT_EQ(ids.lane<2>(), 2.0f);
    EXPECT_EQ(ids.lane<3>(), 3.0f);
}
372
373 /** @brief Test vfloat4 swz to float4. */
TEST(vfloat4, swz4)
{
    vfloat4 src(1.0f, 2.0f, 3.0f, 4.0f);

    // Permutation where every source lane is used once
    vfloat4 shuffled = src.swz<0, 3, 2, 1>();
    EXPECT_EQ(shuffled.lane<0>(), 1.0f);
    EXPECT_EQ(shuffled.lane<1>(), 4.0f);
    EXPECT_EQ(shuffled.lane<2>(), 3.0f);
    EXPECT_EQ(shuffled.lane<3>(), 2.0f);

    // Selection that repeats source lane 1
    shuffled = src.swz<3, 1, 1, 0>();
    EXPECT_EQ(shuffled.lane<0>(), 4.0f);
    EXPECT_EQ(shuffled.lane<1>(), 2.0f);
    EXPECT_EQ(shuffled.lane<2>(), 2.0f);
    EXPECT_EQ(shuffled.lane<3>(), 1.0f);
}
389
390 /** @brief Test vfloat4 swz to float3. */
TEST(vfloat4, swz3)
{
    vfloat4 src(1.0f, 2.0f, 3.0f, 4.0f);

    // Three-lane swizzle; lane 3 of the result is expected to be zero
    vfloat4 shuffled = src.swz<0, 3, 2>();
    EXPECT_EQ(shuffled.lane<0>(), 1.0f);
    EXPECT_EQ(shuffled.lane<1>(), 4.0f);
    EXPECT_EQ(shuffled.lane<2>(), 3.0f);
    EXPECT_EQ(shuffled.lane<3>(), 0.0f);

    // Selection that repeats source lane 1
    shuffled = src.swz<3, 1, 1>();
    EXPECT_EQ(shuffled.lane<0>(), 4.0f);
    EXPECT_EQ(shuffled.lane<1>(), 2.0f);
    EXPECT_EQ(shuffled.lane<2>(), 2.0f);
    EXPECT_EQ(shuffled.lane<3>(), 0.0f);
}
406
407 /** @brief Test vfloat4 swz to float2. */
TEST(vfloat4, swz2)
{
    vfloat4 src(1.0f, 2.0f, 3.0f, 4.0f);

    // Two-lane swizzle; only lanes 0 and 1 of the result are checked
    vfloat4 shuffled = src.swz<0, 3>();
    EXPECT_EQ(shuffled.lane<0>(), 1.0f);
    EXPECT_EQ(shuffled.lane<1>(), 4.0f);

    shuffled = src.swz<2, 1>();
    EXPECT_EQ(shuffled.lane<0>(), 3.0f);
    EXPECT_EQ(shuffled.lane<1>(), 2.0f);
}
419
420 /** @brief Test vfloat4 add. */
TEST(vfloat4, vadd)
{
    vfloat4 lhs(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs(0.1f, 0.2f, 0.3f, 0.4f);

    // Vector + vector must match the scalar sum exactly in each lane
    lhs = lhs + rhs;
    EXPECT_EQ(lhs.lane<0>(), 1.0f + 0.1f);
    EXPECT_EQ(lhs.lane<1>(), 2.0f + 0.2f);
    EXPECT_EQ(lhs.lane<2>(), 3.0f + 0.3f);
    EXPECT_EQ(lhs.lane<3>(), 4.0f + 0.4f);
}
431
432 /** @brief Test vfloat4 self-add. */
TEST(vfloat4, vselfadd1)
{
    vfloat4 acc(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 step(0.1f, 0.2f, 0.3f, 0.4f);

    // Compound-add a named vector; the result must be exact
    acc += step;
    EXPECT_EQ(acc.lane<0>(), 1.0f + 0.1f);
    EXPECT_EQ(acc.lane<1>(), 2.0f + 0.2f);
    EXPECT_EQ(acc.lane<2>(), 3.0f + 0.3f);
    EXPECT_EQ(acc.lane<3>(), 4.0f + 0.4f);

    // Compound-add a temporary expression; three steps have now been
    // accumulated in total, so compare with a small tolerance
    acc += step + step;
    EXPECT_NEAR(acc.lane<0>(), 1.0f + 0.3f, 0.001f);
    EXPECT_NEAR(acc.lane<1>(), 2.0f + 0.6f, 0.001f);
    EXPECT_NEAR(acc.lane<2>(), 3.0f + 0.9f, 0.001f);
    EXPECT_NEAR(acc.lane<3>(), 4.0f + 1.2f, 0.001f);
}
452
453 /** @brief Test vfloat4 sub. */
TEST(vfloat4, vsub)
{
    vfloat4 lhs(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs(0.1f, 0.2f, 0.3f, 0.4f);

    // Vector - vector must match the scalar difference exactly in each lane
    lhs = lhs - rhs;
    EXPECT_EQ(lhs.lane<0>(), 1.0f - 0.1f);
    EXPECT_EQ(lhs.lane<1>(), 2.0f - 0.2f);
    EXPECT_EQ(lhs.lane<2>(), 3.0f - 0.3f);
    EXPECT_EQ(lhs.lane<3>(), 4.0f - 0.4f);
}
464
465 /** @brief Test vfloat4 mul. */
TEST(vfloat4, vmul)
{
    vfloat4 lhs(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs(0.1f, 0.2f, 0.3f, 0.4f);

    // Vector * vector must match the scalar product exactly in each lane
    lhs = lhs * rhs;
    EXPECT_EQ(lhs.lane<0>(), 1.0f * 0.1f);
    EXPECT_EQ(lhs.lane<1>(), 2.0f * 0.2f);
    EXPECT_EQ(lhs.lane<2>(), 3.0f * 0.3f);
    EXPECT_EQ(lhs.lane<3>(), 4.0f * 0.4f);
}
476
/** @brief Test vfloat4 mul by a scalar (vector * scalar). */
TEST(vfloat4, vsmul)
{
    vfloat4 vec(1.0f, 2.0f, 3.0f, 4.0f);
    float scale = 3.14f;

    // Vector on the left, scalar on the right
    vec = vec * scale;
    EXPECT_EQ(vec.lane<0>(), 1.0f * 3.14f);
    EXPECT_EQ(vec.lane<1>(), 2.0f * 3.14f);
    EXPECT_EQ(vec.lane<2>(), 3.0f * 3.14f);
    EXPECT_EQ(vec.lane<3>(), 4.0f * 3.14f);
}
488
/** @brief Test vfloat4 mul of a scalar by a vector (scalar * vector). */
TEST(vfloat4, svmul)
{
    float scale = 3.14f;
    vfloat4 vec(1.0f, 2.0f, 3.0f, 4.0f);

    // Scalar on the left, vector on the right
    vec = scale * vec;
    EXPECT_EQ(vec.lane<0>(), 3.14f * 1.0f);
    EXPECT_EQ(vec.lane<1>(), 3.14f * 2.0f);
    EXPECT_EQ(vec.lane<2>(), 3.14f * 3.0f);
    EXPECT_EQ(vec.lane<3>(), 3.14f * 4.0f);
}
500
501 /** @brief Test vfloat4 div. */
TEST(vfloat4, vdiv)
{
    vfloat4 numer(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 denom(0.1f, 0.2f, 0.3f, 0.4f);

    // Vector / vector must match the scalar quotient exactly in each lane
    numer = numer / denom;
    EXPECT_EQ(numer.lane<0>(), 1.0f / 0.1f);
    EXPECT_EQ(numer.lane<1>(), 2.0f / 0.2f);
    EXPECT_EQ(numer.lane<2>(), 3.0f / 0.3f);
    EXPECT_EQ(numer.lane<3>(), 4.0f / 0.4f);
}
512
/** @brief Test vfloat4 div by a scalar (vector / scalar). */
TEST(vfloat4, vsdiv)
{
    vfloat4 numer(1.0f, 2.0f, 3.0f, 4.0f);
    float denom = 0.3f;

    // Vector numerator, scalar denominator
    numer = numer / denom;
    EXPECT_EQ(numer.lane<0>(), 1.0f / 0.3f);
    EXPECT_EQ(numer.lane<1>(), 2.0f / 0.3f);
    EXPECT_EQ(numer.lane<2>(), 3.0f / 0.3f);
    EXPECT_EQ(numer.lane<3>(), 4.0f / 0.3f);
}
524
/** @brief Test vfloat4 div of a scalar by a vector (scalar / vector). */
TEST(vfloat4, svdiv)
{
    float numer = 3.0f;
    vfloat4 denom(0.1f, 0.2f, 0.3f, 0.4f);

    // Scalar numerator, vector denominator
    denom = numer / denom;
    EXPECT_EQ(denom.lane<0>(), 3.0f / 0.1f);
    EXPECT_EQ(denom.lane<1>(), 3.0f / 0.2f);
    EXPECT_EQ(denom.lane<2>(), 3.0f / 0.3f);
    EXPECT_EQ(denom.lane<3>(), 3.0f / 0.4f);
}
536
537 /** @brief Test vfloat4 ceq. */
TEST(vfloat4, ceq)
{
    // No lanes equal
    vfloat4 lhs1(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs1(0.1f, 0.2f, 0.3f, 0.4f);
    vmask4 eq1 = lhs1 == rhs1;
    EXPECT_EQ(0, mask(eq1));
    EXPECT_FALSE(any(eq1));
    EXPECT_FALSE(all(eq1));

    // Only lane 0 equal
    vfloat4 lhs2(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs2(1.0f, 0.2f, 0.3f, 0.4f);
    vmask4 eq2 = lhs2 == rhs2;
    EXPECT_EQ(0x1, mask(eq2));
    EXPECT_TRUE(any(eq2));
    EXPECT_FALSE(all(eq2));

    // Lanes 0 and 2 equal
    vfloat4 lhs3(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs3(1.0f, 0.2f, 3.0f, 0.4f);
    vmask4 eq3 = lhs3 == rhs3;
    EXPECT_EQ(0x5, mask(eq3));
    EXPECT_TRUE(any(eq3));
    EXPECT_FALSE(all(eq3));

    // Vector compared with itself: all lanes equal
    vfloat4 self(1.0f, 2.0f, 3.0f, 4.0f);
    vmask4 eq4 = self == self;
    EXPECT_EQ(0xF, mask(eq4));
    EXPECT_TRUE(any(eq4));
    EXPECT_TRUE(all(eq4));
}
567
568 /** @brief Test vfloat4 cne. */
TEST(vfloat4, cne)
{
    // All lanes differ
    vfloat4 lhs1(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs1(0.1f, 0.2f, 0.3f, 0.4f);
    vmask4 ne1 = lhs1 != rhs1;
    EXPECT_EQ(0xF, mask(ne1));
    EXPECT_TRUE(any(ne1));
    EXPECT_TRUE(all(ne1));

    // Lane 0 matches, lanes 1..3 differ
    vfloat4 lhs2(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs2(1.0f, 0.2f, 0.3f, 0.4f);
    vmask4 ne2 = lhs2 != rhs2;
    EXPECT_EQ(0xE, mask(ne2));
    EXPECT_TRUE(any(ne2));
    EXPECT_FALSE(all(ne2));

    // Lanes 0 and 2 match, lanes 1 and 3 differ
    vfloat4 lhs3(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs3(1.0f, 0.2f, 3.0f, 0.4f);
    vmask4 ne3 = lhs3 != rhs3;
    EXPECT_EQ(0xA, mask(ne3));
    EXPECT_TRUE(any(ne3));
    EXPECT_FALSE(all(ne3));

    // Vector compared with itself: no lanes differ
    vfloat4 self(1.0f, 2.0f, 3.0f, 4.0f);
    vmask4 ne4 = self != self;
    EXPECT_EQ(0, mask(ne4));
    EXPECT_FALSE(any(ne4));
    EXPECT_FALSE(all(ne4));
}
598
599 /** @brief Test vfloat4 clt. */
TEST(vfloat4, clt)
{
    // Lanes are greater, less, equal, less; only lanes 1 and 3 should be set
    vfloat4 lhs(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs(0.9f, 2.1f, 3.0f, 4.1f);
    vmask4 lt = lhs < rhs;
    EXPECT_EQ(0xA, mask(lt));
}
607
608 /** @brief Test vfloat4 cle. */
TEST(vfloat4, cle)
{
    // Lanes are greater, less, equal, less; lanes 1, 2 and 3 should be set
    vfloat4 lhs(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs(0.9f, 2.1f, 3.0f, 4.1f);
    vmask4 le = lhs <= rhs;
    EXPECT_EQ(0xE, mask(le));
}
616
617 /** @brief Test vfloat4 cgt. */
TEST(vfloat4, cgt)
{
    // Lanes are greater, less, equal, less; only lane 0 should be set
    vfloat4 lhs(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs(0.9f, 2.1f, 3.0f, 4.1f);
    vmask4 gt = lhs > rhs;
    EXPECT_EQ(0x1, mask(gt));
}
625
626 /** @brief Test vfloat4 cge. */
TEST(vfloat4, cge)
{
    // Lanes are greater, less, equal, less; lanes 0 and 2 should be set
    vfloat4 lhs(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs(0.9f, 2.1f, 3.0f, 4.1f);
    vmask4 ge = lhs >= rhs;
    EXPECT_EQ(0x5, mask(ge));
}
634
635 /** @brief Test vfloat4 min. */
TEST(vfloat4, min)
{
    vfloat4 lhs(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs(0.9f, 2.1f, 3.0f, 4.1f);

    // Vector against vector
    vfloat4 result = min(lhs, rhs);
    EXPECT_EQ(result.lane<0>(), 0.9f);
    EXPECT_EQ(result.lane<1>(), 2.0f);
    EXPECT_EQ(result.lane<2>(), 3.0f);
    EXPECT_EQ(result.lane<3>(), 4.0f);

    // Vector against a scalar below every lane
    float below_all = 0.3f;
    result = min(lhs, below_all);
    EXPECT_EQ(result.lane<0>(), 0.3f);
    EXPECT_EQ(result.lane<1>(), 0.3f);
    EXPECT_EQ(result.lane<2>(), 0.3f);
    EXPECT_EQ(result.lane<3>(), 0.3f);

    // Vector against a scalar that sits between lane 0 and lane 1
    float between = 1.5f;
    result = min(lhs, between);
    EXPECT_EQ(result.lane<0>(), 1.0f);
    EXPECT_EQ(result.lane<1>(), 1.5f);
    EXPECT_EQ(result.lane<2>(), 1.5f);
    EXPECT_EQ(result.lane<3>(), 1.5f);
}
660
661 /** @brief Test vfloat4 max. */
TEST(vfloat4, max)
{
    vfloat4 lhs(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 rhs(0.9f, 2.1f, 3.0f, 4.1f);

    // Vector against vector
    vfloat4 result = max(lhs, rhs);
    EXPECT_EQ(result.lane<0>(), 1.0f);
    EXPECT_EQ(result.lane<1>(), 2.1f);
    EXPECT_EQ(result.lane<2>(), 3.0f);
    EXPECT_EQ(result.lane<3>(), 4.1f);

    // Vector against a scalar above every lane
    float above_all = 4.3f;
    result = max(lhs, above_all);
    EXPECT_EQ(result.lane<0>(), 4.3f);
    EXPECT_EQ(result.lane<1>(), 4.3f);
    EXPECT_EQ(result.lane<2>(), 4.3f);
    EXPECT_EQ(result.lane<3>(), 4.3f);

    // Vector against a scalar that sits between lane 0 and lane 1
    float between = 1.5f;
    result = max(lhs, between);
    EXPECT_EQ(result.lane<0>(), 1.5f);
    EXPECT_EQ(result.lane<1>(), 2.0f);
    EXPECT_EQ(result.lane<2>(), 3.0f);
    EXPECT_EQ(result.lane<3>(), 4.0f);
}
686
687 /** @brief Test vfloat4 clamp. */
TEST(vfloat4, clamp)
{
    // Ordinary values: lanes below 2.1 rise to it, lanes above 3.0 drop to it
    vfloat4 plain(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 plain_out = clamp(2.1f, 3.0f, plain);
    EXPECT_EQ(plain_out.lane<0>(), 2.1f);
    EXPECT_EQ(plain_out.lane<1>(), 2.1f);
    EXPECT_EQ(plain_out.lane<2>(), 3.0f);
    EXPECT_EQ(plain_out.lane<3>(), 3.0f);

    // A NaN lane is expected to come back as the lower bound
    vfloat4 with_nan(1.0f, 2.0f, qnan, 4.0f);
    vfloat4 nan_out = clamp(2.1f, 3.0f, with_nan);
    EXPECT_EQ(nan_out.lane<0>(), 2.1f);
    EXPECT_EQ(nan_out.lane<1>(), 2.1f);
    EXPECT_EQ(nan_out.lane<2>(), 2.1f);
    EXPECT_EQ(nan_out.lane<3>(), 3.0f);
}
704
705 /** @brief Test vfloat4 clampz. */
TEST(vfloat4, clampz)
{
    // Ordinary values: negatives rise to zero, values above 3.0 drop to it
    vfloat4 plain(-1.0f, 0.0f, 0.1f, 4.0f);
    vfloat4 plain_out = clampz(3.0f, plain);
    EXPECT_EQ(plain_out.lane<0>(), 0.0f);
    EXPECT_EQ(plain_out.lane<1>(), 0.0f);
    EXPECT_EQ(plain_out.lane<2>(), 0.1f);
    EXPECT_EQ(plain_out.lane<3>(), 3.0f);

    // A NaN lane is expected to come back as zero
    vfloat4 with_nan(-1.0f, 0.0f, qnan, 4.0f);
    vfloat4 nan_out = clampz(3.0f, with_nan);
    EXPECT_EQ(nan_out.lane<0>(), 0.0f);
    EXPECT_EQ(nan_out.lane<1>(), 0.0f);
    EXPECT_EQ(nan_out.lane<2>(), 0.0f);
    EXPECT_EQ(nan_out.lane<3>(), 3.0f);
}
722
/** @brief Test vfloat4 clampzo. */
TEST(vfloat4, clampzo)
{
    // Ordinary values: negatives rise to zero, values above 1.0 drop to it
    vfloat4 plain(-1.0f, 0.0f, 0.1f, 4.0f);
    vfloat4 plain_out = clampzo(plain);
    EXPECT_EQ(plain_out.lane<0>(), 0.0f);
    EXPECT_EQ(plain_out.lane<1>(), 0.0f);
    EXPECT_EQ(plain_out.lane<2>(), 0.1f);
    EXPECT_EQ(plain_out.lane<3>(), 1.0f);

    // A NaN lane is expected to come back as zero
    vfloat4 with_nan(-1.0f, 0.0f, qnan, 4.0f);
    vfloat4 nan_out = clampzo(with_nan);
    EXPECT_EQ(nan_out.lane<0>(), 0.0f);
    EXPECT_EQ(nan_out.lane<1>(), 0.0f);
    EXPECT_EQ(nan_out.lane<2>(), 0.0f);
    EXPECT_EQ(nan_out.lane<3>(), 1.0f);
}
740
741 /** @brief Test vfloat4 abs. */
TEST(vfloat4, abs)
{
    // One negative lane, one zero lane, two positive lanes
    vfloat4 input(-1.0f, 0.0f, 0.1f, 4.0f);
    vfloat4 magnitude = abs(input);

    EXPECT_EQ(magnitude.lane<0>(), 1.0f);
    EXPECT_EQ(magnitude.lane<1>(), 0.0f);
    EXPECT_EQ(magnitude.lane<2>(), 0.1f);
    EXPECT_EQ(magnitude.lane<3>(), 4.0f);
}
751
752 /** @brief Test vfloat4 round. */
TEST(vfloat4, round)
{
    // Positive values, including the 1.5 tie which is expected to give 2.0
    vfloat4 a1(1.1f, 1.5f, 1.6f, 4.0f);
    vfloat4 r1 = round(a1);
    EXPECT_EQ(r1.lane<0>(), 1.0f);
    EXPECT_EQ(r1.lane<1>(), 2.0f);
    EXPECT_EQ(r1.lane<2>(), 2.0f);
    EXPECT_EQ(r1.lane<3>(), 4.0f);

    // Negative ties: -2.5 is expected to give -2.0 and -3.5 to give -4.0.
    // Lanes 1 and 3 carry the same inputs as lanes 0 and 2, so they are
    // checked too; this catches a backend that rounds lanes inconsistently.
    vfloat4 a2(-2.5f, -2.5f, -3.5f, -3.5f);
    vfloat4 r2 = round(a2);
    EXPECT_EQ(r2.lane<0>(), -2.0f);
    EXPECT_EQ(r2.lane<1>(), -2.0f);
    EXPECT_EQ(r2.lane<2>(), -4.0f);
    EXPECT_EQ(r2.lane<3>(), -4.0f);
}
767
768 /** @brief Test vfloat4 hmin. */
TEST(vfloat4, hmin)
{
    // Minimum sits in lane 0; expect it in every lane of the result
    vfloat4 min_first(1.1f, 1.5f, 1.6f, 4.0f);
    vfloat4 out_first = hmin(min_first);
    EXPECT_EQ(out_first.lane<0>(), 1.1f);
    EXPECT_EQ(out_first.lane<1>(), 1.1f);
    EXPECT_EQ(out_first.lane<2>(), 1.1f);
    EXPECT_EQ(out_first.lane<3>(), 1.1f);

    // Minimum sits in lane 3; expect it in every lane of the result
    vfloat4 min_last(1.1f, 1.5f, 1.6f, 0.2f);
    vfloat4 out_last = hmin(min_last);
    EXPECT_EQ(out_last.lane<0>(), 0.2f);
    EXPECT_EQ(out_last.lane<1>(), 0.2f);
    EXPECT_EQ(out_last.lane<2>(), 0.2f);
    EXPECT_EQ(out_last.lane<3>(), 0.2f);
}
785
786 /** @brief Test vfloat4 hmin_s. */
TEST(vfloat4, hmin_s)
{
    // Minimum sits in lane 0
    vfloat4 min_first(1.1f, 1.5f, 1.6f, 4.0f);
    float out_first = hmin_s(min_first);
    EXPECT_EQ(out_first, 1.1f);

    // Minimum sits in lane 3
    vfloat4 min_last(1.1f, 1.5f, 1.6f, 0.2f);
    float out_last = hmin_s(min_last);
    EXPECT_EQ(out_last, 0.2f);
}
797
798 /** @brief Test vfloat4 hmin_rgb_s. */
TEST(vfloat4, hmin_rgb_s)
{
    // Lane 3 holds the smallest value but is expected to be ignored
    vfloat4 small_w(1.1f, 1.5f, 1.6f, 0.2f);
    float out_small_w = hmin_rgb_s(small_w);
    EXPECT_EQ(out_small_w, 1.1f);

    // Smallest of the first three lanes sits in lane 1
    vfloat4 small_g(1.5f, 0.9f, 1.6f, 1.2f);
    float out_small_g = hmin_rgb_s(small_g);
    EXPECT_EQ(out_small_g, 0.9f);
}
809
810 /** @brief Test vfloat4 hmax. */
TEST(vfloat4, hmax)
{
    // Maximum sits in lane 3; expect it in every lane of the result
    vfloat4 max_last(1.1f, 1.5f, 1.6f, 4.0f);
    vfloat4 out_last = hmax(max_last);
    EXPECT_EQ(out_last.lane<0>(), 4.0f);
    EXPECT_EQ(out_last.lane<1>(), 4.0f);
    EXPECT_EQ(out_last.lane<2>(), 4.0f);
    EXPECT_EQ(out_last.lane<3>(), 4.0f);

    // Maximum sits in lane 2; expect it in every lane of the result
    vfloat4 max_mid(1.1f, 1.5f, 1.6f, 0.2f);
    vfloat4 out_mid = hmax(max_mid);
    EXPECT_EQ(out_mid.lane<0>(), 1.6f);
    EXPECT_EQ(out_mid.lane<1>(), 1.6f);
    EXPECT_EQ(out_mid.lane<2>(), 1.6f);
    EXPECT_EQ(out_mid.lane<3>(), 1.6f);
}
827
828 /** @brief Test vfloat4 hmax_s. */
TEST(vfloat4, hmax_s)
{
    // Maximum sits in lane 3
    vfloat4 max_last(1.1f, 1.5f, 1.6f, 4.0f);
    float out_last = hmax_s(max_last);
    EXPECT_EQ(out_last, 4.0f);

    // Maximum sits in lane 2
    vfloat4 max_mid(1.1f, 1.5f, 1.6f, 0.2f);
    float out_mid = hmax_s(max_mid);
    EXPECT_EQ(out_mid, 1.6f);
}
839
840 /** @brief Test vfloat4 hadd_s. */
TEST(vfloat4, hadd_s)
{
    // Compare with the scalar sum of all four lanes, within a tolerance
    vfloat4 input(1.1f, 1.5f, 1.6f, 4.0f);
    float expected = 1.1f + 1.5f + 1.6f + 4.0f;
    float actual = hadd_s(input);
    EXPECT_NEAR(actual, expected, 0.005f);
}
848
849 /** @brief Test vfloat4 hadd_rgb_s. */
TEST(vfloat4, hadd_rgb_s)
{
    // Compare with the scalar sum of the first three lanes only
    vfloat4 input(1.1f, 1.5f, 1.6f, 4.0f);
    float expected = 1.1f + 1.5f + 1.6f;
    float actual = hadd_rgb_s(input);
    EXPECT_NEAR(actual, expected, 0.005f);
}
857
858 /** @brief Test vfloat4 sqrt. */
TEST(vfloat4, sqrt)
{
    // Each lane must match the scalar std::sqrt result exactly
    vfloat4 input(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 roots = sqrt(input);

    EXPECT_EQ(roots.lane<0>(), std::sqrt(1.0f));
    EXPECT_EQ(roots.lane<1>(), std::sqrt(2.0f));
    EXPECT_EQ(roots.lane<2>(), std::sqrt(3.0f));
    EXPECT_EQ(roots.lane<3>(), std::sqrt(4.0f));
}
868
869 /** @brief Test vfloat4 select. */
TEST(vfloat4, select)
{
    // Build a mask by comparison: lanes 0 and 2 compare equal
    vfloat4 cmp_lhs(1.0f, 1.0f, 1.0f, 1.0f);
    vfloat4 cmp_rhs(1.0f, 2.0f, 1.0f, 2.0f);
    vmask4 cond = cmp_lhs == cmp_rhs;

    vfloat4 first(1.0f, 3.0f, 3.0f, 1.0f);
    vfloat4 second(4.0f, 2.0f, 2.0f, 4.0f);

    // Lanes with the mask set are expected to come from the second operand
    vfloat4 picked = select(first, second, cond);
    EXPECT_EQ(picked.lane<0>(), 4.0f);
    EXPECT_EQ(picked.lane<1>(), 3.0f);
    EXPECT_EQ(picked.lane<2>(), 2.0f);
    EXPECT_EQ(picked.lane<3>(), 1.0f);

    // Swap the operands and expect the complementary lanes
    vfloat4 picked_swapped = select(second, first, cond);
    EXPECT_EQ(picked_swapped.lane<0>(), 1.0f);
    EXPECT_EQ(picked_swapped.lane<1>(), 2.0f);
    EXPECT_EQ(picked_swapped.lane<2>(), 3.0f);
    EXPECT_EQ(picked_swapped.lane<3>(), 4.0f);
}
893
894 /** @brief Test vfloat4 select MSB only. */
TEST(vfloat4, select_msb)
{
    // Build a mask with only the top bit set, in lanes 0 and 2
    vint4 top_bits(0x80000000, 0, 0x80000000, 0);
    vmask4 cond(top_bits.m);

    vfloat4 first(1.0f, 3.0f, 3.0f, 1.0f);
    vfloat4 second(4.0f, 2.0f, 2.0f, 4.0f);

    // Lanes with the top bit set are expected to come from the second operand
    vfloat4 picked = select_msb(first, second, cond);
    EXPECT_EQ(picked.lane<0>(), 4.0f);
    EXPECT_EQ(picked.lane<1>(), 3.0f);
    EXPECT_EQ(picked.lane<2>(), 2.0f);
    EXPECT_EQ(picked.lane<3>(), 1.0f);

    // Swap the operands and expect the complementary lanes
    vfloat4 picked_swapped = select_msb(second, first, cond);
    EXPECT_EQ(picked_swapped.lane<0>(), 1.0f);
    EXPECT_EQ(picked_swapped.lane<1>(), 2.0f);
    EXPECT_EQ(picked_swapped.lane<2>(), 3.0f);
    EXPECT_EQ(picked_swapped.lane<3>(), 4.0f);
}
917
918 /** @brief Test vfloat4 gatherf. */
TEST(vfloat4, gatherf)
{
    // f32_data[i] == i, so each gathered lane should equal its index
    vint4 idx(0, 4, 3, 2);
    vfloat4 gathered = gatherf(f32_data, idx);

    EXPECT_EQ(gathered.lane<0>(), 0.0f);
    EXPECT_EQ(gathered.lane<1>(), 4.0f);
    EXPECT_EQ(gathered.lane<2>(), 3.0f);
    EXPECT_EQ(gathered.lane<3>(), 2.0f);
}
928
929 /** @brief Test vfloat4 storea. */
TEST(vfloat4, storea)
{
    // Destination is 16-byte aligned and written from its first element
    alignas(16) float dst[4];
    vfloat4 src(f32_data);
    storea(src, dst);

    EXPECT_EQ(dst[0], 0.0f);
    EXPECT_EQ(dst[1], 1.0f);
    EXPECT_EQ(dst[2], 2.0f);
    EXPECT_EQ(dst[3], 3.0f);
}
940
941 /** @brief Test vfloat4 store. */
TEST(vfloat4, store)
{
    // Write one element into a 16-byte aligned buffer, so that the store
    // address is offset from the aligned base
    alignas(16) float dst[5];
    vfloat4 src(f32_data);
    store(src, &(dst[1]));

    EXPECT_EQ(dst[1], 0.0f);
    EXPECT_EQ(dst[2], 1.0f);
    EXPECT_EQ(dst[3], 2.0f);
    EXPECT_EQ(dst[4], 3.0f);
}
952
953 /** @brief Test vfloat4 dot. */
TEST(vfloat4, dot)
{
    // Every lane product is 1.0, so the result should be 4.0 in all lanes
    vfloat4 lhs(1.0f, 2.0f, 4.0f, 8.0f);
    vfloat4 rhs(1.0f, 0.5f, 0.25f, 0.125f);
    vfloat4 simple = dot(lhs, rhs);
    EXPECT_EQ(simple.lane<0>(), 4.0f);
    EXPECT_EQ(simple.lane<1>(), 4.0f);
    EXPECT_EQ(simple.lane<2>(), 4.0f);
    EXPECT_EQ(simple.lane<3>(), 4.0f);

    // Values chosen so that the sum depends on the order of the additions
    float v0 = 141.2540435791015625f;
    float v1 = 5345345.5000000000000000f;
    float v2 = 234234.7031250000000000f;
    float v3 = 124353454080.0000000000000000f;

    vfloat4 ones(1.0f, 1.0f, 1.0f, 1.0f);
    vfloat4 values(v0, v1, v2, v3);
    vfloat4 ordered = dot(ones, values);

    // A plain left-to-right sum must NOT match, which confirms that the
    // chosen values are sensitive to reassociation
    EXPECT_FALSE(any(ordered == vfloat4(v0 + v1 + v2 + v3)));

    // The required association pattern must match exactly
    EXPECT_TRUE(all(ordered == vfloat4((v0 + v2) + (v1 + v3))));
}
980
981 /** @brief Test vfloat4 dot_s. */
TEST(vfloat4, dot_s)
{
    // Every lane product is 1.0, so the scalar result should be 4.0
    vfloat4 lhs(1.0f, 2.0f, 4.0f, 8.0f);
    vfloat4 rhs(1.0f, 0.5f, 0.25f, 0.125f);
    float simple = dot_s(lhs, rhs);
    EXPECT_EQ(simple, 4.0f);

    // Values chosen so that the sum depends on the order of the additions
    float v0 = 141.2540435791015625f;
    float v1 = 5345345.5000000000000000f;
    float v2 = 234234.7031250000000000f;
    float v3 = 124353454080.0000000000000000f;

    vfloat4 ones(1.0f, 1.0f, 1.0f, 1.0f);
    vfloat4 values(v0, v1, v2, v3);
    float ordered = dot_s(ones, values);

    // A plain left-to-right sum must NOT match, which confirms that the
    // chosen values are sensitive to reassociation
    EXPECT_NE(ordered, v0 + v1 + v2 + v3);

    // The required association pattern must match exactly
    EXPECT_EQ(ordered, (v0 + v2) + (v1 + v3));
}
1005
1006 /** @brief Test vfloat4 dot3. */
TEST(vfloat4, dot3)
{
    // The first three lane products are each 1.0; lane 3 of the result is
    // expected to be zero
    vfloat4 lhs(1.0f, 2.0f, 4.0f, 8.0f);
    vfloat4 rhs(1.0f, 0.5f, 0.25f, 0.125f);
    vfloat4 result = dot3(lhs, rhs);

    EXPECT_EQ(result.lane<0>(), 3.0f);
    EXPECT_EQ(result.lane<1>(), 3.0f);
    EXPECT_EQ(result.lane<2>(), 3.0f);
    EXPECT_EQ(result.lane<3>(), 0.0f);
}
1017
1018 /** @brief Test vfloat4 dot3_s. */
TEST(vfloat4, dot3_s)
{
    // The first three lane products are each 1.0, so expect 3.0
    vfloat4 lhs(1.0f, 2.0f, 4.0f, 8.0f);
    vfloat4 rhs(1.0f, 0.5f, 0.25f, 0.125f);
    float result = dot3_s(lhs, rhs);
    EXPECT_EQ(result, 3.0f);
}
1026
1027 /** @brief Test vfloat4 normalize. */
TEST(vfloat4, normalize)
{
    // Squared length is 1 + 4 + 9 + 16 = 30, so each lane is divided by
    // sqrt(30); compare with a tolerance
    vfloat4 input(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 unit = normalize(input);

    EXPECT_NEAR(unit.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f);
    EXPECT_NEAR(unit.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f);
    EXPECT_NEAR(unit.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f);
    EXPECT_NEAR(unit.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f);
}
1037
1038 /** @brief Test vfloat4 normalize_safe. */
TEST(vfloat4, normalize_safe)
{
    // Second argument, expected back when the input has zero length
    vfloat4 fallback(-1.0f, -1.0f, -1.0f, -1.0f);

    // Non-zero input: expect the normalized vector
    vfloat4 nonzero(1.0f, 2.0f, 3.0f, 4.0f);
    vfloat4 unit = normalize_safe(nonzero, fallback);
    EXPECT_NEAR(unit.lane<0>(), 1.0f / astc::sqrt(30.0f), 0.0005f);
    EXPECT_NEAR(unit.lane<1>(), 2.0f / astc::sqrt(30.0f), 0.0005f);
    EXPECT_NEAR(unit.lane<2>(), 3.0f / astc::sqrt(30.0f), 0.0005f);
    EXPECT_NEAR(unit.lane<3>(), 4.0f / astc::sqrt(30.0f), 0.0005f);

    // All-zero input: expect the fallback vector
    vfloat4 zero(0.0f, 0.0f, 0.0f, 0.0f);
    vfloat4 substituted = normalize_safe(zero, fallback);
    EXPECT_EQ(substituted.lane<0>(), -1.0f);
    EXPECT_EQ(substituted.lane<1>(), -1.0f);
    EXPECT_EQ(substituted.lane<2>(), -1.0f);
    EXPECT_EQ(substituted.lane<3>(), -1.0f);
}
1057
1058 /** @brief Test vfloat4 float_to_int. */
TEST(vfloat4, float_to_int)
{
    // Fractions are expected to be dropped: 1.5 -> 1 and -1.6 -> -1
    vfloat4 input(1.1f, 1.5f, -1.6f, 4.0f);
    vint4 ints = float_to_int(input);

    EXPECT_EQ(ints.lane<0>(), 1);
    EXPECT_EQ(ints.lane<1>(), 1);
    EXPECT_EQ(ints.lane<2>(), -1);
    EXPECT_EQ(ints.lane<3>(), 4);
}
1068
/** @brief Test vfloat4 float_to_int_rtn. */
TEST(vfloat4, float_to_int_rtn)
{
    // Values are expected to round rather than truncate: 1.5 -> 2, 1.6 -> 2
    vfloat4 input(1.1f, 1.5f, 1.6f, 4.0f);
    vint4 ints = float_to_int_rtn(input);

    EXPECT_EQ(ints.lane<0>(), 1);
    EXPECT_EQ(ints.lane<1>(), 2);
    EXPECT_EQ(ints.lane<2>(), 2);
    EXPECT_EQ(ints.lane<3>(), 4);
}
1079
/** @brief Test vint4 int_to_float conversion. */
TEST(vfloat4, int_to_float)
{
    // Small integers convert to float exactly
    vint4 input(1, 2, 3, 4);
    vfloat4 floats = int_to_float(input);

    EXPECT_EQ(floats.lane<0>(), 1.0f);
    EXPECT_EQ(floats.lane<1>(), 2.0f);
    EXPECT_EQ(floats.lane<2>(), 3.0f);
    EXPECT_EQ(floats.lane<3>(), 4.0f);
}
1090
1091 /** @brief Test vfloat4 float to fp16 conversion. */
TEST(vfloat4, float_to_float16)
{
    // Use float literals so that nothing is implicitly narrowed from double;
    // every value here is exactly representable as a float
    vfloat4 a(1.5f, 234.5f, 345345.0f, qnan);
    vint4 r = float_to_float16(a);

    // Normal numbers
    EXPECT_EQ(r.lane<0>(), 0x3E00);
    EXPECT_EQ(r.lane<1>(), 0x5B54);

    // Large numbers convert to infinity
    EXPECT_EQ(r.lane<2>(), 0x7C00);

    // NaN must convert to any valid NaN encoding
    EXPECT_EQ((r.lane<3>() >> 10) & 0x1F, 0x1F); // Exponent must be all 1s
    EXPECT_NE(r.lane<3>() & (0x3FF), 0);         // Mantissa must be non-zero
}
1108
1109 /** @brief Test float to fp16 conversion. */
TEST(sfloat,float_to_float16)1110 TEST(sfloat, float_to_float16)
1111 {
1112 int r = float_to_float16(234.5);
1113 EXPECT_EQ(r, 0x5B54);
1114 }
1115
1116 /** @brief Test vfloat4 fp16 to float conversion. */
TEST(vfloat4,float16_to_float)1117 TEST(vfloat4, float16_to_float)
1118 { vint4 a(0x3E00, 0x5B54, 0x7C00, 0xFFFF);
1119 vfloat4 r = float16_to_float(a);
1120
1121 // Normal numbers
1122 EXPECT_EQ(r.lane<0>(), 1.5);
1123 EXPECT_EQ(r.lane<1>(), 234.5);
1124
1125 // Infinities must be preserved
1126 EXPECT_NE(std::isinf(r.lane<2>()), 0);
1127
1128 // NaNs must be preserved
1129 EXPECT_NE(std::isnan(r.lane<3>()), 0);
1130 }
1131
1132 /** @brief Test fp16 to float conversion. */
TEST(sfloat,float16_to_float)1133 TEST(sfloat, float16_to_float)
1134 {
1135 float r = float16_to_float(0x5B54);
1136 EXPECT_EQ(r, 234.5);
1137 }
1138
1139 // VINT4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1140
1141 /** @brief Test unaligned vint4 data load. */
TEST(vint4,UnalignedLoad)1142 TEST(vint4, UnalignedLoad)
1143 {
1144 vint4 a(&(s32_data[1]));
1145 EXPECT_EQ(a.lane<0>(), 1);
1146 EXPECT_EQ(a.lane<1>(), 2);
1147 EXPECT_EQ(a.lane<2>(), 3);
1148 EXPECT_EQ(a.lane<3>(), 4);
1149 }
1150
1151 /** @brief Test unaligned vint4 data load. */
TEST(vint4,UnalignedLoad8)1152 TEST(vint4, UnalignedLoad8)
1153 {
1154 vint4 a(&(u8_data[1]));
1155 EXPECT_EQ(a.lane<0>(), 1);
1156 EXPECT_EQ(a.lane<1>(), 2);
1157 EXPECT_EQ(a.lane<2>(), 3);
1158 EXPECT_EQ(a.lane<3>(), 4);
1159 }
1160
1161 /** @brief Test scalar duplicated vint4 load. */
TEST(vint4,ScalarDupLoad)1162 TEST(vint4, ScalarDupLoad)
1163 {
1164 vint4 a(42);
1165 EXPECT_EQ(a.lane<0>(), 42);
1166 EXPECT_EQ(a.lane<1>(), 42);
1167 EXPECT_EQ(a.lane<2>(), 42);
1168 EXPECT_EQ(a.lane<3>(), 42);
1169 }
1170
1171 /** @brief Test scalar vint4 load. */
TEST(vint4,ScalarLoad)1172 TEST(vint4, ScalarLoad)
1173 {
1174 vint4 a(11, 22, 33, 44);
1175 EXPECT_EQ(a.lane<0>(), 11);
1176 EXPECT_EQ(a.lane<1>(), 22);
1177 EXPECT_EQ(a.lane<2>(), 33);
1178 EXPECT_EQ(a.lane<3>(), 44);
1179 }
1180
1181 /** @brief Test copy vint4 load. */
TEST(vint4,CopyLoad)1182 TEST(vint4, CopyLoad)
1183 {
1184 vint4 s(11, 22, 33, 44);
1185 vint4 a(s.m);
1186 EXPECT_EQ(a.lane<0>(), 11);
1187 EXPECT_EQ(a.lane<1>(), 22);
1188 EXPECT_EQ(a.lane<2>(), 33);
1189 EXPECT_EQ(a.lane<3>(), 44);
1190 }
1191
1192 /** @brief Test vint4 scalar lane set. */
TEST(int4,SetLane)1193 TEST(int4, SetLane)
1194 {
1195 vint4 a(0);
1196
1197 a.set_lane<0>(1);
1198 EXPECT_EQ(a.lane<0>(), 1);
1199 EXPECT_EQ(a.lane<1>(), 0);
1200 EXPECT_EQ(a.lane<2>(), 0);
1201 EXPECT_EQ(a.lane<3>(), 0);
1202
1203 a.set_lane<1>(2);
1204 EXPECT_EQ(a.lane<0>(), 1);
1205 EXPECT_EQ(a.lane<1>(), 2);
1206 EXPECT_EQ(a.lane<2>(), 0);
1207 EXPECT_EQ(a.lane<3>(), 0);
1208
1209 a.set_lane<2>(3);
1210 EXPECT_EQ(a.lane<0>(), 1);
1211 EXPECT_EQ(a.lane<1>(), 2);
1212 EXPECT_EQ(a.lane<2>(), 3);
1213 EXPECT_EQ(a.lane<3>(), 0);
1214
1215 a.set_lane<3>(4);
1216 EXPECT_EQ(a.lane<0>(), 1);
1217 EXPECT_EQ(a.lane<1>(), 2);
1218 EXPECT_EQ(a.lane<2>(), 3);
1219 EXPECT_EQ(a.lane<3>(), 4);
1220 }
1221
1222 /** @brief Test vint4 zero. */
TEST(vint4,Zero)1223 TEST(vint4, Zero)
1224 {
1225 vint4 a = vint4::zero();
1226 EXPECT_EQ(a.lane<0>(), 0);
1227 EXPECT_EQ(a.lane<1>(), 0);
1228 EXPECT_EQ(a.lane<2>(), 0);
1229 EXPECT_EQ(a.lane<3>(), 0);
1230 }
1231
1232 /** @brief Test vint4 load1. */
TEST(vint4,Load1)1233 TEST(vint4, Load1)
1234 {
1235 int s = 42;
1236 vint4 a = vint4::load1(&s);
1237 EXPECT_EQ(a.lane<0>(), 42);
1238 EXPECT_EQ(a.lane<1>(), 42);
1239 EXPECT_EQ(a.lane<2>(), 42);
1240 EXPECT_EQ(a.lane<3>(), 42);
1241 }
1242
1243 /** @brief Test vint4 loada. */
TEST(vint4,Loada)1244 TEST(vint4, Loada)
1245 {
1246 vint4 a = vint4::loada(&(s32_data[0]));
1247 EXPECT_EQ(a.lane<0>(), 0);
1248 EXPECT_EQ(a.lane<1>(), 1);
1249 EXPECT_EQ(a.lane<2>(), 2);
1250 EXPECT_EQ(a.lane<3>(), 3);
1251 }
1252
1253 /** @brief Test vint4 lane_id. */
TEST(vint4,LaneID)1254 TEST(vint4, LaneID)
1255 {
1256 vint4 a = vint4::lane_id();
1257 EXPECT_EQ(a.lane<0>(), 0);
1258 EXPECT_EQ(a.lane<1>(), 1);
1259 EXPECT_EQ(a.lane<2>(), 2);
1260 EXPECT_EQ(a.lane<3>(), 3);
1261 }
1262
1263 /** @brief Test vint4 add. */
TEST(vint4,vadd)1264 TEST(vint4, vadd)
1265 {
1266 vint4 a(1, 2, 3, 4);
1267 vint4 b(2, 3, 4, 5);
1268 a = a + b;
1269 EXPECT_EQ(a.lane<0>(), 1 + 2);
1270 EXPECT_EQ(a.lane<1>(), 2 + 3);
1271 EXPECT_EQ(a.lane<2>(), 3 + 4);
1272 EXPECT_EQ(a.lane<3>(), 4 + 5);
1273 }
1274
1275 /** @brief Test vint4 self-add. */
TEST(vint4,vselfadd)1276 TEST(vint4, vselfadd)
1277 {
1278 vint4 a(1, 2, 3, 4);
1279 vint4 b(2, 3, 4, 5);
1280 a += b;
1281
1282 EXPECT_EQ(a.lane<0>(), 1 + 2);
1283 EXPECT_EQ(a.lane<1>(), 2 + 3);
1284 EXPECT_EQ(a.lane<2>(), 3 + 4);
1285 EXPECT_EQ(a.lane<3>(), 4 + 5);
1286 }
1287
1288 /** @brief Test vint4 add. */
TEST(vint4,vsadd)1289 TEST(vint4, vsadd)
1290 {
1291 vint4 a(1, 2, 3, 4);
1292 int b = 5;
1293 a = a + b;
1294 EXPECT_EQ(a.lane<0>(), 1 + 5);
1295 EXPECT_EQ(a.lane<1>(), 2 + 5);
1296 EXPECT_EQ(a.lane<2>(), 3 + 5);
1297 EXPECT_EQ(a.lane<3>(), 4 + 5);
1298 }
1299
1300 /** @brief Test vint4 sub. */
TEST(vint4,vsub)1301 TEST(vint4, vsub)
1302 {
1303 vint4 a(1, 2, 4, 4);
1304 vint4 b(2, 3, 3, 5);
1305 a = a - b;
1306 EXPECT_EQ(a.lane<0>(), 1 - 2);
1307 EXPECT_EQ(a.lane<1>(), 2 - 3);
1308 EXPECT_EQ(a.lane<2>(), 4 - 3);
1309 EXPECT_EQ(a.lane<3>(), 4 - 5);
1310 }
1311
1312 /** @brief Test vint4 sub. */
TEST(vint4,vssub)1313 TEST(vint4, vssub)
1314 {
1315 vint4 a(1, 2, 4, 4);
1316 int b = 5;
1317 a = a - b;
1318 EXPECT_EQ(a.lane<0>(), 1 - 5);
1319 EXPECT_EQ(a.lane<1>(), 2 - 5);
1320 EXPECT_EQ(a.lane<2>(), 4 - 5);
1321 EXPECT_EQ(a.lane<3>(), 4 - 5);
1322 }
1323
1324 /** @brief Test vint4 mul. */
TEST(vint4,vmul)1325 TEST(vint4, vmul)
1326 {
1327 vint4 a(1, 2, 4, 4);
1328 vint4 b(2, 3, 3, 5);
1329 a = a * b;
1330 EXPECT_EQ(a.lane<0>(), 1 * 2);
1331 EXPECT_EQ(a.lane<1>(), 2 * 3);
1332 EXPECT_EQ(a.lane<2>(), 4 * 3);
1333 EXPECT_EQ(a.lane<3>(), 4 * 5);
1334 }
1335
1336 /** @brief Test vint4 mul. */
TEST(vint4,vsmul)1337 TEST(vint4, vsmul)
1338 {
1339 vint4 a(1, 2, 4, 4);
1340 a = a * 3;
1341 EXPECT_EQ(a.lane<0>(), 1 * 3);
1342 EXPECT_EQ(a.lane<1>(), 2 * 3);
1343 EXPECT_EQ(a.lane<2>(), 4 * 3);
1344 EXPECT_EQ(a.lane<3>(), 4 * 3);
1345
1346 vint4 b(1, 2, -4, 4);
1347 b = b * -3;
1348 EXPECT_EQ(b.lane<0>(), 1 * -3);
1349 EXPECT_EQ(b.lane<1>(), 2 * -3);
1350 EXPECT_EQ(b.lane<2>(), -4 * -3);
1351 EXPECT_EQ(b.lane<3>(), 4 * -3);
1352 }
1353
1354 /** @brief Test vint4 bitwise invert. */
TEST(vint4,bit_invert)1355 TEST(vint4, bit_invert)
1356 {
1357 vint4 a(-1, 0, 1, 2);
1358 a = ~a;
1359 EXPECT_EQ(a.lane<0>(), ~-1);
1360 EXPECT_EQ(a.lane<1>(), ~0);
1361 EXPECT_EQ(a.lane<2>(), ~1);
1362 EXPECT_EQ(a.lane<3>(), ~2);
1363 }
1364
1365 /** @brief Test vint4 bitwise or. */
TEST(vint4,bit_vor)1366 TEST(vint4, bit_vor)
1367 {
1368 vint4 a(1, 2, 3, 4);
1369 vint4 b(2, 3, 4, 5);
1370 a = a | b;
1371 EXPECT_EQ(a.lane<0>(), 3);
1372 EXPECT_EQ(a.lane<1>(), 3);
1373 EXPECT_EQ(a.lane<2>(), 7);
1374 EXPECT_EQ(a.lane<3>(), 5);
1375 }
1376
/** @brief Test vint4 vector | scalar bitwise or. */
TEST(vint4, bit_vsor)
{
	vint4 vec(1, 2, 3, 4);
	int scalar = 2;
	vint4 combined = vec | scalar;
	EXPECT_EQ(combined.lane<0>(), 3);
	EXPECT_EQ(combined.lane<1>(), 2);
	EXPECT_EQ(combined.lane<2>(), 3);
	EXPECT_EQ(combined.lane<3>(), 6);
}
1387
1388 /** @brief Test vint4 bitwise and. */
TEST(vint4,bit_vand)1389 TEST(vint4, bit_vand)
1390 {
1391 vint4 a(1, 2, 3, 4);
1392 vint4 b(2, 3, 4, 5);
1393 a = a & b;
1394 EXPECT_EQ(a.lane<0>(), 0);
1395 EXPECT_EQ(a.lane<1>(), 2);
1396 EXPECT_EQ(a.lane<2>(), 0);
1397 EXPECT_EQ(a.lane<3>(), 4);
1398 }
1399
1400 /** @brief Test vint4 bitwise and. */
TEST(vint4,bit_vsand)1401 TEST(vint4, bit_vsand)
1402 {
1403 vint4 a(1, 2, 3, 4);
1404 int b = 2;
1405 a = a & b;
1406 EXPECT_EQ(a.lane<0>(), 0);
1407 EXPECT_EQ(a.lane<1>(), 2);
1408 EXPECT_EQ(a.lane<2>(), 2);
1409 EXPECT_EQ(a.lane<3>(), 0);
1410 }
1411
1412 /** @brief Test vint4 bitwise xor. */
TEST(vint4,bit_vxor)1413 TEST(vint4, bit_vxor)
1414 {
1415 vint4 a(1, 2, 3, 4);
1416 vint4 b(2, 3, 4, 5);
1417 a = a ^ b;
1418 EXPECT_EQ(a.lane<0>(), 3);
1419 EXPECT_EQ(a.lane<1>(), 1);
1420 EXPECT_EQ(a.lane<2>(), 7);
1421 EXPECT_EQ(a.lane<3>(), 1);
1422 }
1423
1424 /** @brief Test vint4 bitwise xor. */
TEST(vint4,bit_vsxor)1425 TEST(vint4, bit_vsxor)
1426 {
1427 vint4 a(1, 2, 3, 4);
1428 int b = 2;
1429 a = a ^ b;
1430 EXPECT_EQ(a.lane<0>(), 3);
1431 EXPECT_EQ(a.lane<1>(), 0);
1432 EXPECT_EQ(a.lane<2>(), 1);
1433 EXPECT_EQ(a.lane<3>(), 6);
1434 }
1435
1436 /** @brief Test vint4 ceq. */
TEST(vint4,ceq)1437 TEST(vint4, ceq)
1438 {
1439 vint4 a1(1, 2, 3, 4);
1440 vint4 b1(0, 1, 2, 3);
1441 vmask4 r1 = a1 == b1;
1442 EXPECT_EQ(0, mask(r1));
1443 EXPECT_EQ(false, any(r1));
1444 EXPECT_EQ(false, all(r1));
1445
1446 vint4 a2(1, 2, 3, 4);
1447 vint4 b2(1, 0, 0, 0);
1448 vmask4 r2 = a2 == b2;
1449 EXPECT_EQ(0x1, mask(r2));
1450 EXPECT_EQ(true, any(r2));
1451 EXPECT_EQ(false, all(r2));
1452
1453 vint4 a3(1, 2, 3, 4);
1454 vint4 b3(1, 0, 3, 0);
1455 vmask4 r3 = a3 == b3;
1456 EXPECT_EQ(0x5, mask(r3));
1457 EXPECT_EQ(true, any(r3));
1458 EXPECT_EQ(false, all(r3));
1459
1460 vint4 a4(1, 2, 3, 4);
1461 vmask4 r4 = a4 == a4;
1462 EXPECT_EQ(0xF, mask(r4));
1463 EXPECT_EQ(true, any(r4));
1464 EXPECT_EQ(true, all(r4));
1465 }
1466
1467 /** @brief Test vint4 cne. */
TEST(vint4,cne)1468 TEST(vint4, cne)
1469 {
1470 vint4 a1(1, 2, 3, 4);
1471 vint4 b1(0, 1, 2, 3);
1472 vmask4 r1 = a1 != b1;
1473 EXPECT_EQ(0xF, mask(r1));
1474 EXPECT_EQ(true, any(r1));
1475 EXPECT_EQ(true, all(r1));
1476
1477 vint4 a2(1, 2, 3, 4);
1478 vint4 b2(1, 0, 0, 0);
1479 vmask4 r2 = a2 != b2;
1480 EXPECT_EQ(0xE, mask(r2));
1481 EXPECT_EQ(true, any(r2));
1482 EXPECT_EQ(false, all(r2));
1483
1484 vint4 a3(1, 2, 3, 4);
1485 vint4 b3(1, 0, 3, 0);
1486 vmask4 r3 = a3 != b3;
1487 EXPECT_EQ(0xA, mask(r3));
1488 EXPECT_EQ(true, any(r3));
1489 EXPECT_EQ(false, all(r3));
1490
1491 vint4 a4(1, 2, 3, 4);
1492 vmask4 r4 = a4 != a4;
1493 EXPECT_EQ(0, mask(r4));
1494 EXPECT_EQ(false, any(r4));
1495 EXPECT_EQ(false, all(r4));
1496 }
1497
1498 /** @brief Test vint4 clt. */
TEST(vint4,clt)1499 TEST(vint4, clt)
1500 {
1501 vint4 a(1, 2, 3, 4);
1502 vint4 b(0, 3, 3, 5);
1503 vmask4 r = a < b;
1504 EXPECT_EQ(0xA, mask(r));
1505 }
1506
1507 /** @brief Test vint4 cgt. */
TEST(vint4,cle)1508 TEST(vint4, cle)
1509 {
1510 vint4 a(1, 2, 3, 4);
1511 vint4 b(0, 3, 3, 5);
1512 vmask4 r = a > b;
1513 EXPECT_EQ(0x1, mask(r));
1514 }
1515
1516 /** @brief Test vint4 lsl. */
TEST(vint4,lsl)1517 TEST(vint4, lsl)
1518 {
1519 vint4 a(1, 2, 4, 4);
1520 a = lsl<0>(a);
1521 EXPECT_EQ(a.lane<0>(), 1);
1522 EXPECT_EQ(a.lane<1>(), 2);
1523 EXPECT_EQ(a.lane<2>(), 4);
1524 EXPECT_EQ(a.lane<3>(), 4);
1525
1526 a = lsl<1>(a);
1527 EXPECT_EQ(a.lane<0>(), 2);
1528 EXPECT_EQ(a.lane<1>(), 4);
1529 EXPECT_EQ(a.lane<2>(), 8);
1530 EXPECT_EQ(a.lane<3>(), 8);
1531
1532 a = lsl<2>(a);
1533 EXPECT_EQ(a.lane<0>(), 8);
1534 EXPECT_EQ(a.lane<1>(), 16);
1535 EXPECT_EQ(a.lane<2>(), 32);
1536 EXPECT_EQ(a.lane<3>(), 32);
1537 }
1538
1539 /** @brief Test vint4 lsr. */
TEST(vint4,lsr)1540 TEST(vint4, lsr)
1541 {
1542 vint4 a(1, 2, 4, -4);
1543 a = lsr<0>(a);
1544 EXPECT_EQ(a.lane<0>(), 1);
1545 EXPECT_EQ(a.lane<1>(), 2);
1546 EXPECT_EQ(a.lane<2>(), 4);
1547 EXPECT_EQ(a.lane<3>(), 0xFFFFFFFC);
1548
1549 a = lsr<1>(a);
1550 EXPECT_EQ(a.lane<0>(), 0);
1551 EXPECT_EQ(a.lane<1>(), 1);
1552 EXPECT_EQ(a.lane<2>(), 2);
1553 EXPECT_EQ(a.lane<3>(), 0x7FFFFFFE);
1554
1555 a = lsr<2>(a);
1556 EXPECT_EQ(a.lane<0>(), 0);
1557 EXPECT_EQ(a.lane<1>(), 0);
1558 EXPECT_EQ(a.lane<2>(), 0);
1559 EXPECT_EQ(a.lane<3>(), 0x1FFFFFFF);
1560 }
1561
1562 /** @brief Test vint4 asr. */
TEST(vint4,asr)1563 TEST(vint4, asr)
1564 {
1565 vint4 a(1, 2, 4, -4);
1566 a = asr<0>(a);
1567 EXPECT_EQ(a.lane<0>(), 1);
1568 EXPECT_EQ(a.lane<1>(), 2);
1569 EXPECT_EQ(a.lane<2>(), 4);
1570 EXPECT_EQ(a.lane<3>(), -4);
1571
1572 a = asr<1>(a);
1573 EXPECT_EQ(a.lane<0>(), 0);
1574 EXPECT_EQ(a.lane<1>(), 1);
1575 EXPECT_EQ(a.lane<2>(), 2);
1576 EXPECT_EQ(a.lane<3>(), -2);
1577
1578 // Note - quirk of asr is that you will get "stuck" at -1
1579 a = asr<2>(a);
1580 EXPECT_EQ(a.lane<0>(), 0);
1581 EXPECT_EQ(a.lane<1>(), 0);
1582 EXPECT_EQ(a.lane<2>(), 0);
1583 EXPECT_EQ(a.lane<3>(), -1);
1584 }
1585
1586 /** @brief Test vint4 min. */
TEST(vint4,min)1587 TEST(vint4, min)
1588 {
1589 vint4 a(1, 2, 3, 4);
1590 vint4 b(0, 3, 3, 5);
1591 vint4 r = min(a, b);
1592 EXPECT_EQ(r.lane<0>(), 0);
1593 EXPECT_EQ(r.lane<1>(), 2);
1594 EXPECT_EQ(r.lane<2>(), 3);
1595 EXPECT_EQ(r.lane<3>(), 4);
1596 }
1597
1598 /** @brief Test vint4 max. */
TEST(vint4,max)1599 TEST(vint4, max)
1600 {
1601 vint4 a(1, 2, 3, 4);
1602 vint4 b(0, 3, 3, 5);
1603 vint4 r = max(a, b);
1604 EXPECT_EQ(r.lane<0>(), 1);
1605 EXPECT_EQ(r.lane<1>(), 3);
1606 EXPECT_EQ(r.lane<2>(), 3);
1607 EXPECT_EQ(r.lane<3>(), 5);
1608 }
1609
1610 /** @brief Test vint4 clamp. */
TEST(vint4,clamp)1611 TEST(vint4, clamp)
1612 {
1613 vint4 a(1, 2, 3, 4);
1614 vint4 r = clamp(2, 3, a);
1615 EXPECT_EQ(r.lane<0>(), 2);
1616 EXPECT_EQ(r.lane<1>(), 2);
1617 EXPECT_EQ(r.lane<2>(), 3);
1618 EXPECT_EQ(r.lane<3>(), 3);
1619 }
1620
1621 /** @brief Test vint4 hmin. */
TEST(vint4,hmin)1622 TEST(vint4, hmin)
1623 {
1624 vint4 a1(1, 2, 1, 2);
1625 vint4 r1 = hmin(a1);
1626 EXPECT_EQ(r1.lane<0>(), 1);
1627 EXPECT_EQ(r1.lane<1>(), 1);
1628 EXPECT_EQ(r1.lane<2>(), 1);
1629 EXPECT_EQ(r1.lane<3>(), 1);
1630
1631 vint4 a2(1, 2, -1, 5);
1632 vint4 r2 = hmin(a2);
1633 EXPECT_EQ(r2.lane<0>(), -1);
1634 EXPECT_EQ(r2.lane<1>(), -1);
1635 EXPECT_EQ(r2.lane<2>(), -1);
1636 EXPECT_EQ(r2.lane<3>(), -1);
1637 }
1638
1639 /** @brief Test vint4 hmax. */
TEST(vint4,hmax)1640 TEST(vint4, hmax)
1641 {
1642 vint4 a1(1, 3, 1, 2);
1643 vint4 r1 = hmax(a1);
1644 EXPECT_EQ(r1.lane<0>(), 3);
1645 EXPECT_EQ(r1.lane<1>(), 3);
1646 EXPECT_EQ(r1.lane<2>(), 3);
1647 EXPECT_EQ(r1.lane<3>(), 3);
1648
1649 vint4 a2(1, 2, -1, 5);
1650 vint4 r2 = hmax(a2);
1651 EXPECT_EQ(r2.lane<0>(), 5);
1652 EXPECT_EQ(r2.lane<1>(), 5);
1653 EXPECT_EQ(r2.lane<2>(), 5);
1654 EXPECT_EQ(r2.lane<3>(), 5);
1655 }
1656
1657 /** @brief Test vint4 hadd_s. */
TEST(vint4,hadd_s)1658 TEST(vint4, hadd_s)
1659 {
1660 vint4 a1(1, 3, 5, 7);
1661 int r1 = hadd_s(a1);
1662 EXPECT_EQ(r1, 16);
1663
1664 vint4 a2(1, 2, -1, 5);
1665 int r2 = hadd_s(a2);
1666 EXPECT_EQ(r2, 7);
1667 }
1668
1669 /** @brief Test vint4 hadd_rgb_s. */
TEST(vint4,hadd_rgb_s)1670 TEST(vint4, hadd_rgb_s)
1671 {
1672 vint4 a1(1, 3, 5, 7);
1673 int r1 = hadd_rgb_s(a1);
1674 EXPECT_EQ(r1, 9);
1675
1676 vint4 a2(1, 2, -1, 5);
1677 int r2 = hadd_rgb_s(a2);
1678 EXPECT_EQ(r2, 2);
1679 }
1680
1681 /** @brief Test vint4 clz. */
TEST(vint4,clz)1682 TEST(vint4, clz)
1683 {
1684 vint4 a1(0x80000000, 0x40000000, 0x20000000, 0x10000000);
1685 vint4 r1 = clz(a1);
1686 EXPECT_EQ(r1.lane<0>(), 0);
1687 EXPECT_EQ(r1.lane<1>(), 1);
1688 EXPECT_EQ(r1.lane<2>(), 2);
1689 EXPECT_EQ(r1.lane<3>(), 3);
1690
1691 vint4 a2(0x0, 0x1, 0x2, 0x4);
1692 vint4 r2 = clz(a2);
1693 EXPECT_EQ(r2.lane<0>(), 32);
1694 EXPECT_EQ(r2.lane<1>(), 31);
1695 EXPECT_EQ(r2.lane<2>(), 30);
1696 EXPECT_EQ(r2.lane<3>(), 29);
1697 }
1698
1699 /** @brief Test vint4 two_to_the_n. */
TEST(vint4,two_to_the_n)1700 TEST(vint4, two_to_the_n)
1701 {
1702 vint4 a1(0, 1, 2, 3);
1703 vint4 r1 = two_to_the_n(a1);
1704 EXPECT_EQ(r1.lane<0>(), 1 << 0);
1705 EXPECT_EQ(r1.lane<1>(), 1 << 1);
1706 EXPECT_EQ(r1.lane<2>(), 1 << 2);
1707 EXPECT_EQ(r1.lane<3>(), 1 << 3);
1708
1709 vint4 a2(27, 28, 29, 30);
1710 vint4 r2 = two_to_the_n(a2);
1711 EXPECT_EQ(r2.lane<0>(), 1 << 27);
1712 EXPECT_EQ(r2.lane<1>(), 1 << 28);
1713 EXPECT_EQ(r2.lane<2>(), 1 << 29);
1714 EXPECT_EQ(r2.lane<3>(), 1 << 30);
1715
1716 // Shifts higher than 30 are not allowed as it overflows the int type;
1717 // and results in implementation-defined behavior because of how we
1718 // generate the shifted result in two_to_the_n().
1719 // - Shift by 31 shifts into sign bit
1720 // - Shift by 32 shifts off the end
1721 }
1722
1723 /** @brief Test vint4 storea. */
TEST(vint4,storea)1724 TEST(vint4, storea)
1725 {
1726 alignas(16) int out[4];
1727 vint4 a(s32_data);
1728 storea(a, out);
1729 EXPECT_EQ(out[0], 0);
1730 EXPECT_EQ(out[1], 1);
1731 EXPECT_EQ(out[2], 2);
1732 EXPECT_EQ(out[3], 3);
1733 }
1734
1735 /** @brief Test vint4 store. */
TEST(vint4,store)1736 TEST(vint4, store)
1737 {
1738 alignas(16) int out[5];
1739 vint4 a(s32_data);
1740 store(a, &(out[1]));
1741 EXPECT_EQ(out[1], 0);
1742 EXPECT_EQ(out[2], 1);
1743 EXPECT_EQ(out[3], 2);
1744 EXPECT_EQ(out[4], 3);
1745 }
1746
1747 /** @brief Test vint4 store_nbytes. */
TEST(vint4,store_nbytes)1748 TEST(vint4, store_nbytes)
1749 {
1750 alignas(16) int out;
1751 vint4 a(42, 314, 75, 90);
1752 store_nbytes(a, (uint8_t*)&out);
1753 EXPECT_EQ(out, 42);
1754 }
1755
1756 /** @brief Test vint4 gatheri. */
TEST(vint4,gatheri)1757 TEST(vint4, gatheri)
1758 {
1759 vint4 indices(0, 4, 3, 2);
1760 vint4 r = gatheri(s32_data, indices);
1761 EXPECT_EQ(r.lane<0>(), 0);
1762 EXPECT_EQ(r.lane<1>(), 4);
1763 EXPECT_EQ(r.lane<2>(), 3);
1764 EXPECT_EQ(r.lane<3>(), 2);
1765 }
1766
1767 /** @brief Test vint4 pack_low_bytes. */
TEST(vint4,pack_low_bytes)1768 TEST(vint4, pack_low_bytes)
1769 {
1770 vint4 a(1, 2, 3, 4);
1771 vint4 r = pack_low_bytes(a);
1772 EXPECT_EQ(r.lane<0>(), (4 << 24) | (3 << 16) | (2 << 8) | (1 << 0));
1773 }
1774
1775 /** @brief Test vint4 select. */
TEST(vint4,select)1776 TEST(vint4, select)
1777 {
1778 vint4 m1(1, 1, 1, 1);
1779 vint4 m2(1, 2, 1, 2);
1780 vmask4 cond = m1 == m2;
1781
1782 vint4 a(1, 3, 3, 1);
1783 vint4 b(4, 2, 2, 4);
1784
1785 vint4 r1 = select(a, b, cond);
1786 EXPECT_EQ(r1.lane<0>(), 4);
1787 EXPECT_EQ(r1.lane<1>(), 3);
1788 EXPECT_EQ(r1.lane<2>(), 2);
1789 EXPECT_EQ(r1.lane<3>(), 1);
1790
1791 vint4 r2 = select(b, a, cond);
1792 EXPECT_EQ(r2.lane<0>(), 1);
1793 EXPECT_EQ(r2.lane<1>(), 2);
1794 EXPECT_EQ(r2.lane<2>(), 3);
1795 EXPECT_EQ(r2.lane<3>(), 4);
1796 }
1797
1798 // VMASK4 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1799 /** @brief Test vmask4 scalar literal constructor. */
TEST(vmask4,scalar_literal_construct)1800 TEST(vmask4, scalar_literal_construct)
1801 {
1802 vfloat4 m1a(0, 0, 0, 0);
1803 vfloat4 m1b(1, 1, 1, 1);
1804 vmask4 m1(true);
1805
1806 vfloat4 r = select(m1a, m1b, m1);
1807
1808 EXPECT_EQ(r.lane<0>(), 1);
1809 EXPECT_EQ(r.lane<1>(), 1);
1810 EXPECT_EQ(r.lane<2>(), 1);
1811 EXPECT_EQ(r.lane<3>(), 1);
1812
1813 r = select(m1b, m1a, m1);
1814
1815 EXPECT_EQ(r.lane<0>(), 0);
1816 EXPECT_EQ(r.lane<1>(), 0);
1817 EXPECT_EQ(r.lane<2>(), 0);
1818 EXPECT_EQ(r.lane<3>(), 0);
1819 }
1820
1821 /** @brief Test vmask4 literal constructor. */
TEST(vmask4,literal_construct)1822 TEST(vmask4, literal_construct)
1823 {
1824 vfloat4 m1a(0, 0, 0, 0);
1825 vfloat4 m1b(1, 1, 1, 1);
1826 vmask4 m1(true, false, true, false);
1827
1828 vfloat4 r = select(m1a, m1b, m1);
1829
1830 EXPECT_EQ(r.lane<0>(), 1);
1831 EXPECT_EQ(r.lane<1>(), 0);
1832 EXPECT_EQ(r.lane<2>(), 1);
1833 EXPECT_EQ(r.lane<3>(), 0);
1834 }
1835
1836 /** @brief Test vmask4 or. */
TEST(vmask4,or)1837 TEST(vmask4, or)
1838 {
1839 vfloat4 m1a(0, 1, 0, 1);
1840 vfloat4 m1b(1, 1, 1, 1);
1841 vmask4 m1 = m1a == m1b;
1842
1843 vfloat4 m2a(1, 1, 0, 0);
1844 vfloat4 m2b(1, 1, 1, 1);
1845 vmask4 m2 = m2a == m2b;
1846
1847 vmask4 r = m1 | m2;
1848 EXPECT_EQ(mask(r), 0xB);
1849 }
1850
1851 /** @brief Test vmask4 and. */
TEST(vmask4,and)1852 TEST(vmask4, and)
1853 {
1854 vfloat4 m1a(0, 1, 0, 1);
1855 vfloat4 m1b(1, 1, 1, 1);
1856 vmask4 m1 = m1a == m1b;
1857
1858 vfloat4 m2a(1, 1, 0, 0);
1859 vfloat4 m2b(1, 1, 1, 1);
1860 vmask4 m2 = m2a == m2b;
1861
1862 vmask4 r = m1 & m2;
1863 EXPECT_EQ(mask(r), 0x2);
1864 }
1865
1866 /** @brief Test vmask4 xor. */
TEST(vmask4,xor)1867 TEST(vmask4, xor)
1868 {
1869 vfloat4 m1a(0, 1, 0, 1);
1870 vfloat4 m1b(1, 1, 1, 1);
1871 vmask4 m1 = m1a == m1b;
1872
1873 vfloat4 m2a(1, 1, 0, 0);
1874 vfloat4 m2b(1, 1, 1, 1);
1875 vmask4 m2 = m2a == m2b;
1876
1877 vmask4 r = m1 ^ m2;
1878 EXPECT_EQ(mask(r), 0x9);
1879 }
1880
1881 /** @brief Test vmask4 not. */
TEST(vmask4,not)1882 TEST(vmask4, not)
1883 {
1884 vfloat4 m1a(0, 1, 0, 1);
1885 vfloat4 m1b(1, 1, 1, 1);
1886 vmask4 m1 = m1a == m1b;
1887 vmask4 r = ~m1;
1888 EXPECT_EQ(mask(r), 0x5);
1889 }
1890
1891 # if ASTCENC_SIMD_WIDTH == 8
1892
1893 // VFLOAT8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
1894
1895 /** @brief Test unaligned vfloat8 data load. */
TEST(vfloat8,UnalignedLoad)1896 TEST(vfloat8, UnalignedLoad)
1897 {
1898 vfloat8 a(&(f32_data[1]));
1899 EXPECT_EQ(a.lane<0>(), 1.0f);
1900 EXPECT_EQ(a.lane<1>(), 2.0f);
1901 EXPECT_EQ(a.lane<2>(), 3.0f);
1902 EXPECT_EQ(a.lane<3>(), 4.0f);
1903 EXPECT_EQ(a.lane<4>(), 5.0f);
1904 EXPECT_EQ(a.lane<5>(), 6.0f);
1905 EXPECT_EQ(a.lane<6>(), 7.0f);
1906 EXPECT_EQ(a.lane<7>(), 8.0f);
1907 }
1908
1909 /** @brief Test scalar duplicated vfloat8 load. */
TEST(vfloat8,ScalarDupLoad)1910 TEST(vfloat8, ScalarDupLoad)
1911 {
1912 vfloat8 a(1.1f);
1913 EXPECT_EQ(a.lane<0>(), 1.1f);
1914 EXPECT_EQ(a.lane<1>(), 1.1f);
1915 EXPECT_EQ(a.lane<2>(), 1.1f);
1916 EXPECT_EQ(a.lane<3>(), 1.1f);
1917 EXPECT_EQ(a.lane<4>(), 1.1f);
1918 EXPECT_EQ(a.lane<5>(), 1.1f);
1919 EXPECT_EQ(a.lane<6>(), 1.1f);
1920 EXPECT_EQ(a.lane<7>(), 1.1f);
1921 }
1922
1923 /** @brief Test scalar vfloat8 load. */
TEST(vfloat8,ScalarLoad)1924 TEST(vfloat8, ScalarLoad)
1925 {
1926 vfloat8 a(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
1927 EXPECT_EQ(a.lane<0>(), 1.1f);
1928 EXPECT_EQ(a.lane<1>(), 2.2f);
1929 EXPECT_EQ(a.lane<2>(), 3.3f);
1930 EXPECT_EQ(a.lane<3>(), 4.4f);
1931 EXPECT_EQ(a.lane<4>(), 5.5f);
1932 EXPECT_EQ(a.lane<5>(), 6.6f);
1933 EXPECT_EQ(a.lane<6>(), 7.7f);
1934 EXPECT_EQ(a.lane<7>(), 8.8f);
1935 }
1936
1937 /** @brief Test copy vfloat8 load. */
TEST(vfloat8,CopyLoad)1938 TEST(vfloat8, CopyLoad)
1939 {
1940 vfloat8 s(1.1f, 2.2f, 3.3f, 4.4f, 5.5f, 6.6f, 7.7f, 8.8f);
1941 vfloat8 a(s.m);
1942 EXPECT_EQ(a.lane<0>(), 1.1f);
1943 EXPECT_EQ(a.lane<1>(), 2.2f);
1944 EXPECT_EQ(a.lane<2>(), 3.3f);
1945 EXPECT_EQ(a.lane<3>(), 4.4f);
1946 EXPECT_EQ(a.lane<4>(), 5.5f);
1947 EXPECT_EQ(a.lane<5>(), 6.6f);
1948 EXPECT_EQ(a.lane<6>(), 7.7f);
1949 EXPECT_EQ(a.lane<7>(), 8.8f);
1950 }
1951
1952 /** @brief Test vfloat8 zero. */
TEST(vfloat8,Zero)1953 TEST(vfloat8, Zero)
1954 {
1955 vfloat8 a = vfloat8::zero();
1956 EXPECT_EQ(a.lane<0>(), 0.0f);
1957 EXPECT_EQ(a.lane<1>(), 0.0f);
1958 EXPECT_EQ(a.lane<2>(), 0.0f);
1959 EXPECT_EQ(a.lane<3>(), 0.0f);
1960 EXPECT_EQ(a.lane<4>(), 0.0f);
1961 EXPECT_EQ(a.lane<5>(), 0.0f);
1962 EXPECT_EQ(a.lane<6>(), 0.0f);
1963 EXPECT_EQ(a.lane<7>(), 0.0f);
1964 }
1965
1966 /** @brief Test vfloat8 load1. */
TEST(vfloat8,Load1)1967 TEST(vfloat8, Load1)
1968 {
1969 float s = 3.14f;
1970 vfloat8 a = vfloat8::load1(&s);
1971 EXPECT_EQ(a.lane<0>(), 3.14f);
1972 EXPECT_EQ(a.lane<1>(), 3.14f);
1973 EXPECT_EQ(a.lane<2>(), 3.14f);
1974 EXPECT_EQ(a.lane<3>(), 3.14f);
1975 EXPECT_EQ(a.lane<4>(), 3.14f);
1976 EXPECT_EQ(a.lane<5>(), 3.14f);
1977 EXPECT_EQ(a.lane<6>(), 3.14f);
1978 EXPECT_EQ(a.lane<7>(), 3.14f);
1979 }
1980
1981 /** @brief Test vfloat8 loada. */
TEST(vfloat8,Loada)1982 TEST(vfloat8, Loada)
1983 {
1984 vfloat8 a = vfloat8::loada(&(f32_data[0]));
1985 EXPECT_EQ(a.lane<0>(), 0.0f);
1986 EXPECT_EQ(a.lane<1>(), 1.0f);
1987 EXPECT_EQ(a.lane<2>(), 2.0f);
1988 EXPECT_EQ(a.lane<3>(), 3.0f);
1989 EXPECT_EQ(a.lane<4>(), 4.0f);
1990 EXPECT_EQ(a.lane<5>(), 5.0f);
1991 EXPECT_EQ(a.lane<6>(), 6.0f);
1992 EXPECT_EQ(a.lane<7>(), 7.0f);
1993 }
1994
1995 /** @brief Test vfloat8 lane_id. */
TEST(vfloat8,LaneID)1996 TEST(vfloat8, LaneID)
1997 {
1998 vfloat8 a = vfloat8::lane_id();
1999 EXPECT_EQ(a.lane<0>(), 0.0f);
2000 EXPECT_EQ(a.lane<1>(), 1.0f);
2001 EXPECT_EQ(a.lane<2>(), 2.0f);
2002 EXPECT_EQ(a.lane<3>(), 3.0f);
2003 EXPECT_EQ(a.lane<4>(), 4.0f);
2004 EXPECT_EQ(a.lane<5>(), 5.0f);
2005 EXPECT_EQ(a.lane<6>(), 6.0f);
2006 EXPECT_EQ(a.lane<7>(), 7.0f);
2007 }
2008
2009 /** @brief Test vfloat8 add. */
TEST(vfloat8,vadd)2010 TEST(vfloat8, vadd)
2011 {
2012 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2013 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2014 a = a + b;
2015 EXPECT_EQ(a.lane<0>(), 1.0f + 0.1f);
2016 EXPECT_EQ(a.lane<1>(), 2.0f + 0.2f);
2017 EXPECT_EQ(a.lane<2>(), 3.0f + 0.3f);
2018 EXPECT_EQ(a.lane<3>(), 4.0f + 0.4f);
2019 EXPECT_EQ(a.lane<4>(), 5.0f + 0.5f);
2020 EXPECT_EQ(a.lane<5>(), 6.0f + 0.6f);
2021 EXPECT_EQ(a.lane<6>(), 7.0f + 0.7f);
2022 EXPECT_EQ(a.lane<7>(), 8.0f + 0.8f);
2023 }
2024
2025 /** @brief Test vfloat8 sub. */
TEST(vfloat8,vsub)2026 TEST(vfloat8, vsub)
2027 {
2028 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2029 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2030 a = a - b;
2031 EXPECT_EQ(a.lane<0>(), 1.0f - 0.1f);
2032 EXPECT_EQ(a.lane<1>(), 2.0f - 0.2f);
2033 EXPECT_EQ(a.lane<2>(), 3.0f - 0.3f);
2034 EXPECT_EQ(a.lane<3>(), 4.0f - 0.4f);
2035 EXPECT_EQ(a.lane<4>(), 5.0f - 0.5f);
2036 EXPECT_EQ(a.lane<5>(), 6.0f - 0.6f);
2037 EXPECT_EQ(a.lane<6>(), 7.0f - 0.7f);
2038 EXPECT_EQ(a.lane<7>(), 8.0f - 0.8f);
2039 }
2040
2041 /** @brief Test vfloat8 mul. */
TEST(vfloat8,vmul)2042 TEST(vfloat8, vmul)
2043 {
2044 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2045 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2046 a = a * b;
2047 EXPECT_EQ(a.lane<0>(), 1.0f * 0.1f);
2048 EXPECT_EQ(a.lane<1>(), 2.0f * 0.2f);
2049 EXPECT_EQ(a.lane<2>(), 3.0f * 0.3f);
2050 EXPECT_EQ(a.lane<3>(), 4.0f * 0.4f);
2051 EXPECT_EQ(a.lane<4>(), 5.0f * 0.5f);
2052 EXPECT_EQ(a.lane<5>(), 6.0f * 0.6f);
2053 EXPECT_EQ(a.lane<6>(), 7.0f * 0.7f);
2054 EXPECT_EQ(a.lane<7>(), 8.0f * 0.8f);
2055 }
2056
2057 /** @brief Test vfloat8 mul. */
TEST(vfloat8,vsmul)2058 TEST(vfloat8, vsmul)
2059 {
2060 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2061 float b = 3.14f;
2062 a = a * b;
2063 EXPECT_EQ(a.lane<0>(), 1.0f * 3.14f);
2064 EXPECT_EQ(a.lane<1>(), 2.0f * 3.14f);
2065 EXPECT_EQ(a.lane<2>(), 3.0f * 3.14f);
2066 EXPECT_EQ(a.lane<3>(), 4.0f * 3.14f);
2067 EXPECT_EQ(a.lane<4>(), 5.0f * 3.14f);
2068 EXPECT_EQ(a.lane<5>(), 6.0f * 3.14f);
2069 EXPECT_EQ(a.lane<6>(), 7.0f * 3.14f);
2070 EXPECT_EQ(a.lane<7>(), 8.0f * 3.14f);
2071 }
2072
2073 /** @brief Test vfloat8 mul. */
TEST(vfloat8,svmul)2074 TEST(vfloat8, svmul)
2075 {
2076 float a = 3.14f;
2077 vfloat8 b(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2078 b = a * b;
2079 EXPECT_EQ(b.lane<0>(), 3.14f * 1.0f);
2080 EXPECT_EQ(b.lane<1>(), 3.14f * 2.0f);
2081 EXPECT_EQ(b.lane<2>(), 3.14f * 3.0f);
2082 EXPECT_EQ(b.lane<3>(), 3.14f * 4.0f);
2083 EXPECT_EQ(b.lane<4>(), 3.14f * 5.0f);
2084 EXPECT_EQ(b.lane<5>(), 3.14f * 6.0f);
2085 EXPECT_EQ(b.lane<6>(), 3.14f * 7.0f);
2086 EXPECT_EQ(b.lane<7>(), 3.14f * 8.0f);
2087 }
2088
2089 /** @brief Test vfloat8 div. */
TEST(vfloat8,vdiv)2090 TEST(vfloat8, vdiv)
2091 {
2092 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2093 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2094 a = a / b;
2095 EXPECT_EQ(a.lane<0>(), 1.0f / 0.1f);
2096 EXPECT_EQ(a.lane<1>(), 2.0f / 0.2f);
2097 EXPECT_EQ(a.lane<2>(), 3.0f / 0.3f);
2098 EXPECT_EQ(a.lane<3>(), 4.0f / 0.4f);
2099 EXPECT_EQ(a.lane<4>(), 5.0f / 0.5f);
2100 EXPECT_EQ(a.lane<5>(), 6.0f / 0.6f);
2101 EXPECT_EQ(a.lane<6>(), 7.0f / 0.7f);
2102 EXPECT_EQ(a.lane<7>(), 8.0f / 0.8f);
2103 }
2104
2105 /** @brief Test vfloat8 div. */
TEST(vfloat8,vsdiv)2106 TEST(vfloat8, vsdiv)
2107 {
2108 vfloat8 a(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2109 float b = 3.14f;
2110 vfloat8 r = a / b;
2111
2112 EXPECT_EQ(r.lane<0>(), 0.1f / 3.14f);
2113 EXPECT_EQ(r.lane<1>(), 0.2f / 3.14f);
2114 EXPECT_EQ(r.lane<2>(), 0.3f / 3.14f);
2115 EXPECT_EQ(r.lane<3>(), 0.4f / 3.14f);
2116 EXPECT_EQ(r.lane<4>(), 0.5f / 3.14f);
2117 EXPECT_EQ(r.lane<5>(), 0.6f / 3.14f);
2118 EXPECT_EQ(r.lane<6>(), 0.7f / 3.14f);
2119 EXPECT_EQ(r.lane<7>(), 0.8f / 3.14f);
2120 }
2121
2122 /** @brief Test vfloat8 div. */
TEST(vfloat8,svdiv)2123 TEST(vfloat8, svdiv)
2124 {
2125 float a = 3.14f;
2126 vfloat8 b(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2127 vfloat8 r = a / b;
2128
2129 EXPECT_EQ(r.lane<0>(), 3.14f / 0.1f);
2130 EXPECT_EQ(r.lane<1>(), 3.14f / 0.2f);
2131 EXPECT_EQ(r.lane<2>(), 3.14f / 0.3f);
2132 EXPECT_EQ(r.lane<3>(), 3.14f / 0.4f);
2133 EXPECT_EQ(r.lane<4>(), 3.14f / 0.5f);
2134 EXPECT_EQ(r.lane<5>(), 3.14f / 0.6f);
2135 EXPECT_EQ(r.lane<6>(), 3.14f / 0.7f);
2136 EXPECT_EQ(r.lane<7>(), 3.14f / 0.8f);
2137 }
2138
2139 /** @brief Test vfloat8 ceq. */
TEST(vfloat8,ceq)2140 TEST(vfloat8, ceq)
2141 {
2142 vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2143 vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2144 vmask8 r1 = a1 == b1;
2145 EXPECT_EQ(0, mask(r1));
2146 EXPECT_EQ(false, any(r1));
2147 EXPECT_EQ(false, all(r1));
2148
2149 vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2150 vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2151 vmask8 r2 = a2 == b2;
2152 EXPECT_EQ(0x1, mask(r2));
2153 EXPECT_EQ(true, any(r2));
2154 EXPECT_EQ(false, all(r2));
2155
2156 vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2157 vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2158 vmask8 r3 = a3 == b3;
2159 EXPECT_EQ(0x5, mask(r3));
2160 EXPECT_EQ(true, any(r3));
2161 EXPECT_EQ(false, all(r3));
2162
2163 vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2164 vmask8 r4 = a4 == a4;
2165 EXPECT_EQ(0xFF, mask(r4));
2166 EXPECT_EQ(true, any(r4));
2167 EXPECT_EQ(true, all(r4));
2168 }
2169
2170 /** @brief Test vfloat8 cne. */
TEST(vfloat8,cne)2171 TEST(vfloat8, cne)
2172 {
2173 vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2174 vfloat8 b1(0.1f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2175 vmask8 r1 = a1 != b1;
2176 EXPECT_EQ(0xFF, mask(r1));
2177 EXPECT_EQ(true, any(r1));
2178 EXPECT_EQ(true, all(r1));
2179
2180 vfloat8 a2(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2181 vfloat8 b2(1.0f, 0.2f, 0.3f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2182 vmask8 r2 = a2 != b2;
2183 EXPECT_EQ(0xFE, mask(r2));
2184 EXPECT_EQ(true, any(r2));
2185 EXPECT_EQ(false, all(r2));
2186
2187 vfloat8 a3(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2188 vfloat8 b3(1.0f, 0.2f, 3.0f, 0.4f, 0.5f, 0.6f, 0.7f, 0.8f);
2189 vmask8 r3 = a3 != b3;
2190 EXPECT_EQ(0xFA, mask(r3));
2191 EXPECT_EQ(true, any(r3));
2192 EXPECT_EQ(false, all(r3));
2193
2194 vfloat8 a4(1.0f, 2.0f, 3.0f, 4.0f, 5.0f, 6.0f, 7.0f, 8.0f);
2195 vmask8 r4 = a4 != a4;
2196 EXPECT_EQ(0, mask(r4));
2197 EXPECT_EQ(false, any(r4));
2198 EXPECT_EQ(false, all(r4));
2199 }
2200
2201 /** @brief Test vfloat8 clt. */
TEST(vfloat8,clt)2202 TEST(vfloat8, clt)
2203 {
2204 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2205 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2206 vmask8 r = a < b;
2207 EXPECT_EQ(0xAA, mask(r));
2208 }
2209
2210 /** @brief Test vfloat8 cle. */
TEST(vfloat8,cle)2211 TEST(vfloat8, cle)
2212 {
2213 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2214 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2215 vmask8 r = a <= b;
2216 EXPECT_EQ(0xEE, mask(r));
2217 }
2218
2219 /** @brief Test vfloat8 cgt. */
TEST(vfloat8,cgt)2220 TEST(vfloat8, cgt)
2221 {
2222 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2223 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2224 vmask8 r = a > b;
2225 EXPECT_EQ(0x11, mask(r));
2226 }
2227
2228 /** @brief Test vfloat8 cge. */
TEST(vfloat8,cge)2229 TEST(vfloat8, cge)
2230 {
2231 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2232 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2233 vmask8 r = a >= b;
2234 EXPECT_EQ(0x55, mask(r));
2235 }
2236
2237 /** @brief Test vfloat8 min. */
TEST(vfloat8,min)2238 TEST(vfloat8, min)
2239 {
2240 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2241 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2242 vfloat8 r = min(a, b);
2243 EXPECT_EQ(r.lane<0>(), 0.9f);
2244 EXPECT_EQ(r.lane<1>(), 2.0f);
2245 EXPECT_EQ(r.lane<2>(), 3.0f);
2246 EXPECT_EQ(r.lane<3>(), 4.0f);
2247 EXPECT_EQ(r.lane<4>(), 0.9f);
2248 EXPECT_EQ(r.lane<5>(), 2.0f);
2249 EXPECT_EQ(r.lane<6>(), 3.0f);
2250 EXPECT_EQ(r.lane<7>(), 4.0f);
2251 }
2252
2253 /** @brief Test vfloat8 max. */
TEST(vfloat8,max)2254 TEST(vfloat8, max)
2255 {
2256 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2257 vfloat8 b(0.9f, 2.1f, 3.0f, 4.1f, 0.9f, 2.1f, 3.0f, 4.1f);
2258 vfloat8 r = max(a, b);
2259 EXPECT_EQ(r.lane<0>(), 1.0f);
2260 EXPECT_EQ(r.lane<1>(), 2.1f);
2261 EXPECT_EQ(r.lane<2>(), 3.0f);
2262 EXPECT_EQ(r.lane<3>(), 4.1f);
2263 EXPECT_EQ(r.lane<4>(), 1.0f);
2264 EXPECT_EQ(r.lane<5>(), 2.1f);
2265 EXPECT_EQ(r.lane<6>(), 3.0f);
2266 EXPECT_EQ(r.lane<7>(), 4.1f);
2267 }
2268
2269 /** @brief Test vfloat8 clamp. */
TEST(vfloat8,clamp)2270 TEST(vfloat8, clamp)
2271 {
2272 vfloat8 a1(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2273 vfloat8 r1 = clamp(2.1f, 3.0f, a1);
2274 EXPECT_EQ(r1.lane<0>(), 2.1f);
2275 EXPECT_EQ(r1.lane<1>(), 2.1f);
2276 EXPECT_EQ(r1.lane<2>(), 3.0f);
2277 EXPECT_EQ(r1.lane<3>(), 3.0f);
2278 EXPECT_EQ(r1.lane<4>(), 2.1f);
2279 EXPECT_EQ(r1.lane<5>(), 2.1f);
2280 EXPECT_EQ(r1.lane<6>(), 3.0f);
2281 EXPECT_EQ(r1.lane<7>(), 3.0f);
2282
2283 vfloat8 a2(1.0f, 2.0f, qnan, 4.0f, 1.0f, 2.0f, qnan, 4.0f);
2284 vfloat8 r2 = clamp(2.1f, 3.0f, a2);
2285 EXPECT_EQ(r2.lane<0>(), 2.1f);
2286 EXPECT_EQ(r2.lane<1>(), 2.1f);
2287 EXPECT_EQ(r2.lane<2>(), 2.1f);
2288 EXPECT_EQ(r2.lane<3>(), 3.0f);
2289 EXPECT_EQ(r2.lane<4>(), 2.1f);
2290 EXPECT_EQ(r2.lane<5>(), 2.1f);
2291 EXPECT_EQ(r2.lane<6>(), 2.1f);
2292 EXPECT_EQ(r2.lane<7>(), 3.0f);
2293 }
2294
2295 /** @brief Test vfloat8 clampz. */
TEST(vfloat8,clampz)2296 TEST(vfloat8, clampz)
2297 {
2298 vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2299 vfloat8 r1 = clampz(3.0f, a1);
2300 EXPECT_EQ(r1.lane<0>(), 0.0f);
2301 EXPECT_EQ(r1.lane<1>(), 0.0f);
2302 EXPECT_EQ(r1.lane<2>(), 0.1f);
2303 EXPECT_EQ(r1.lane<3>(), 3.0f);
2304 EXPECT_EQ(r1.lane<4>(), 0.0f);
2305 EXPECT_EQ(r1.lane<5>(), 0.0f);
2306 EXPECT_EQ(r1.lane<6>(), 0.1f);
2307 EXPECT_EQ(r1.lane<7>(), 3.0f);
2308
2309 vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f);
2310 vfloat8 r2 = clampz(3.0f, a2);
2311 EXPECT_EQ(r2.lane<0>(), 0.0f);
2312 EXPECT_EQ(r2.lane<1>(), 0.0f);
2313 EXPECT_EQ(r2.lane<2>(), 0.0f);
2314 EXPECT_EQ(r2.lane<3>(), 3.0f);
2315 EXPECT_EQ(r2.lane<4>(), 0.0f);
2316 EXPECT_EQ(r2.lane<5>(), 0.0f);
2317 EXPECT_EQ(r2.lane<6>(), 0.0f);
2318 EXPECT_EQ(r2.lane<7>(), 3.0f);
2319 }
2320
2321 /** @brief Test vfloat8 clampz. */
TEST(vfloat8,clampzo)2322 TEST(vfloat8, clampzo)
2323 {
2324 vfloat8 a1(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2325 vfloat8 r1 = clampzo(a1);
2326 EXPECT_EQ(r1.lane<0>(), 0.0f);
2327 EXPECT_EQ(r1.lane<1>(), 0.0f);
2328 EXPECT_EQ(r1.lane<2>(), 0.1f);
2329 EXPECT_EQ(r1.lane<3>(), 1.0f);
2330 EXPECT_EQ(r1.lane<4>(), 0.0f);
2331 EXPECT_EQ(r1.lane<5>(), 0.0f);
2332 EXPECT_EQ(r1.lane<6>(), 0.1f);
2333 EXPECT_EQ(r1.lane<7>(), 1.0f);
2334
2335 vfloat8 a2(-1.0f, 0.0f, qnan, 4.0f, -1.0f, 0.0f, qnan, 4.0f);
2336 vfloat8 r2 = clampzo(a2);
2337 EXPECT_EQ(r2.lane<0>(), 0.0f);
2338 EXPECT_EQ(r2.lane<1>(), 0.0f);
2339 EXPECT_EQ(r2.lane<2>(), 0.0f);
2340 EXPECT_EQ(r2.lane<3>(), 1.0f);
2341 EXPECT_EQ(r2.lane<4>(), 0.0f);
2342 EXPECT_EQ(r2.lane<5>(), 0.0f);
2343 EXPECT_EQ(r2.lane<6>(), 0.0f);
2344 EXPECT_EQ(r2.lane<7>(), 1.0f);
2345 }
2346
2347 /** @brief Test vfloat8 abs. */
TEST(vfloat8,abs)2348 TEST(vfloat8, abs)
2349 {
2350 vfloat8 a(-1.0f, 0.0f, 0.1f, 4.0f, -1.0f, 0.0f, 0.1f, 4.0f);
2351 vfloat8 r = abs(a);
2352 EXPECT_EQ(r.lane<0>(), 1.0f);
2353 EXPECT_EQ(r.lane<1>(), 0.0f);
2354 EXPECT_EQ(r.lane<2>(), 0.1f);
2355 EXPECT_EQ(r.lane<3>(), 4.0f);
2356 EXPECT_EQ(r.lane<4>(), 1.0f);
2357 EXPECT_EQ(r.lane<5>(), 0.0f);
2358 EXPECT_EQ(r.lane<6>(), 0.1f);
2359 EXPECT_EQ(r.lane<7>(), 4.0f);
2360 }
2361
2362 /** @brief Test vfloat8 round. */
TEST(vfloat8,round)2363 TEST(vfloat8, round)
2364 {
2365 vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2366 vfloat8 r = round(a);
2367 EXPECT_EQ(r.lane<0>(), 1.0f);
2368 EXPECT_EQ(r.lane<1>(), 2.0f);
2369 EXPECT_EQ(r.lane<2>(), 2.0f);
2370 EXPECT_EQ(r.lane<3>(), 4.0f);
2371 EXPECT_EQ(r.lane<4>(), 1.0f);
2372 EXPECT_EQ(r.lane<5>(), 2.0f);
2373 EXPECT_EQ(r.lane<6>(), 2.0f);
2374 EXPECT_EQ(r.lane<7>(), 4.0f);
2375 }
2376
2377 /** @brief Test vfloat8 hmin. */
TEST(vfloat8,hmin)2378 TEST(vfloat8, hmin)
2379 {
2380 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2381 vfloat8 r1 = hmin(a1);
2382 EXPECT_EQ(r1.lane<0>(), 1.1f);
2383 EXPECT_EQ(r1.lane<1>(), 1.1f);
2384 EXPECT_EQ(r1.lane<2>(), 1.1f);
2385 EXPECT_EQ(r1.lane<3>(), 1.1f);
2386 EXPECT_EQ(r1.lane<4>(), 1.1f);
2387 EXPECT_EQ(r1.lane<5>(), 1.1f);
2388 EXPECT_EQ(r1.lane<6>(), 1.1f);
2389 EXPECT_EQ(r1.lane<7>(), 1.1f);
2390
2391 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2392 vfloat8 r2 = hmin(a2);
2393 EXPECT_EQ(r2.lane<0>(), 0.2f);
2394 EXPECT_EQ(r2.lane<1>(), 0.2f);
2395 EXPECT_EQ(r2.lane<2>(), 0.2f);
2396 EXPECT_EQ(r2.lane<3>(), 0.2f);
2397 EXPECT_EQ(r2.lane<4>(), 0.2f);
2398 EXPECT_EQ(r2.lane<5>(), 0.2f);
2399 EXPECT_EQ(r2.lane<6>(), 0.2f);
2400 EXPECT_EQ(r2.lane<7>(), 0.2f);
2401 }
2402
2403 /** @brief Test vfloat8 hmin_s. */
TEST(vfloat8,hmin_s)2404 TEST(vfloat8, hmin_s)
2405 {
2406 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2407 float r1 = hmin_s(a1);
2408 EXPECT_EQ(r1, 1.1f);
2409
2410 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2411 float r2 = hmin_s(a2);
2412 EXPECT_EQ(r2, 0.2f);
2413 }
2414
2415 /** @brief Test vfloat8 hmax. */
TEST(vfloat8,hmax)2416 TEST(vfloat8, hmax)
2417 {
2418 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2419 vfloat8 r1 = hmax(a1);
2420 EXPECT_EQ(r1.lane<0>(), 4.0f);
2421 EXPECT_EQ(r1.lane<1>(), 4.0f);
2422 EXPECT_EQ(r1.lane<2>(), 4.0f);
2423 EXPECT_EQ(r1.lane<3>(), 4.0f);
2424 EXPECT_EQ(r1.lane<4>(), 4.0f);
2425 EXPECT_EQ(r1.lane<5>(), 4.0f);
2426 EXPECT_EQ(r1.lane<6>(), 4.0f);
2427 EXPECT_EQ(r1.lane<7>(), 4.0f);
2428
2429 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2430 vfloat8 r2 = hmax(a2);
2431 EXPECT_EQ(r2.lane<0>(), 1.6f);
2432 EXPECT_EQ(r2.lane<1>(), 1.6f);
2433 EXPECT_EQ(r2.lane<2>(), 1.6f);
2434 EXPECT_EQ(r2.lane<3>(), 1.6f);
2435 EXPECT_EQ(r2.lane<4>(), 1.6f);
2436 EXPECT_EQ(r2.lane<5>(), 1.6f);
2437 EXPECT_EQ(r2.lane<6>(), 1.6f);
2438 EXPECT_EQ(r2.lane<7>(), 1.6f);
2439 }
2440
2441 /** @brief Test vfloat8 hmax_s. */
TEST(vfloat8,hmax_s)2442 TEST(vfloat8, hmax_s)
2443 {
2444 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2445 float r1 = hmax_s(a1);
2446 EXPECT_EQ(r1, 4.0f);
2447
2448 vfloat8 a2(1.1f, 1.5f, 1.6f, 0.2f, 1.1f, 1.5f, 1.6f, 0.2f);
2449 float r2 = hmax_s(a2);
2450 EXPECT_EQ(r2, 1.6f);
2451 }
2452
2453 /** @brief Test vfloat8 hadd_s. */
TEST(vfloat8,hadd_s)2454 TEST(vfloat8, hadd_s)
2455 {
2456 vfloat8 a1(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2457 float sum = 1.1f + 1.5f + 1.6f + 4.0f + 1.1f + 1.5f + 1.6f + 4.0f;
2458 float r = hadd_s(a1);
2459 EXPECT_NEAR(r, sum, 0.005f);
2460 }
2461
2462 /** @brief Test vfloat8 sqrt. */
TEST(vfloat8,sqrt)2463 TEST(vfloat8, sqrt)
2464 {
2465 vfloat8 a(1.0f, 2.0f, 3.0f, 4.0f, 1.0f, 2.0f, 3.0f, 4.0f);
2466 vfloat8 r = sqrt(a);
2467 EXPECT_EQ(r.lane<0>(), std::sqrt(1.0f));
2468 EXPECT_EQ(r.lane<1>(), std::sqrt(2.0f));
2469 EXPECT_EQ(r.lane<2>(), std::sqrt(3.0f));
2470 EXPECT_EQ(r.lane<3>(), std::sqrt(4.0f));
2471 EXPECT_EQ(r.lane<4>(), std::sqrt(1.0f));
2472 EXPECT_EQ(r.lane<5>(), std::sqrt(2.0f));
2473 EXPECT_EQ(r.lane<6>(), std::sqrt(3.0f));
2474 EXPECT_EQ(r.lane<7>(), std::sqrt(4.0f));
2475 }
2476
2477 /** @brief Test vfloat8 select. */
TEST(vfloat8,select)2478 TEST(vfloat8, select)
2479 {
2480 vfloat8 m1(1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f, 1.0f);
2481 vfloat8 m2(1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f, 1.0f, 2.0f);
2482 vmask8 cond = m1 == m2;
2483
2484 vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0);
2485 vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0);
2486
2487 // Select in one direction
2488 vfloat8 r1 = select(a, b, cond);
2489 EXPECT_EQ(r1.lane<0>(), 4.0f);
2490 EXPECT_EQ(r1.lane<1>(), 3.0f);
2491 EXPECT_EQ(r1.lane<2>(), 2.0f);
2492 EXPECT_EQ(r1.lane<3>(), 1.0f);
2493 EXPECT_EQ(r1.lane<4>(), 4.0f);
2494 EXPECT_EQ(r1.lane<5>(), 3.0f);
2495 EXPECT_EQ(r1.lane<6>(), 2.0f);
2496 EXPECT_EQ(r1.lane<7>(), 1.0f);
2497
2498 // Select in the other
2499 vfloat8 r2 = select(b, a, cond);
2500 EXPECT_EQ(r2.lane<0>(), 1.0f);
2501 EXPECT_EQ(r2.lane<1>(), 2.0f);
2502 EXPECT_EQ(r2.lane<2>(), 3.0f);
2503 EXPECT_EQ(r2.lane<3>(), 4.0f);
2504 EXPECT_EQ(r2.lane<4>(), 1.0f);
2505 EXPECT_EQ(r2.lane<5>(), 2.0f);
2506 EXPECT_EQ(r2.lane<6>(), 3.0f);
2507 EXPECT_EQ(r2.lane<7>(), 4.0f);
2508 }
2509
2510 /** @brief Test vfloat8 select MSB only. */
TEST(vfloat8,select_msb)2511 TEST(vfloat8, select_msb)
2512 {
2513 vint8 msb(0x80000000, 0, 0x80000000, 0, 0x80000000, 0, 0x80000000, 0);
2514 vmask8 cond(msb.m);
2515
2516 vfloat8 a(1.0f, 3.0f, 3.0f, 1.0f, 1.0f, 3.0f, 3.0f, 1.0f);
2517 vfloat8 b(4.0f, 2.0f, 2.0f, 4.0f, 4.0f, 2.0f, 2.0f, 4.0f);
2518
2519 // Select in one direction
2520 vfloat8 r1 = select(a, b, cond);
2521 EXPECT_EQ(r1.lane<0>(), 4.0f);
2522 EXPECT_EQ(r1.lane<1>(), 3.0f);
2523 EXPECT_EQ(r1.lane<2>(), 2.0f);
2524 EXPECT_EQ(r1.lane<3>(), 1.0f);
2525 EXPECT_EQ(r1.lane<4>(), 4.0f);
2526 EXPECT_EQ(r1.lane<5>(), 3.0f);
2527 EXPECT_EQ(r1.lane<6>(), 2.0f);
2528 EXPECT_EQ(r1.lane<7>(), 1.0f);
2529
2530
2531 // Select in the other
2532 vfloat8 r2 = select(b, a, cond);
2533 EXPECT_EQ(r2.lane<0>(), 1.0f);
2534 EXPECT_EQ(r2.lane<1>(), 2.0f);
2535 EXPECT_EQ(r2.lane<2>(), 3.0f);
2536 EXPECT_EQ(r2.lane<3>(), 4.0f);
2537 EXPECT_EQ(r2.lane<4>(), 1.0f);
2538 EXPECT_EQ(r2.lane<5>(), 2.0f);
2539 EXPECT_EQ(r2.lane<6>(), 3.0f);
2540 EXPECT_EQ(r2.lane<7>(), 4.0f);
2541 }
2542
2543 /** @brief Test vfloat8 gatherf. */
TEST(vfloat8,gatherf)2544 TEST(vfloat8, gatherf)
2545 {
2546 vint8 indices(0, 4, 3, 2, 7, 4, 3, 2);
2547 vfloat8 r = gatherf(f32_data, indices);
2548 EXPECT_EQ(r.lane<0>(), 0.0f);
2549 EXPECT_EQ(r.lane<1>(), 4.0f);
2550 EXPECT_EQ(r.lane<2>(), 3.0f);
2551 EXPECT_EQ(r.lane<3>(), 2.0f);
2552 EXPECT_EQ(r.lane<4>(), 7.0f);
2553 EXPECT_EQ(r.lane<5>(), 4.0f);
2554 EXPECT_EQ(r.lane<6>(), 3.0f);
2555 EXPECT_EQ(r.lane<7>(), 2.0f);
2556 }
2557
2558 /** @brief Test vfloat8 store. */
TEST(vfloat8,store)2559 TEST(vfloat8, store)
2560 {
2561 alignas(32) float out[9];
2562 vfloat8 a(f32_data);
2563 store(a, &(out[1]));
2564 EXPECT_EQ(out[1], 0.0f);
2565 EXPECT_EQ(out[2], 1.0f);
2566 EXPECT_EQ(out[3], 2.0f);
2567 EXPECT_EQ(out[4], 3.0f);
2568 EXPECT_EQ(out[5], 4.0f);
2569 EXPECT_EQ(out[6], 5.0f);
2570 EXPECT_EQ(out[7], 6.0f);
2571 EXPECT_EQ(out[8], 7.0f);
2572 }
2573
2574 /** @brief Test vfloat8 storea. */
TEST(vfloat8,storea)2575 TEST(vfloat8, storea)
2576 {
2577 alignas(32) float out[9];
2578 vfloat8 a(f32_data);
2579 store(a, out);
2580 EXPECT_EQ(out[0], 0.0f);
2581 EXPECT_EQ(out[1], 1.0f);
2582 EXPECT_EQ(out[2], 2.0f);
2583 EXPECT_EQ(out[3], 3.0f);
2584 EXPECT_EQ(out[4], 4.0f);
2585 EXPECT_EQ(out[5], 5.0f);
2586 EXPECT_EQ(out[6], 6.0f);
2587 EXPECT_EQ(out[7], 7.0f);
2588 }
2589
2590 /** @brief Test vfloat8 float_to_int. */
TEST(vfloat8,float_to_int)2591 TEST(vfloat8, float_to_int)
2592 {
2593 vfloat8 a(1.1f, 1.5f, 1.6f, 4.0f, 1.1f, 1.5f, 1.6f, 4.0f);
2594 vint8 r = float_to_int(a);
2595 EXPECT_EQ(r.lane<0>(), 1);
2596 EXPECT_EQ(r.lane<1>(), 1);
2597 EXPECT_EQ(r.lane<2>(), 1);
2598 EXPECT_EQ(r.lane<3>(), 4);
2599 EXPECT_EQ(r.lane<4>(), 1);
2600 EXPECT_EQ(r.lane<5>(), 1);
2601 EXPECT_EQ(r.lane<6>(), 1);
2602 EXPECT_EQ(r.lane<7>(), 4);
2603 }
2604
2605 // vint8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
2606
2607 /** @brief Test unaligned vint8 data load. */
TEST(vint8,UnalignedLoad)2608 TEST(vint8, UnalignedLoad)
2609 {
2610 vint8 a(&(s32_data[1]));
2611 EXPECT_EQ(a.lane<0>(), 1);
2612 EXPECT_EQ(a.lane<1>(), 2);
2613 EXPECT_EQ(a.lane<2>(), 3);
2614 EXPECT_EQ(a.lane<3>(), 4);
2615 EXPECT_EQ(a.lane<4>(), 5);
2616 EXPECT_EQ(a.lane<5>(), 6);
2617 EXPECT_EQ(a.lane<6>(), 7);
2618 EXPECT_EQ(a.lane<7>(), 8);
2619 }
2620
2621 /** @brief Test unaligned vint8 data load. */
TEST(vint8,UnalignedLoad8)2622 TEST(vint8, UnalignedLoad8)
2623 {
2624 vint8 a(&(u8_data[1]));
2625 EXPECT_EQ(a.lane<0>(), 1);
2626 EXPECT_EQ(a.lane<1>(), 2);
2627 EXPECT_EQ(a.lane<2>(), 3);
2628 EXPECT_EQ(a.lane<3>(), 4);
2629 EXPECT_EQ(a.lane<4>(), 5);
2630 EXPECT_EQ(a.lane<5>(), 6);
2631 EXPECT_EQ(a.lane<6>(), 7);
2632 EXPECT_EQ(a.lane<7>(), 8);
2633 }
2634
2635 /** @brief Test scalar duplicated vint8 load. */
TEST(vint8,ScalarDupLoad)2636 TEST(vint8, ScalarDupLoad)
2637 {
2638 vint8 a(42);
2639 EXPECT_EQ(a.lane<0>(), 42);
2640 EXPECT_EQ(a.lane<1>(), 42);
2641 EXPECT_EQ(a.lane<2>(), 42);
2642 EXPECT_EQ(a.lane<3>(), 42);
2643 EXPECT_EQ(a.lane<4>(), 42);
2644 EXPECT_EQ(a.lane<5>(), 42);
2645 EXPECT_EQ(a.lane<6>(), 42);
2646 EXPECT_EQ(a.lane<7>(), 42);
2647 }
2648
2649 /** @brief Test scalar vint8 load. */
TEST(vint8,ScalarLoad)2650 TEST(vint8, ScalarLoad)
2651 {
2652 vint8 a(11, 22, 33, 44, 55, 66, 77, 88);
2653 EXPECT_EQ(a.lane<0>(), 11);
2654 EXPECT_EQ(a.lane<1>(), 22);
2655 EXPECT_EQ(a.lane<2>(), 33);
2656 EXPECT_EQ(a.lane<3>(), 44);
2657 EXPECT_EQ(a.lane<4>(), 55);
2658 EXPECT_EQ(a.lane<5>(), 66);
2659 EXPECT_EQ(a.lane<6>(), 77);
2660 EXPECT_EQ(a.lane<7>(), 88);
2661 }
2662
2663 /** @brief Test copy vint8 load. */
TEST(vint8,CopyLoad)2664 TEST(vint8, CopyLoad)
2665 {
2666 vint8 s(11, 22, 33, 44, 55, 66, 77, 88);
2667 vint8 a(s.m);
2668 EXPECT_EQ(a.lane<0>(), 11);
2669 EXPECT_EQ(a.lane<1>(), 22);
2670 EXPECT_EQ(a.lane<2>(), 33);
2671 EXPECT_EQ(a.lane<3>(), 44);
2672 EXPECT_EQ(a.lane<4>(), 55);
2673 EXPECT_EQ(a.lane<5>(), 66);
2674 EXPECT_EQ(a.lane<6>(), 77);
2675 EXPECT_EQ(a.lane<7>(), 88);
2676 }
2677
2678 /** @brief Test vint8 zero. */
TEST(vint8,Zero)2679 TEST(vint8, Zero)
2680 {
2681 vint8 a = vint8::zero();
2682 EXPECT_EQ(a.lane<0>(), 0);
2683 EXPECT_EQ(a.lane<1>(), 0);
2684 EXPECT_EQ(a.lane<2>(), 0);
2685 EXPECT_EQ(a.lane<3>(), 0);
2686 EXPECT_EQ(a.lane<4>(), 0);
2687 EXPECT_EQ(a.lane<5>(), 0);
2688 EXPECT_EQ(a.lane<6>(), 0);
2689 EXPECT_EQ(a.lane<7>(), 0);
2690 }
2691
2692 /** @brief Test vint8 load1. */
TEST(vint8,Load1)2693 TEST(vint8, Load1)
2694 {
2695 int s = 42;
2696 vint8 a = vint8::load1(&s);
2697 EXPECT_EQ(a.lane<0>(), 42);
2698 EXPECT_EQ(a.lane<1>(), 42);
2699 EXPECT_EQ(a.lane<2>(), 42);
2700 EXPECT_EQ(a.lane<3>(), 42);
2701 EXPECT_EQ(a.lane<4>(), 42);
2702 EXPECT_EQ(a.lane<5>(), 42);
2703 EXPECT_EQ(a.lane<6>(), 42);
2704 EXPECT_EQ(a.lane<7>(), 42);
2705 }
2706
2707 /** @brief Test vint8 loada. */
TEST(vint8,Loada)2708 TEST(vint8, Loada)
2709 {
2710 vint8 a = vint8::loada(&(s32_data[0]));
2711 EXPECT_EQ(a.lane<0>(), 0);
2712 EXPECT_EQ(a.lane<1>(), 1);
2713 EXPECT_EQ(a.lane<2>(), 2);
2714 EXPECT_EQ(a.lane<3>(), 3);
2715 EXPECT_EQ(a.lane<4>(), 4);
2716 EXPECT_EQ(a.lane<5>(), 5);
2717 EXPECT_EQ(a.lane<6>(), 6);
2718 EXPECT_EQ(a.lane<7>(), 7);
2719 }
2720
2721 /** @brief Test vint8 lane_id. */
TEST(vint8,LaneID)2722 TEST(vint8, LaneID)
2723 {
2724 vint8 a = vint8::lane_id();
2725 EXPECT_EQ(a.lane<0>(), 0);
2726 EXPECT_EQ(a.lane<1>(), 1);
2727 EXPECT_EQ(a.lane<2>(), 2);
2728 EXPECT_EQ(a.lane<3>(), 3);
2729 EXPECT_EQ(a.lane<4>(), 4);
2730 EXPECT_EQ(a.lane<5>(), 5);
2731 EXPECT_EQ(a.lane<6>(), 6);
2732 EXPECT_EQ(a.lane<7>(), 7);
2733 }
2734
2735 /** @brief Test vint8 add. */
TEST(vint8,vadd)2736 TEST(vint8, vadd)
2737 {
2738 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2739 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2740 a = a + b;
2741 EXPECT_EQ(a.lane<0>(), 1 + 2);
2742 EXPECT_EQ(a.lane<1>(), 2 + 3);
2743 EXPECT_EQ(a.lane<2>(), 3 + 4);
2744 EXPECT_EQ(a.lane<3>(), 4 + 5);
2745 EXPECT_EQ(a.lane<4>(), 1 + 2);
2746 EXPECT_EQ(a.lane<5>(), 2 + 3);
2747 EXPECT_EQ(a.lane<6>(), 3 + 4);
2748 EXPECT_EQ(a.lane<7>(), 4 + 5);
2749 }
2750
2751
2752 /** @brief Test vint8 self-add. */
TEST(vint8,vselfadd1)2753 TEST(vint8, vselfadd1)
2754 {
2755 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2756 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2757 a += b;
2758
2759 EXPECT_EQ(a.lane<0>(), 1 + 2);
2760 EXPECT_EQ(a.lane<1>(), 2 + 3);
2761 EXPECT_EQ(a.lane<2>(), 3 + 4);
2762 EXPECT_EQ(a.lane<3>(), 4 + 5);
2763 EXPECT_EQ(a.lane<4>(), 1 + 2);
2764 EXPECT_EQ(a.lane<5>(), 2 + 3);
2765 EXPECT_EQ(a.lane<6>(), 3 + 4);
2766 EXPECT_EQ(a.lane<7>(), 4 + 5);
2767 }
2768
2769 /** @brief Test vint8 sub. */
TEST(vint8,vsub)2770 TEST(vint8, vsub)
2771 {
2772 vint8 a(1, 2, 4, 4, 1, 2, 4, 4);
2773 vint8 b(2, 3, 3, 5, 2, 3, 3, 5);
2774 a = a - b;
2775 EXPECT_EQ(a.lane<0>(), 1 - 2);
2776 EXPECT_EQ(a.lane<1>(), 2 - 3);
2777 EXPECT_EQ(a.lane<2>(), 4 - 3);
2778 EXPECT_EQ(a.lane<3>(), 4 - 5);
2779 EXPECT_EQ(a.lane<4>(), 1 - 2);
2780 EXPECT_EQ(a.lane<5>(), 2 - 3);
2781 EXPECT_EQ(a.lane<6>(), 4 - 3);
2782 EXPECT_EQ(a.lane<7>(), 4 - 5);
2783 }
2784
2785 /** @brief Test vint8 mul. */
TEST(vint8,vmul)2786 TEST(vint8, vmul)
2787 {
2788 vint8 a(1, 2, 4, 4, 1, 2, 4, 4);
2789 vint8 b(2, 3, 3, 5, 2, 3, 3, 5);
2790 a = a * b;
2791 EXPECT_EQ(a.lane<0>(), 1 * 2);
2792 EXPECT_EQ(a.lane<1>(), 2 * 3);
2793 EXPECT_EQ(a.lane<2>(), 4 * 3);
2794 EXPECT_EQ(a.lane<3>(), 4 * 5);
2795 EXPECT_EQ(a.lane<4>(), 1 * 2);
2796 EXPECT_EQ(a.lane<5>(), 2 * 3);
2797 EXPECT_EQ(a.lane<6>(), 4 * 3);
2798 EXPECT_EQ(a.lane<7>(), 4 * 5);
2799 }
2800
2801 /** @brief Test vint8 bitwise invert. */
TEST(vint8,bit_invert)2802 TEST(vint8, bit_invert)
2803 {
2804 vint8 a(-1, 0, 1, 2, -1, 0, 1, 2);
2805 a = ~a;
2806 EXPECT_EQ(a.lane<0>(), ~-1);
2807 EXPECT_EQ(a.lane<1>(), ~0);
2808 EXPECT_EQ(a.lane<2>(), ~1);
2809 EXPECT_EQ(a.lane<3>(), ~2);
2810 EXPECT_EQ(a.lane<4>(), ~-1);
2811 EXPECT_EQ(a.lane<5>(), ~0);
2812 EXPECT_EQ(a.lane<6>(), ~1);
2813 EXPECT_EQ(a.lane<7>(), ~2);
2814 }
2815
2816 /** @brief Test vint8 bitwise or. */
TEST(vint8,bit_vor)2817 TEST(vint8, bit_vor)
2818 {
2819 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2820 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2821 a = a | b;
2822 EXPECT_EQ(a.lane<0>(), 3);
2823 EXPECT_EQ(a.lane<1>(), 3);
2824 EXPECT_EQ(a.lane<2>(), 7);
2825 EXPECT_EQ(a.lane<3>(), 5);
2826 EXPECT_EQ(a.lane<4>(), 3);
2827 EXPECT_EQ(a.lane<5>(), 3);
2828 EXPECT_EQ(a.lane<6>(), 7);
2829 EXPECT_EQ(a.lane<7>(), 5);
2830 }
2831
2832 /** @brief Test vint8 bitwise and. */
TEST(vint8,bit_vand)2833 TEST(vint8, bit_vand)
2834 {
2835 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2836 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2837 a = a & b;
2838 EXPECT_EQ(a.lane<0>(), 0);
2839 EXPECT_EQ(a.lane<1>(), 2);
2840 EXPECT_EQ(a.lane<2>(), 0);
2841 EXPECT_EQ(a.lane<3>(), 4);
2842 EXPECT_EQ(a.lane<4>(), 0);
2843 EXPECT_EQ(a.lane<5>(), 2);
2844 EXPECT_EQ(a.lane<6>(), 0);
2845 EXPECT_EQ(a.lane<7>(), 4);
2846 }
2847
2848 /** @brief Test vint8 bitwise xor. */
TEST(vint8,bit_vxor)2849 TEST(vint8, bit_vxor)
2850 {
2851 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2852 vint8 b(2, 3, 4, 5, 2, 3, 4, 5);
2853 a = a ^ b;
2854 EXPECT_EQ(a.lane<0>(), 3);
2855 EXPECT_EQ(a.lane<1>(), 1);
2856 EXPECT_EQ(a.lane<2>(), 7);
2857 EXPECT_EQ(a.lane<3>(), 1);
2858 EXPECT_EQ(a.lane<4>(), 3);
2859 EXPECT_EQ(a.lane<5>(), 1);
2860 EXPECT_EQ(a.lane<6>(), 7);
2861 EXPECT_EQ(a.lane<7>(), 1);
2862 }
2863
2864 /** @brief Test vint8 ceq. */
TEST(vint8,ceq)2865 TEST(vint8, ceq)
2866 {
2867 vint8 a1(1, 2, 3, 4, 1, 2, 3, 4);
2868 vint8 b1(0, 1, 2, 3, 0, 1, 2, 3);
2869 vmask8 r1 = a1 == b1;
2870 EXPECT_EQ(0, mask(r1));
2871 EXPECT_EQ(false, any(r1));
2872 EXPECT_EQ(false, all(r1));
2873
2874 vint8 a2(1, 2, 3, 4, 1, 2, 3, 4);
2875 vint8 b2(1, 0, 0, 0, 1, 0, 0, 0);
2876 vmask8 r2 = a2 == b2;
2877 EXPECT_EQ(0x11, mask(r2));
2878 EXPECT_EQ(true, any(r2));
2879 EXPECT_EQ(false, all(r2));
2880
2881 vint8 a3(1, 2, 3, 4, 1, 2, 3, 4);
2882 vint8 b3(1, 0, 3, 0, 1, 0, 3, 0);
2883 vmask8 r3 = a3 == b3;
2884 EXPECT_EQ(0x55, mask(r3));
2885 EXPECT_EQ(true, any(r3));
2886 EXPECT_EQ(false, all(r3));
2887
2888 vint8 a4(1, 2, 3, 4, 1, 2, 3, 4);
2889 vmask8 r4 = a4 == a4;
2890 EXPECT_EQ(0xFF, mask(r4));
2891 EXPECT_EQ(true, any(r4));
2892 EXPECT_EQ(true, all(r4));
2893 }
2894
2895 /** @brief Test vint8 cne. */
TEST(vint8,cne)2896 TEST(vint8, cne)
2897 {
2898 vint8 a1(1, 2, 3, 4, 1, 2, 3, 4);
2899 vint8 b1(0, 1, 2, 3, 0, 1, 2, 3);
2900 vmask8 r1 = a1 != b1;
2901 EXPECT_EQ(0xFF, mask(r1));
2902 EXPECT_EQ(true, any(r1));
2903 EXPECT_EQ(true, all(r1));
2904
2905 vint8 a2(1, 2, 3, 4, 1, 2, 3, 4);
2906 vint8 b2(1, 0, 0, 0, 1, 0, 0, 0);
2907 vmask8 r2 = a2 != b2;
2908 EXPECT_EQ(0xEE, mask(r2));
2909 EXPECT_EQ(true, any(r2));
2910 EXPECT_EQ(false, all(r2));
2911
2912 vint8 a3(1, 2, 3, 4, 1, 2, 3, 4);
2913 vint8 b3(1, 0, 3, 0, 1, 0, 3, 0);
2914 vmask8 r3 = a3 != b3;
2915 EXPECT_EQ(0xAA, mask(r3));
2916 EXPECT_EQ(true, any(r3));
2917 EXPECT_EQ(false, all(r3));
2918
2919 vint8 a4(1, 2, 3, 4, 1, 2, 3, 4);
2920 vmask8 r4 = a4 != a4;
2921 EXPECT_EQ(0, mask(r4));
2922 EXPECT_EQ(false, any(r4));
2923 EXPECT_EQ(false, all(r4));
2924 }
2925
2926 /** @brief Test vint8 clt. */
TEST(vint8,clt)2927 TEST(vint8, clt)
2928 {
2929 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2930 vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
2931 vmask8 r = a < b;
2932 EXPECT_EQ(0xAA, mask(r));
2933 }
2934
2935 /** @brief Test vint8 cgt. */
TEST(vint8,cgt)2936 TEST(vint8, cgt)
2937 {
2938 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2939 vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
2940 vmask8 r = a > b;
2941 EXPECT_EQ(0x11, mask(r));
2942 }
2943
2944 /** @brief Test vint8 min. */
TEST(vint8,min)2945 TEST(vint8, min)
2946 {
2947 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2948 vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
2949 vint8 r = min(a, b);
2950 EXPECT_EQ(r.lane<0>(), 0);
2951 EXPECT_EQ(r.lane<1>(), 2);
2952 EXPECT_EQ(r.lane<2>(), 3);
2953 EXPECT_EQ(r.lane<3>(), 4);
2954 EXPECT_EQ(r.lane<4>(), 0);
2955 EXPECT_EQ(r.lane<5>(), 2);
2956 EXPECT_EQ(r.lane<6>(), 3);
2957 EXPECT_EQ(r.lane<7>(), 4);
2958 }
2959
2960 /** @brief Test vint8 max. */
TEST(vint8,max)2961 TEST(vint8, max)
2962 {
2963 vint8 a(1, 2, 3, 4, 1, 2, 3, 4);
2964 vint8 b(0, 3, 3, 5, 0, 3, 3, 5);
2965 vint8 r = max(a, b);
2966 EXPECT_EQ(r.lane<0>(), 1);
2967 EXPECT_EQ(r.lane<1>(), 3);
2968 EXPECT_EQ(r.lane<2>(), 3);
2969 EXPECT_EQ(r.lane<3>(), 5);
2970 EXPECT_EQ(r.lane<4>(), 1);
2971 EXPECT_EQ(r.lane<5>(), 3);
2972 EXPECT_EQ(r.lane<6>(), 3);
2973 EXPECT_EQ(r.lane<7>(), 5);
2974 }
2975
2976 /** @brief Test vint8 lsr. */
TEST(vint8,lsr)2977 TEST(vint8, lsr)
2978 {
2979 vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
2980 a = lsr<0>(a);
2981 EXPECT_EQ(a.lane<0>(), 1);
2982 EXPECT_EQ(a.lane<1>(), 2);
2983 EXPECT_EQ(a.lane<2>(), 4);
2984 EXPECT_EQ(a.lane<3>(), 0xFFFFFFFC);
2985 EXPECT_EQ(a.lane<4>(), 1);
2986 EXPECT_EQ(a.lane<5>(), 2);
2987 EXPECT_EQ(a.lane<6>(), 4);
2988 EXPECT_EQ(a.lane<7>(), 0xFFFFFFFC);
2989
2990
2991 a = lsr<1>(a);
2992 EXPECT_EQ(a.lane<0>(), 0);
2993 EXPECT_EQ(a.lane<1>(), 1);
2994 EXPECT_EQ(a.lane<2>(), 2);
2995 EXPECT_EQ(a.lane<3>(), 0x7FFFFFFE);
2996 EXPECT_EQ(a.lane<4>(), 0);
2997 EXPECT_EQ(a.lane<5>(), 1);
2998 EXPECT_EQ(a.lane<6>(), 2);
2999 EXPECT_EQ(a.lane<7>(), 0x7FFFFFFE);
3000
3001 a = lsr<2>(a);
3002 EXPECT_EQ(a.lane<0>(), 0);
3003 EXPECT_EQ(a.lane<1>(), 0);
3004 EXPECT_EQ(a.lane<2>(), 0);
3005 EXPECT_EQ(a.lane<3>(), 0x1FFFFFFF);
3006 EXPECT_EQ(a.lane<4>(), 0);
3007 EXPECT_EQ(a.lane<5>(), 0);
3008 EXPECT_EQ(a.lane<6>(), 0);
3009 EXPECT_EQ(a.lane<7>(), 0x1FFFFFFF);
3010 }
3011
3012 /** @brief Test vint8 asr. */
TEST(vint8,asr)3013 TEST(vint8, asr)
3014 {
3015 vint8 a(1, 2, 4, -4, 1, 2, 4, -4);
3016 a = asr<0>(a);
3017 EXPECT_EQ(a.lane<0>(), 1);
3018 EXPECT_EQ(a.lane<1>(), 2);
3019 EXPECT_EQ(a.lane<2>(), 4);
3020 EXPECT_EQ(a.lane<3>(), -4);
3021 EXPECT_EQ(a.lane<4>(), 1);
3022 EXPECT_EQ(a.lane<5>(), 2);
3023 EXPECT_EQ(a.lane<6>(), 4);
3024 EXPECT_EQ(a.lane<7>(), -4);
3025
3026 a = asr<1>(a);
3027 EXPECT_EQ(a.lane<0>(), 0);
3028 EXPECT_EQ(a.lane<1>(), 1);
3029 EXPECT_EQ(a.lane<2>(), 2);
3030 EXPECT_EQ(a.lane<3>(), -2);
3031 EXPECT_EQ(a.lane<4>(), 0);
3032 EXPECT_EQ(a.lane<5>(), 1);
3033 EXPECT_EQ(a.lane<6>(), 2);
3034 EXPECT_EQ(a.lane<7>(), -2);
3035
3036 // Note - quirk of asr is that you will get "stuck" at -1
3037 a = asr<2>(a);
3038 EXPECT_EQ(a.lane<0>(), 0);
3039 EXPECT_EQ(a.lane<1>(), 0);
3040 EXPECT_EQ(a.lane<2>(), 0);
3041 EXPECT_EQ(a.lane<3>(), -1);
3042 EXPECT_EQ(a.lane<4>(), 0);
3043 EXPECT_EQ(a.lane<5>(), 0);
3044 EXPECT_EQ(a.lane<6>(), 0);
3045 EXPECT_EQ(a.lane<7>(), -1);
3046 }
3047
3048 /** @brief Test vint8 hmin. */
TEST(vint8,hmin)3049 TEST(vint8, hmin)
3050 {
3051 vint8 a1(1, 2, 1, 2, 1, 2, 1, 2);
3052 vint8 r1 = hmin(a1);
3053 EXPECT_EQ(r1.lane<0>(), 1);
3054 EXPECT_EQ(r1.lane<1>(), 1);
3055 EXPECT_EQ(r1.lane<2>(), 1);
3056 EXPECT_EQ(r1.lane<3>(), 1);
3057 EXPECT_EQ(r1.lane<4>(), 1);
3058 EXPECT_EQ(r1.lane<5>(), 1);
3059 EXPECT_EQ(r1.lane<6>(), 1);
3060 EXPECT_EQ(r1.lane<7>(), 1);
3061
3062 vint8 a2(1, 2, -1, 5, 1, 2, -1, 5);
3063 vint8 r2 = hmin(a2);
3064 EXPECT_EQ(r2.lane<0>(), -1);
3065 EXPECT_EQ(r2.lane<1>(), -1);
3066 EXPECT_EQ(r2.lane<2>(), -1);
3067 EXPECT_EQ(r2.lane<3>(), -1);
3068 EXPECT_EQ(r2.lane<4>(), -1);
3069 EXPECT_EQ(r2.lane<5>(), -1);
3070 EXPECT_EQ(r2.lane<6>(), -1);
3071 EXPECT_EQ(r2.lane<7>(), -1);
3072 }
3073
3074 /** @brief Test vint8 hmax. */
TEST(vint8,hmax)3075 TEST(vint8, hmax)
3076 {
3077 vint8 a1(1, 2, 1, 2, 1, 3, 1, 2);
3078 vint8 r1 = hmax(a1);
3079 EXPECT_EQ(r1.lane<0>(), 3);
3080 EXPECT_EQ(r1.lane<1>(), 3);
3081 EXPECT_EQ(r1.lane<2>(), 3);
3082 EXPECT_EQ(r1.lane<3>(), 3);
3083 EXPECT_EQ(r1.lane<4>(), 3);
3084 EXPECT_EQ(r1.lane<5>(), 3);
3085 EXPECT_EQ(r1.lane<6>(), 3);
3086 EXPECT_EQ(r1.lane<7>(), 3);
3087
3088 vint8 a2(1, 2, -1, 5, 1, 2, -1, 5);
3089 vint8 r2 = hmax(a2);
3090 EXPECT_EQ(r2.lane<0>(), 5);
3091 EXPECT_EQ(r2.lane<1>(), 5);
3092 EXPECT_EQ(r2.lane<2>(), 5);
3093 EXPECT_EQ(r2.lane<3>(), 5);
3094 EXPECT_EQ(r2.lane<4>(), 5);
3095 EXPECT_EQ(r2.lane<5>(), 5);
3096 EXPECT_EQ(r2.lane<6>(), 5);
3097 EXPECT_EQ(r2.lane<7>(), 5);
3098 }
3099
3100 /** @brief Test vint8 storea. */
TEST(vint8,storea)3101 TEST(vint8, storea)
3102 {
3103 alignas(32) int out[8];
3104 vint8 a(s32_data);
3105 storea(a, out);
3106 EXPECT_EQ(out[0], 0);
3107 EXPECT_EQ(out[1], 1);
3108 EXPECT_EQ(out[2], 2);
3109 EXPECT_EQ(out[3], 3);
3110 EXPECT_EQ(out[4], 4);
3111 EXPECT_EQ(out[5], 5);
3112 EXPECT_EQ(out[6], 6);
3113 EXPECT_EQ(out[7], 7);
3114 }
3115
3116 /** @brief Test vint8 store. */
TEST(vint8,store)3117 TEST(vint8, store)
3118 {
3119 alignas(32) int out[9];
3120 vint8 a(s32_data);
3121 store(a, out + 1);
3122 EXPECT_EQ(out[1], 0);
3123 EXPECT_EQ(out[2], 1);
3124 EXPECT_EQ(out[3], 2);
3125 EXPECT_EQ(out[4], 3);
3126 EXPECT_EQ(out[5], 4);
3127 EXPECT_EQ(out[6], 5);
3128 EXPECT_EQ(out[7], 6);
3129 EXPECT_EQ(out[8], 7);
3130 }
3131
3132 /** @brief Test vint8 store_nbytes. */
TEST(vint8,store_nbytes)3133 TEST(vint8, store_nbytes)
3134 {
3135 alignas(32) int out[2];
3136 vint8 a(42, 314, 75, 90, 42, 314, 75, 90);
3137 store_nbytes(a, (uint8_t*)&out);
3138 EXPECT_EQ(out[0], 42);
3139 EXPECT_EQ(out[1], 314);
3140 }
3141
3142 /** @brief Test vint8 gatheri. */
TEST(vint8,gatheri)3143 TEST(vint8, gatheri)
3144 {
3145 vint8 indices(0, 4, 3, 2, 7, 4, 3, 2);
3146 vint8 r = gatheri(s32_data, indices);
3147 EXPECT_EQ(r.lane<0>(), 0);
3148 EXPECT_EQ(r.lane<1>(), 4);
3149 EXPECT_EQ(r.lane<2>(), 3);
3150 EXPECT_EQ(r.lane<3>(), 2);
3151 EXPECT_EQ(r.lane<4>(), 7);
3152 EXPECT_EQ(r.lane<5>(), 4);
3153 EXPECT_EQ(r.lane<6>(), 3);
3154 EXPECT_EQ(r.lane<7>(), 2);
3155 }
3156
3157 /** @brief Test vint8 pack_low_bytes. */
TEST(vint8,pack_low_bytes)3158 TEST(vint8, pack_low_bytes)
3159 {
3160 vint8 a(1, 2, 3, 4, 2, 3, 4, 5);
3161 vint8 r = pack_low_bytes(a);
3162 EXPECT_EQ(r.lane<0>(), (4 << 24) | (3 << 16) | (2 << 8) | (1 << 0));
3163 EXPECT_EQ(r.lane<1>(), (5 << 24) | (4 << 16) | (3 << 8) | (2 << 0));
3164 }
3165
3166 /** @brief Test vint8 select. */
TEST(vint8,select)3167 TEST(vint8, select)
3168 {
3169 vint8 m1(1, 1, 1, 1, 1, 1, 1, 1);
3170 vint8 m2(1, 2, 1, 2, 1, 2, 1, 2);
3171 vmask8 cond = m1 == m2;
3172
3173 vint8 a(1, 3, 3, 1, 1, 3, 3, 1);
3174 vint8 b(4, 2, 2, 4, 4, 2, 2, 4);
3175
3176 vint8 r1 = select(a, b, cond);
3177 EXPECT_EQ(r1.lane<0>(), 4);
3178 EXPECT_EQ(r1.lane<1>(), 3);
3179 EXPECT_EQ(r1.lane<2>(), 2);
3180 EXPECT_EQ(r1.lane<3>(), 1);
3181 EXPECT_EQ(r1.lane<4>(), 4);
3182 EXPECT_EQ(r1.lane<5>(), 3);
3183 EXPECT_EQ(r1.lane<6>(), 2);
3184 EXPECT_EQ(r1.lane<7>(), 1);
3185
3186 vint8 r2 = select(b, a, cond);
3187 EXPECT_EQ(r2.lane<0>(), 1);
3188 EXPECT_EQ(r2.lane<1>(), 2);
3189 EXPECT_EQ(r2.lane<2>(), 3);
3190 EXPECT_EQ(r2.lane<3>(), 4);
3191 EXPECT_EQ(r2.lane<4>(), 1);
3192 EXPECT_EQ(r2.lane<5>(), 2);
3193 EXPECT_EQ(r2.lane<6>(), 3);
3194 EXPECT_EQ(r2.lane<7>(), 4);
3195 }
3196
3197 // vmask8 tests - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
3198
3199 /** @brief Test vmask8 scalar literal constructor. */
TEST(vmask8,scalar_literal_construct)3200 TEST(vmask8, scalar_literal_construct)
3201 {
3202 vfloat8 ma(0.0f);
3203 vfloat8 mb(1.0f);
3204
3205 vmask8 m1(true);
3206 vfloat8 r1 = select(ma, mb, m1);
3207 vmask8 rm1 = r1 == mb;
3208 EXPECT_EQ(all(rm1), true);
3209
3210 vmask8 m2(false);
3211 vfloat8 r2 = select(ma, mb, m2);
3212 vmask8 rm2 = r2 == mb;
3213 EXPECT_EQ(any(rm2), false);
3214 }
3215
3216 /** @brief Test vmask8 or. */
TEST(vmask8,or)3217 TEST(vmask8, or)
3218 {
3219 vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3220 vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3221 vmask8 m1 = m1a == m1b;
3222
3223 vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0);
3224 vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1);
3225 vmask8 m2 = m2a == m2b;
3226
3227 vmask8 r = m1 | m2;
3228 EXPECT_EQ(mask(r), 0xBB);
3229 }
3230
3231 /** @brief Test vmask8 and. */
TEST(vmask8,and)3232 TEST(vmask8, and)
3233 {
3234 vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3235 vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3236 vmask8 m1 = m1a == m1b;
3237
3238 vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0);
3239 vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1);
3240 vmask8 m2 = m2a == m2b;
3241
3242 vmask8 r = m1 & m2;
3243 EXPECT_EQ(mask(r), 0x22);
3244 }
3245
3246 /** @brief Test vmask8 xor. */
TEST(vmask8,xor)3247 TEST(vmask8, xor)
3248 {
3249 vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3250 vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3251 vmask8 m1 = m1a == m1b;
3252
3253 vfloat8 m2a(1, 1, 0, 0, 1, 1, 0, 0);
3254 vfloat8 m2b(1, 1, 1, 1, 1, 1, 1, 1);
3255 vmask8 m2 = m2a == m2b;
3256
3257 vmask8 r = m1 ^ m2;
3258 EXPECT_EQ(mask(r), 0x99);
3259 }
3260
3261 /** @brief Test vmask8 not. */
TEST(vmask8,not)3262 TEST(vmask8, not)
3263 {
3264 vfloat8 m1a(0, 1, 0, 1, 0, 1, 0, 1);
3265 vfloat8 m1b(1, 1, 1, 1, 1, 1, 1, 1);
3266 vmask8 m1 = m1a == m1b;
3267 vmask8 r = ~m1;
3268 EXPECT_EQ(mask(r), 0x55);
3269 }
3270
3271 #endif
3272
3273 }
3274