1 /*
2 * Copyright 2015 Google Inc.
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "Sk4px.h"
9 #include "SkNx.h"
10 #include "SkRandom.h"
11 #include "Test.h"
12
13 template <int N>
test_Nf(skiatest::Reporter * r)14 static void test_Nf(skiatest::Reporter* r) {
15
16 auto assert_nearly_eq = [&](float eps, const SkNx<N, float>& v,
17 float a, float b, float c, float d) {
18 auto close = [=](float a, float b) { return fabsf(a-b) <= eps; };
19 float vals[4];
20 v.store(vals);
21 bool ok = close(vals[0], a) && close(vals[1], b)
22 && close( v[0], a) && close( v[1], b);
23 REPORTER_ASSERT(r, ok);
24 if (N == 4) {
25 ok = close(vals[2], c) && close(vals[3], d)
26 && close( v[2], c) && close( v[3], d);
27 REPORTER_ASSERT(r, ok);
28 }
29 };
30 auto assert_eq = [&](const SkNx<N, float>& v, float a, float b, float c, float d) {
31 return assert_nearly_eq(0, v, a,b,c,d);
32 };
33
34 float vals[] = {3, 4, 5, 6};
35 SkNx<N,float> a = SkNx<N,float>::Load(vals),
36 b(a),
37 c = a;
38 SkNx<N,float> d;
39 d = a;
40
41 assert_eq(a, 3, 4, 5, 6);
42 assert_eq(b, 3, 4, 5, 6);
43 assert_eq(c, 3, 4, 5, 6);
44 assert_eq(d, 3, 4, 5, 6);
45
46 assert_eq(a+b, 6, 8, 10, 12);
47 assert_eq(a*b, 9, 16, 25, 36);
48 assert_eq(a*b-b, 6, 12, 20, 30);
49 assert_eq((a*b).sqrt(), 3, 4, 5, 6);
50 assert_eq(a/b, 1, 1, 1, 1);
51 assert_eq(SkNx<N,float>(0)-a, -3, -4, -5, -6);
52
53 SkNx<N,float> fours(4);
54
55 assert_eq(fours.sqrt(), 2,2,2,2);
56 assert_nearly_eq(0.001f, fours.rsqrt(), 0.5, 0.5, 0.5, 0.5);
57
58 assert_nearly_eq(0.001f, fours.invert(), 0.25, 0.25, 0.25, 0.25);
59
60 assert_eq(SkNx<N,float>::Min(a, fours), 3, 4, 4, 4);
61 assert_eq(SkNx<N,float>::Max(a, fours), 4, 4, 5, 6);
62
63 // Test some comparisons. This is not exhaustive.
64 REPORTER_ASSERT(r, (a == b).allTrue());
65 REPORTER_ASSERT(r, (a+b == a*b-b).anyTrue());
66 REPORTER_ASSERT(r, !(a+b == a*b-b).allTrue());
67 REPORTER_ASSERT(r, !(a+b == a*b).anyTrue());
68 REPORTER_ASSERT(r, !(a != b).anyTrue());
69 REPORTER_ASSERT(r, (a < fours).anyTrue());
70 REPORTER_ASSERT(r, (a <= fours).anyTrue());
71 REPORTER_ASSERT(r, !(a > fours).allTrue());
72 REPORTER_ASSERT(r, !(a >= fours).allTrue());
73 }
74
// Runs the shared float-vector checks at both widths they are written for.
DEF_TEST(SkNf, r) {
    test_Nf<2>(r);
    test_Nf<4>(r);
}
79
80 template <int N, typename T>
test_Ni(skiatest::Reporter * r)81 void test_Ni(skiatest::Reporter* r) {
82 auto assert_eq = [&](const SkNx<N,T>& v, T a, T b, T c, T d, T e, T f, T g, T h) {
83 T vals[8];
84 v.store(vals);
85
86 switch (N) {
87 case 8: REPORTER_ASSERT(r, vals[4] == e && vals[5] == f && vals[6] == g && vals[7] == h);
88 case 4: REPORTER_ASSERT(r, vals[2] == c && vals[3] == d);
89 case 2: REPORTER_ASSERT(r, vals[0] == a && vals[1] == b);
90 }
91 switch (N) {
92 case 8: REPORTER_ASSERT(r, v[4] == e && v[5] == f &&
93 v[6] == g && v[7] == h);
94 case 4: REPORTER_ASSERT(r, v[2] == c && v[3] == d);
95 case 2: REPORTER_ASSERT(r, v[0] == a && v[1] == b);
96 }
97 };
98
99 T vals[] = { 1,2,3,4,5,6,7,8 };
100 SkNx<N,T> a = SkNx<N,T>::Load(vals),
101 b(a),
102 c = a;
103 SkNx<N,T> d;
104 d = a;
105
106 assert_eq(a, 1,2,3,4,5,6,7,8);
107 assert_eq(b, 1,2,3,4,5,6,7,8);
108 assert_eq(c, 1,2,3,4,5,6,7,8);
109 assert_eq(d, 1,2,3,4,5,6,7,8);
110
111 assert_eq(a+a, 2,4,6,8,10,12,14,16);
112 assert_eq(a*a, 1,4,9,16,25,36,49,64);
113 assert_eq(a*a-a, 0,2,6,12,20,30,42,56);
114
115 assert_eq(a >> 2, 0,0,0,1,1,1,1,2);
116 assert_eq(a << 1, 2,4,6,8,10,12,14,16);
117
118 REPORTER_ASSERT(r, a[1] == 2);
119 }
120
// Runs the shared integer-vector checks for 16-bit unsigned and int lanes at widths 2, 4 and 8.
DEF_TEST(SkNx, r) {
    // uint16_t lanes.
    test_Ni<2, uint16_t>(r);
    test_Ni<4, uint16_t>(r);
    test_Ni<8, uint16_t>(r);

    // int lanes.
    test_Ni<2, int>(r);
    test_Ni<4, int>(r);
    test_Ni<8, int>(r);
}
130
// Compares lane 0 of Sk16b::Min / operator< and Sk16h::Min against the scalar results.
DEF_TEST(SkNi_min_lt, r) {
    const int kValues8  = 1 << 8;
    const int kValues16 = 1 << 16;

    // Exhaustively check the 8x8 bit space.
    for (int a = 0; a < kValues8; a++) {
        for (int b = 0; b < kValues8; b++) {
            Sk16b aw(a);
            Sk16b bw(b);
            REPORTER_ASSERT(r, Sk16b::Min(aw, bw)[0] == SkTMin(a, b));
            // Both sides are collapsed to bool with ! so that only truthiness is compared.
            REPORTER_ASSERT(r, !(aw < bw)[0] == !(a < b));
        }
    }

    // Exhausting the 16x16 bit space is kind of slow, so only do that in release builds.
#ifdef SK_DEBUG
    // Debug builds: sample 2^16 random pairs instead, each operand taken from the
    // top 16 bits of a nextU() draw.
    SkRandom rand;
    for (int i = 0; i < kValues16; i++) {
        const uint16_t a = static_cast<uint16_t>(rand.nextU() >> 16);
        const uint16_t b = static_cast<uint16_t>(rand.nextU() >> 16);
        REPORTER_ASSERT(r, Sk16h::Min(Sk16h(a), Sk16h(b))[0] == SkTMin(a, b));
    }
#else
    for (int a = 0; a < kValues16; a++) {
        for (int b = 0; b < kValues16; b++) {
            REPORTER_ASSERT(r, Sk16h::Min(Sk16h(a), Sk16h(b))[0] == SkTMin(a, b));
        }
    }
#endif
}
155
// Exhaustively checks Sk16b::saturatedAdd() over every pair of 8-bit operands: lane 0 of the
// vector result must equal the scalar sum clamped to 255.
DEF_TEST(SkNi_saturatedAdd, r) {
    for (int a = 0; a < (1<<8); a++) {
        for (int b = 0; b < (1<<8); b++) {
            // a and b are both in [0,255], so the sum is never negative and only the
            // upper bound needs clamping.
            int exact = a+b;
            if (exact > 255) { exact = 255; }

            REPORTER_ASSERT(r, Sk16b(a).saturatedAdd(Sk16b(b))[0] == exact);
        }
    }
}
167
// Checks Sk4u::mulHi() lane by lane against precomputed products.
DEF_TEST(SkNi_mulHi, r) {
    // First 8 primes, each placed in the upper 16 bits of its 32-bit lane.
    Sk4u a{ 0x00020000, 0x00030000, 0x00050000, 0x00070000 };
    Sk4u b{ 0x000b0000, 0x000d0000, 0x00110000, 0x00130000 };

    // Expected results: 2*11, 3*13, 5*17, 7*19.
    const Sk4u q{22, 39, 85, 133};

    const Sk4u c = a.mulHi(b);

    REPORTER_ASSERT(r, c[0] == q[0]);
    REPORTER_ASSERT(r, c[1] == q[1]);
    REPORTER_ASSERT(r, c[2] == q[2]);
    REPORTER_ASSERT(r, c[3] == q[3]);
}
181
// For every pair of 8-bit values, compares two Sk4px ways of computing a*b/255 against the
// rounded scalar result (a*b+127)/255.
DEF_TEST(Sk4px_muldiv255round, r) {
    for (int a = 0; a < (1<<8); a++) {
        for (int b = 0; b < (1<<8); b++) {
            const int exact = (a*b+127)/255;

            // Duplicate a and b 16x each.
            auto av = Sk4px::DupAlpha(a);
            auto bv = Sk4px::DupAlpha(b);

            // This way should always be exactly correct.
            const int correct = (av * bv).div255()[0];
            REPORTER_ASSERT(r, correct == exact);

            // We're a bit more flexible on this method: correct for 0 or 255,
            // otherwise off by <=1.
            const int fast = av.approxMulDiv255(bv)[0];
            REPORTER_ASSERT(r, fast-exact >= -1 && fast-exact <= 1);

            const bool atEndpoint = (a == 0 || a == 255 || b == 0 || b == 255);
            if (atEndpoint) {
                REPORTER_ASSERT(r, fast == exact);
            }
        }
    }
}
204
// Checks that Sk4px::widenLoHi() is bitwise identical to widenLo() + widenHi().
DEF_TEST(Sk4px_widening, r) {
    SkPMColor colors[] = {
        SkPreMultiplyColor(0xff00ff00),
        SkPreMultiplyColor(0x40008000),
        SkPreMultiplyColor(0x7f020406),
        SkPreMultiplyColor(0x00000000),
    };
    auto packed = Sk4px::Load4(colors);

    auto wideLo   = packed.widenLo();
    auto wideHi   = packed.widenHi();
    auto wideLoHi = packed.widenLoHi();

    // The same quantity assembled by hand from the two single-ended widenings.
    auto wideLoHiAlt = wideLo + wideHi;

    REPORTER_ASSERT(r, 0 == memcmp(&wideLoHi, &wideLoHiAlt, sizeof(wideLoHi)));
}
220
// Checks abs() on Sk4f, then on the same four inputs split across two Sk2f vectors.
// Note that == treats -0.0f and 0.0f as equal, so the zero lanes only pin the magnitude.
DEF_TEST(SkNx_abs, r) {
    // Four-wide.
    const auto fs = Sk4f(0.0f, -0.0f, 2.0f, -4.0f).abs();
    REPORTER_ASSERT(r, fs[0] == 0.0f);
    REPORTER_ASSERT(r, fs[1] == 0.0f);
    REPORTER_ASSERT(r, fs[2] == 2.0f);
    REPORTER_ASSERT(r, fs[3] == 4.0f);

    // Two-wide: first the zero pair, then the nonzero pair.
    const auto fshi = Sk2f(0.0f, -0.0f).abs();
    const auto fslo = Sk2f(2.0f, -4.0f).abs();
    REPORTER_ASSERT(r, fshi[0] == 0.0f);
    REPORTER_ASSERT(r, fshi[1] == 0.0f);
    REPORTER_ASSERT(r, fslo[0] == 2.0f);
    REPORTER_ASSERT(r, fslo[1] == 4.0f);
}
234
// Checks Sk4i::abs() on zero, a small negative, a small positive, and -2147483647.
DEF_TEST(Sk4i_abs, r) {
    const auto is = Sk4i(0, -1, 2, -2147483647).abs();

    REPORTER_ASSERT(r, is[0] == 0);
    REPORTER_ASSERT(r, is[1] == 1);
    REPORTER_ASSERT(r, is[2] == 2);
    REPORTER_ASSERT(r, is[3] == 2147483647);
}
242
// Checks Sk4i::Min / Sk4i::Max lane by lane against the scalar SkTMin / SkTMax.
DEF_TEST(Sk4i_minmax, r) {
    // The inputs cover a < b, a > b (twice) and a < b again across the four lanes.
    const Sk4i a(0, 2, 4, 6);
    const Sk4i b(1, 1, 3, 7);

    const auto min = Sk4i::Min(a, b);
    const auto max = Sk4i::Max(a, b);

    for (int i = 0; i < 4; ++i) {
        REPORTER_ASSERT(r, min[i] == SkTMin(a[i], b[i]));
        REPORTER_ASSERT(r, max[i] == SkTMax(a[i], b[i]));
    }
}
253
// Checks Sk4f::floor() on small positive and negative fractions: the positive inputs
// must give 0 and the negative inputs must give -1.
DEF_TEST(SkNx_floor, r) {
    const auto fs = Sk4f(0.4f, -0.4f, 0.6f, -0.6f).floor();

    REPORTER_ASSERT(r, fs[0] ==  0.0f);
    REPORTER_ASSERT(r, fs[1] == -1.0f);
    REPORTER_ASSERT(r, fs[2] ==  0.0f);
    REPORTER_ASSERT(r, fs[3] == -1.0f);
}
261
// Checks SkNx_shuffle both narrowing (4 lanes -> 2) and widening (2 lanes -> 4).
DEF_TEST(SkNx_shuffle, r) {
    Sk4f f4(0.0f, 10.0f, 20.0f, 30.0f);

    // Pick source lanes 2 and 1, in that order.
    const Sk2f f2 = SkNx_shuffle<2,1>(f4);
    REPORTER_ASSERT(r, f2[0] == 20);
    REPORTER_ASSERT(r, f2[1] == 10);

    // Expand back to four lanes, repeating the two source lanes as 0,1,1,0.
    f4 = SkNx_shuffle<0,1,1,0>(f2);
    REPORTER_ASSERT(r, f4[0] == 20);
    REPORTER_ASSERT(r, f4[1] == 10);
    REPORTER_ASSERT(r, f4[2] == 10);
    REPORTER_ASSERT(r, f4[3] == 20);
}
275
// Checks SkNx_cast between Sk4f and Sk4i in both directions.
DEF_TEST(SkNx_int_float, r) {
    Sk4f f = Sk4f(-2.3f, 1.0f, 0.45f, 0.6f);

    // float -> int: the expected lanes are the inputs with their fractional part dropped.
    const Sk4i i = SkNx_cast<int>(f);
    REPORTER_ASSERT(r, i[0] == -2);
    REPORTER_ASSERT(r, i[1] ==  1);
    REPORTER_ASSERT(r, i[2] ==  0);
    REPORTER_ASSERT(r, i[3] ==  0);

    // int -> float: the integers come back as the same whole-number floats.
    f = SkNx_cast<float>(i);
    REPORTER_ASSERT(r, f[0] == -2.0f);
    REPORTER_ASSERT(r, f[1] ==  1.0f);
    REPORTER_ASSERT(r, f[2] ==  0.0f);
    REPORTER_ASSERT(r, f[3] ==  0.0f);
}
291
292 #include "SkRandom.h"
293
// Checks SkNx_cast between Sk4h (uint16_t lanes) and Sk4f in both directions, then that
// random uint16_t values survive a u16 -> float -> u16 round trip unchanged.
DEF_TEST(SkNx_u16_float, r) {
    {
        // u16 --> float
        const auto h4 = Sk4h(15, 17, 257, 65535);
        const auto f4 = SkNx_cast<float>(h4);
        REPORTER_ASSERT(r, f4[0] == 15.0f);
        REPORTER_ASSERT(r, f4[1] == 17.0f);
        REPORTER_ASSERT(r, f4[2] == 257.0f);
        REPORTER_ASSERT(r, f4[3] == 65535.0f);
    }
    {
        // float -> u16
        const auto f4 = Sk4f(15, 17, 257, 65535);
        const auto h4 = SkNx_cast<uint16_t>(f4);
        REPORTER_ASSERT(r, h4[0] == 15);
        REPORTER_ASSERT(r, h4[1] == 17);
        REPORTER_ASSERT(r, h4[2] == 257);
        REPORTER_ASSERT(r, h4[3] == 65535);
    }

    // Starting with any u16 value, we should be able to make a perfect round trip
    // in and out of floats.
    const int kTrials = 10000;
    SkRandom rand;
    for (int i = 0; i < kTrials; ++i) {
        // Braced initialization evaluates the four draws left to right.
        const uint16_t s16[4] {
            static_cast<uint16_t>(rand.nextU16()), static_cast<uint16_t>(rand.nextU16()),
            static_cast<uint16_t>(rand.nextU16()), static_cast<uint16_t>(rand.nextU16()),
        };

        const auto original  = Sk4h::Load(s16);
        const auto asFloat   = SkNx_cast<float>(original);
        const auto roundTrip = SkNx_cast<uint16_t>(asFloat);

        uint16_t d16[4];
        roundTrip.store(d16);
        REPORTER_ASSERT(r, !memcmp(s16, d16, sizeof(s16)));
    }
}
330
331 // The SSE2 implementation of SkNx_cast<uint16_t>(Sk4i) is non-trivial, so worth a test.
DEF_TEST(SkNx_int_u16,r)332 DEF_TEST(SkNx_int_u16, r) {
333 // These are pretty hard to get wrong.
334 for (int i = 0; i <= 0x7fff; i++) {
335 uint16_t expected = (uint16_t)i;
336 uint16_t actual = SkNx_cast<uint16_t>(Sk4i(i))[0];
337
338 REPORTER_ASSERT(r, expected == actual);
339 }
340
341 // A naive implementation with _mm_packs_epi32 would succeed up to 0x7fff but fail here:
342 for (int i = 0x8000; (1) && i <= 0xffff; i++) {
343 uint16_t expected = (uint16_t)i;
344 uint16_t actual = SkNx_cast<uint16_t>(Sk4i(i))[0];
345
346 REPORTER_ASSERT(r, expected == actual);
347 }
348 }
349
// Checks that Sk4f::Load4() splits 16 consecutive floats into four vectors by taking every
// fourth element, and that Sk4f::Store4() writes them back in the original order.
DEF_TEST(SkNx_4fLoad4Store4, r) {
    float src[] = {
         0.0f,  1.0f,  2.0f,  3.0f,
         4.0f,  5.0f,  6.0f,  7.0f,
         8.0f,  9.0f, 10.0f, 11.0f,
        12.0f, 13.0f, 14.0f, 15.0f
    };

    Sk4f a;
    Sk4f b;
    Sk4f c;
    Sk4f d;
    Sk4f::Load4(src, &a, &b, &c, &d);

    // a holds src[0], src[4], src[8], src[12].
    REPORTER_ASSERT(r,  0.0f == a[0]);
    REPORTER_ASSERT(r,  4.0f == a[1]);
    REPORTER_ASSERT(r,  8.0f == a[2]);
    REPORTER_ASSERT(r, 12.0f == a[3]);

    // b holds src[1], src[5], src[9], src[13].
    REPORTER_ASSERT(r,  1.0f == b[0]);
    REPORTER_ASSERT(r,  5.0f == b[1]);
    REPORTER_ASSERT(r,  9.0f == b[2]);
    REPORTER_ASSERT(r, 13.0f == b[3]);

    // c holds src[2], src[6], src[10], src[14].
    REPORTER_ASSERT(r,  2.0f == c[0]);
    REPORTER_ASSERT(r,  6.0f == c[1]);
    REPORTER_ASSERT(r, 10.0f == c[2]);
    REPORTER_ASSERT(r, 14.0f == c[3]);

    // d holds src[3], src[7], src[11], src[15].
    REPORTER_ASSERT(r,  3.0f == d[0]);
    REPORTER_ASSERT(r,  7.0f == d[1]);
    REPORTER_ASSERT(r, 11.0f == d[2]);
    REPORTER_ASSERT(r, 15.0f == d[3]);

    // Storing the four vectors must reproduce the source buffer bit for bit.
    float dst[16];
    Sk4f::Store4(dst, a, b, c, d);
    REPORTER_ASSERT(r, 0 == memcmp(dst, src, 16 * sizeof(float)));
}
381
// Checks unary minus on Sk4f, then on the same four inputs split across two Sk2f vectors.
// Note that == treats -0.0f and 0.0f as equal, so the zero lanes only pin the magnitude.
DEF_TEST(SkNx_neg, r) {
    // Four-wide.
    const auto fs = -Sk4f(0.0f, -0.0f, 2.0f, -4.0f);
    REPORTER_ASSERT(r, fs[0] ==  0.0f);
    REPORTER_ASSERT(r, fs[1] ==  0.0f);
    REPORTER_ASSERT(r, fs[2] == -2.0f);
    REPORTER_ASSERT(r, fs[3] ==  4.0f);

    // Two-wide: first the zero pair, then the nonzero pair.
    const auto fshi = -Sk2f(0.0f, -0.0f);
    const auto fslo = -Sk2f(2.0f, -4.0f);
    REPORTER_ASSERT(r, fshi[0] ==  0.0f);
    REPORTER_ASSERT(r, fshi[1] ==  0.0f);
    REPORTER_ASSERT(r, fslo[0] == -2.0f);
    REPORTER_ASSERT(r, fslo[1] ==  4.0f);
}
395
// Checks thenElse() on the result of a "< 0" comparison: lanes where the comparison holds
// must come out as -1 and all other lanes as 1.  Of the inputs only -4 is less than zero
// (-0.0f is not), so it is the only lane expected to be -1.
DEF_TEST(SkNx_thenElse, r) {
    // Four-wide.
    const auto fs = (Sk4f(0.0f, -0.0f, 2.0f, -4.0f) < 0).thenElse(-1, 1);
    REPORTER_ASSERT(r, fs[0] ==  1);
    REPORTER_ASSERT(r, fs[1] ==  1);
    REPORTER_ASSERT(r, fs[2] ==  1);
    REPORTER_ASSERT(r, fs[3] == -1);

    // Two-wide: first the zero pair, then the nonzero pair.
    const auto fshi = (Sk2f(0.0f, -0.0f) < 0).thenElse(-1, 1);
    const auto fslo = (Sk2f(2.0f, -4.0f) < 0).thenElse(-1, 1);
    REPORTER_ASSERT(r, fshi[0] ==  1);
    REPORTER_ASSERT(r, fshi[1] ==  1);
    REPORTER_ASSERT(r, fslo[0] ==  1);
    REPORTER_ASSERT(r, fslo[1] == -1);
}
409
// Checks that Sk4f::Load2() de-interleaves eight floats: the even-indexed elements land
// in the first output vector and the odd-indexed elements in the second.
DEF_TEST(Sk4f_Load2, r) {
    float xy[8] = { 0,1,2,3,4,5,6,7 };

    Sk4f x;
    Sk4f y;
    Sk4f::Load2(xy, &x, &y);

    // x holds xy[0], xy[2], xy[4], xy[6].
    REPORTER_ASSERT(r, x[0] == 0);
    REPORTER_ASSERT(r, x[1] == 2);
    REPORTER_ASSERT(r, x[2] == 4);
    REPORTER_ASSERT(r, x[3] == 6);

    // y holds xy[1], xy[3], xy[5], xy[7].
    REPORTER_ASSERT(r, y[0] == 1);
    REPORTER_ASSERT(r, y[1] == 3);
    REPORTER_ASSERT(r, y[2] == 5);
    REPORTER_ASSERT(r, y[3] == 7);
}
426
// Checks that Sk2f::Store3() interleaves three two-lane vectors: lane 0 of each vector is
// written first (in argument order), followed by lane 1 of each.
DEF_TEST(Sk2f_Store3, r) {
    // Chosen so that a correct interleave writes 0,1,2,3,4,5.
    Sk2f c0{0.0f, 3.0f};
    Sk2f c1{1.0f, 4.0f};
    Sk2f c2{2.0f, 5.0f};

    float dst[6];
    Sk2f::Store3(dst, c0, c1, c2);

    REPORTER_ASSERT(r, dst[0] == 0);
    REPORTER_ASSERT(r, dst[1] == 1);
    REPORTER_ASSERT(r, dst[2] == 2);
    REPORTER_ASSERT(r, dst[3] == 3);
    REPORTER_ASSERT(r, dst[4] == 4);
    REPORTER_ASSERT(r, dst[5] == 5);
}
440