1 /*
2 * Copyright 2019 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkColorPriv.h"
9 #include "include/private/SkColorData.h"
10 #include "src/core/SkCpu.h"
11 #include "src/core/SkMSAN.h"
12 #include "src/core/SkVM.h"
13 #include "src/gpu/GrShaderCaps.h"
14 #include "src/sksl/SkSLCompiler.h"
15 #include "src/sksl/codegen/SkSLVMCodeGenerator.h"
16 #include "src/sksl/tracing/SkVMDebugTrace.h"
17 #include "src/utils/SkVMVisualizer.h"
18 #include "tests/Test.h"
19
20 template <typename Fn>
test_jit_and_interpreter(const skvm::Builder & b,Fn && test)21 static void test_jit_and_interpreter(const skvm::Builder& b, Fn&& test) {
22 skvm::Program p = b.done();
23 test(p);
24 if (p.hasJIT()) {
25 test(b.done(/*debug_name=*/nullptr, /*allow_jit=*/false));
26 }
27 }
28
// Builds a program that never stores anything and checks that
// skvm::eliminate_dead_code() removes every one of its instructions.
DEF_TEST(SkVM_eliminate_dead_code, r) {
    skvm::Builder b;
    {
        skvm::Ptr src     = b.varying<int>();
        skvm::I32 loaded  = b.load32(src);
        skvm::I32 doubled = b.add(loaded, loaded);
        b.add(doubled, b.splat(7));  // Result is deliberately dropped.
    }

    std::vector<skvm::Instruction> insts = b.program();
    REPORTER_ASSERT(r, insts.size() == 4);

    insts = skvm::eliminate_dead_code(insts);
    REPORTER_ASSERT(r, insts.size() == 0);
}
44
DEF_TEST(SkVM_Pointless, r) {
    // A program with no memory arguments at all.  Every instruction should be
    // marked dead, yet "running" the program still has to work.
    skvm::Builder b;
    b.add(b.splat(5.0f), b.splat(4.0f));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int n = 0;
        while (n < 64) {
            program.eval(n);
            n++;
        }
    });

    for (const skvm::OptimizedInstruction& inst : b.optimize()) {
        REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
    }
}
64
DEF_TEST(SkVM_memset, r) {
    // Program: dst[i] = 42
    skvm::Builder b;
    skvm::Ptr dst = b.varying<int>();
    b.store32(dst, b.splat(42));

    test_jit_and_interpreter(b, [&](const skvm::Program& p) {
        constexpr int kCount    = 17;
        constexpr int kSentinel = 47;

        // One extra slot past the evaluated range catches any overrun.
        int buf[kCount + 1];
        buf[kCount] = kSentinel;

        p.eval(kCount, buf);

        for (int i = 0; i < kCount; i++) {
            REPORTER_ASSERT(r, buf[i] == 42);
        }
        REPORTER_ASSERT(r, buf[kCount] == kSentinel);
    });
}
80
DEF_TEST(SkVM_memcpy, r) {
    // Program: dst[i] = src[i]
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<int>();
        skvm::Ptr dst = b.varying<int>();
        b.store32(dst, b.load32(src));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& p) {
        int src[] = {1,2,3,4,5,6,7,8,9};
        int dst[] = {0,0,0,0,0,0,0,0,0};

        // Copy everything except the final element, which must stay zero.
        const int copied = (int)SK_ARRAY_COUNT(src) - 1;
        p.eval(copied, src, dst);

        for (int i = 0; i < copied; i++) {
            REPORTER_ASSERT(r, dst[i] == src[i]);
        }
        REPORTER_ASSERT(r, dst[copied] == 0);
    });
}
101
DEF_TEST(SkVM_allow_jit, r) {
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<int>();
        skvm::Ptr dst = b.varying<int>();
        b.store32(dst, b.load32(src));
    }

    // Only checkable where this program can JIT at all: in that case,
    // rebuilding it with allow_jit=false must yield a program without JIT.
    const bool canJIT = b.done("test-allow_jit", /*allow_jit=*/true).hasJIT();
    if (canJIT) {
        REPORTER_ASSERT(r, !b.done("", /*allow_jit=*/false).hasJIT());
    }
}
114
DEF_TEST(SkVM_LoopCounts, r) {
    // Evaluate at every N from 0 up to the buffer length so each exact count is exercised.

    // Program: buf[i] += 1
    skvm::Builder b;
    skvm::Ptr arg = b.varying<int>();
    b.store32(arg, b.add(b.splat(1), b.load32(arg)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[64];
        const int len = (int)SK_ARRAY_COUNT(buf);

        for (int N = 0; N <= len; N++) {
            for (int i = 0; i < len; i++) {
                buf[i] = i;
            }

            program.eval(N, buf);

            // The first N entries are bumped by one; everything after is untouched.
            for (int i = 0; i < len; i++) {
                const int want = i < N ? i+1 : i;
                REPORTER_ASSERT(r, buf[i] == want);
            }
        }
    });
}
142
DEF_TEST(SkVM_gather32, r) {
    // Program: buf[i] = img[buf[i] & 7], with img read through the pointer
    // stored at offset 0 of the uniforms.
    skvm::Builder b;
    {
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf      = b.varying<int>();
        skvm::I32  x        = b.load32(buf);
        skvm::I32  index    = b.bit_and(x, b.splat(7));
        b.store32(buf, b.gather32(uniforms,0, index));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int img[] = {12,34,56,78, 90,98,76,54};

        constexpr int kCount = 20;
        int buf[kCount];
        for (int i = 0; i < kCount; i++) {
            buf[i] = i;
        }

        struct Uniforms {
            const int* img;
        } uniforms{img};

        program.eval(kCount, &uniforms, buf);

        // buf started out as 0,1,2,..., so the output cycles through img's eight entries.
        for (int i = 0; i < kCount; i++) {
            REPORTER_ASSERT(r, buf[i] == img[i & 7]);
        }
    });
}
190
DEF_TEST(SkVM_gathers, r) {
    // The same index x drives a 32-, 16- and 8-bit gather from one image; the masks
    // 7, 15 and 31 match img's 8 ints / 16 shorts / 32 bytes (assuming 4-byte int).
    skvm::Builder b;
    {
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf32    = b.varying<int>();
        skvm::Ptr  buf16    = b.varying<uint16_t>();
        skvm::Ptr  buf8     = b.varying<uint8_t>();

        skvm::I32 x = b.load32(buf32);

        b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
        b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
        b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int img[] = {12,34,56,78, 90,98,76,54};

        constexpr int N = 20;
        int      buf32[N];
        uint16_t buf16[N];
        uint8_t  buf8 [N];

        for (int i = 0; i < N; i++) {
            buf32[i] = i;
        }

        struct Uniforms {
            const int* img;
        } uniforms{img};

        program.eval(N, &uniforms, buf32, buf16, buf8);

        // Expected 16- and 8-bit results for indices 0..19.  Each img value shows up
        // once per int and the remaining slots read zero.
        const int want16[N] = {12,0,34,0, 56,0,78,0,  90,0,98,0, 76,0,54,0,  12,0,34,0};
        const int want8 [N] = {12,0, 0,0, 34,0, 0,0,  56,0, 0,0, 78,0, 0,0,  90,0, 0,0};

        for (int i = 0; i < N; i++) {
            REPORTER_ASSERT(r, buf32[i] == img[i & 7] && buf16[i] == want16[i]
                                                      && buf8 [i] == want8 [i]);
        }
    });
}
248
DEF_TEST(SkVM_gathers2, r) {
    // Like SkVM_gathers, but the loaded index is used unmasked by all three gathers.
    skvm::Builder b;
    {
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf32    = b.varying<int>();
        skvm::Ptr  buf16    = b.varying<uint16_t>();
        skvm::Ptr  buf8     = b.varying<uint8_t>();

        skvm::I32 index = b.load32(buf32);

        b.store32(buf32, b.gather32(uniforms,0, index));
        b.store16(buf16, b.gather16(uniforms,0, index));
        b.store8 (buf8 , b.gather8 (uniforms,0, index));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // img[i] == i, so every gathered value spells out the bytes it was read from.
        uint8_t img[256];
        for (int i = 0; i < 256; i++) {
            img[i] = i;
        }

        constexpr int kCount = 64;
        int      buf32[kCount];
        uint16_t buf16[kCount];
        uint8_t  buf8 [kCount];

        for (int i = 0; i < kCount; i++) {
            buf32[i] = (i*47)&63;   // Scrambled indices, all within [0,63].
            buf16[i] = 0;
            buf8 [i] = 0;
        }

        struct Uniforms {
            const uint8_t* img;
        } uniforms{img};

        program.eval(kCount, &uniforms, buf32, buf16, buf8);

        for (int i = 0; i < kCount; i++) {
            REPORTER_ASSERT(r, buf8[i] == ((i*47)&63));  // 0,47,30,13,60,...
        }

        // Spot-check the first and last lanes of the wider gathers.
        REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
        REPORTER_ASSERT(r, buf16[63] == 0x2322);

        REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
        REPORTER_ASSERT(r, buf32[63] == 0x47464544);
    });
}
297
DEF_TEST(SkVM_bitops, r) {
    // Chains each bitwise op and shift.  The trailing comments track the running
    // value for the 0x42 input used below.
    skvm::Builder b;
    {
        skvm::Ptr ptr = b.varying<int>();

        skvm::I32 input   = b.load32(ptr);
        skvm::I32 masked  = b.bit_and  (input,   b.splat(0xf1));  // 0x40
        skvm::I32 merged  = b.bit_or   (masked,  b.splat(0x80));  // 0xc0
        skvm::I32 flipped = b.bit_xor  (merged,  b.splat(0xfe));  // 0x3e
        skvm::I32 cleared = b.bit_clear(flipped, b.splat(0x30));  // 0x0e

        skvm::I32 high    = b.shl(cleared, 28);                   // 0xe000'0000
        skvm::I32 sext    = b.sra(high,    28);                   // 0xffff'fffe
        skvm::I32 result  = b.shr(sext,     1);                   // 0x7fff'ffff

        b.store32(ptr, result);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int value = 0x42;
        program.eval(1, &value);
        REPORTER_ASSERT(r, value == 0x7fff'ffff);
    });
}
323
DEF_TEST(SkVM_select_is_NaN, r) {
    // Program: dst[i] = is_NaN(src[i]) ? 0.0f : src[i]
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<float>();
        skvm::Ptr dst = b.varying<float>();

        skvm::F32 x = b.loadF(src);
        x = select(is_NaN(x), b.splat(0.0f)
                            , x);
        b.storeF(dst, x);
    }

    // The optimized program is expected to be exactly these four ops, in this order.
    const skvm::Op kWant[] = {
        skvm::Op::load32,
        skvm::Op::neq_f32,
        skvm::Op::bit_clear,
        skvm::Op::store32,
    };
    std::vector<skvm::OptimizedInstruction> optimized = b.optimize();
    REPORTER_ASSERT(r, optimized.size() == 4);
    for (int i = 0; i < (int)SK_ARRAY_COUNT(kWant); i++) {
        REPORTER_ASSERT(r, optimized[i].op == kWant[i]);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // ±NaN, ±0, ±1, ±inf
        uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
                          0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
        uint32_t dst[SK_ARRAY_COUNT(src)];
        program.eval(SK_ARRAY_COUNT(src), src, dst);

        // Only the two leading NaNs are replaced by zero; the rest pass through bit-for-bit.
        for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
            REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
        }
    });
}
355
DEF_TEST(SkVM_f32, r) {
    // Program: arg[i] = ((x+x) - x) / x, which is 1 for each input used below.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<float>();

        skvm::F32 x       = b.loadF(arg);
        skvm::F32 twice   = b.add(x, x);        // 2x
        skvm::F32 once    = b.sub(twice, x);    // 2x-x = x
        skvm::F32 unity   = b.div(once, x);     // x/x = 1
        b.storeF(arg, unity);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float buf[] = { 1,2,3,4,5,6,7,8,9 };
        program.eval(SK_ARRAY_COUNT(buf), buf);
        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
            REPORTER_ASSERT(r, buf[i] == 1.0f);
        }
    });
}
376
DEF_TEST(SkVM_cmp_i32, r) {
    // Packs the results of six integer comparisons into bits 0-5 of one output word.
    skvm::Builder b;
    {
        skvm::I32 x = b.load32(b.varying<int>());

        // Keeps the low bit of a comparison mask and moves it up to bit `shift`.
        auto bit_at = [&](int shift, skvm::I32 mask) {
            skvm::I32 low = b.bit_and(mask, b.splat(0x1));
            return b.shl(low, shift);
        };

        skvm::I32 bits = b.splat(0);
        bits = b.bit_or(bits, bit_at(0, b. eq(x, b.splat(0))));
        bits = b.bit_or(bits, bit_at(1, b.neq(x, b.splat(1))));
        bits = b.bit_or(bits, bit_at(2, b. lt(x, b.splat(2))));
        bits = b.bit_or(bits, bit_at(3, b.lte(x, b.splat(3))));
        bits = b.bit_or(bits, bit_at(4, b. gt(x, b.splat(4))));
        bits = b.bit_or(bits, bit_at(5, b.gte(x, b.splat(5))));

        b.store32(b.varying<int>(), bits);
    }
    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int in[] = { 0,1,2,3,4,5,6,7,8,9 };
        int out[SK_ARRAY_COUNT(in)];

        program.eval(SK_ARRAY_COUNT(in), in, out);

        // Expected bit pattern per input; every input from 5 up shares one pattern.
        const int want[] = {
            0b001111, 0b001100, 0b001010, 0b001010, 0b000010,
            0b110010, 0b110010, 0b110010, 0b110010, 0b110010,
        };
        for (int i = 0; i < (int)SK_ARRAY_COUNT(out); i++) {
            REPORTER_ASSERT(r, out[i] == want[i]);
        }
    });
}
412
DEF_TEST(SkVM_cmp_f32, r) {
    // Float counterpart of SkVM_cmp_i32: six comparisons packed into bits 0-5.
    skvm::Builder b;
    {
        skvm::F32 x = b.loadF(b.varying<float>());

        // Keeps the low bit of a comparison mask and moves it up to bit `shift`.
        auto bit_at = [&](int shift, skvm::I32 mask) {
            skvm::I32 low = b.bit_and(mask, b.splat(0x1));
            return b.shl(low, shift);
        };

        skvm::I32 bits = b.splat(0);
        bits = b.bit_or(bits, bit_at(0, b. eq(x, b.splat(0.0f))));
        bits = b.bit_or(bits, bit_at(1, b.neq(x, b.splat(1.0f))));
        bits = b.bit_or(bits, bit_at(2, b. lt(x, b.splat(2.0f))));
        bits = b.bit_or(bits, bit_at(3, b.lte(x, b.splat(3.0f))));
        bits = b.bit_or(bits, bit_at(4, b. gt(x, b.splat(4.0f))));
        bits = b.bit_or(bits, bit_at(5, b.gte(x, b.splat(5.0f))));

        b.store32(b.varying<int>(), bits);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float in[] = { 0,1,2,3,4,5,6,7,8,9 };
        int out[SK_ARRAY_COUNT(in)];

        program.eval(SK_ARRAY_COUNT(in), in, out);

        // Expected bit pattern per input; every input from 5 up shares one pattern.
        const int want[] = {
            0b001111, 0b001100, 0b001010, 0b001010, 0b000010,
            0b110010, 0b110010, 0b110010, 0b110010, 0b110010,
        };
        for (int i = 0; i < (int)SK_ARRAY_COUNT(out); i++) {
            REPORTER_ASSERT(r, out[i] == want[i]);
        }
    });
}
449
DEF_TEST(SkVM_index, r) {
    // Program: dst[i] = index()
    skvm::Builder b;
    skvm::Ptr dst = b.varying<int>();
    b.store32(dst, b.index());

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[23];
        const int n = (int)SK_ARRAY_COUNT(buf);

        program.eval(n, buf);

        // The stored values are expected to count down from n (at i == 0) to 1.
        for (int i = 0; i < n; i++) {
            REPORTER_ASSERT(r, buf[i] == n-i);
        }
    });
}
462
DEF_TEST(SkVM_mad, r) {
    // Exercises the tricky corners of instruction and register selection
    // for Op::mad_f32.

    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();

        skvm::F32 x = b.to_F32(b.load32(arg));
        skvm::F32 y = b.mad(x,x,x);   // x is still needed later, so r[x] != r[y].
        skvm::F32 z = b.mad(y,y,x);   // y is still needed later, but r[z] = r[x] is ok.
        skvm::F32 w = b.mad(z,z,y);   // w may alias z but not y.
        skvm::F32 v = b.mad(w,y,w);   // Got to stop somewhere.
        b.store32(arg, b.trunc(v));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // Worked through for an input of 2:
        //   x = 2
        //   y = 2*2 + 2       = 6
        //   z = 6*6 + 2       = 38
        //   w = 38*38 + 6     = 1450
        //   v = 1450*6 + 1450 = 10150
        int value = 2;
        program.eval(1, &value);
        REPORTER_ASSERT(r, value == 10150);
    });
}
490
DEF_TEST(SkVM_fms, r) {
    // Builds x*2 - 1: a multiply feeding a subtract, a pattern that can be
    // peepholed into an Op::fms_f32.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();

        skvm::F32 x = b.to_F32(b.load32(arg)),
                  v = b.sub(b.mul(x, b.splat(2.0f)),
                            b.splat(1.0f));
        b.store32(arg, b.trunc(v));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
        program.eval((int)SK_ARRAY_COUNT(buf), buf);

        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
            // Must be a comparison.  The earlier `buf[i] = 2*i-1` overwrote the
            // program's output and passed for every i, because 2*i-1 is never 0.
            REPORTER_ASSERT(r, buf[i] == 2*i-1);
        }
    });
}
512
DEF_TEST(SkVM_fnma, r) {
    // Builds 1 - x*2: a multiply subtracted from a constant, a pattern that can be
    // peepholed into an Op::fnma_f32.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();

        skvm::F32 x = b.to_F32(b.load32(arg)),
                  v = b.sub(b.splat(1.0f),
                            b.mul(x, b.splat(2.0f)));
        b.store32(arg, b.trunc(v));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
        program.eval((int)SK_ARRAY_COUNT(buf), buf);

        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
            // Must be a comparison.  The earlier `buf[i] = 1-2*i` overwrote the
            // program's output and passed for every i, because 1-2*i is never 0.
            REPORTER_ASSERT(r, buf[i] == 1-2*i);
        }
    });
}
534
DEF_TEST(SkVM_madder, r) {
    // A second chain of mad() calls with a different pattern of value reuse.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<float>();

        skvm::F32 x = b.loadF(arg);
        skvm::F32 y = b.mad(x,x,x);   // x is still needed later, so r[x] != r[y].
        skvm::F32 z = b.mad(y,x,y);   // r[x] may be reused after this, but not r[y].
        skvm::F32 w = b.mad(y,y,z);
        b.storeF(arg, w);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // Worked through for an input of 2:
        //   y = 2*2 + 2  = 6
        //   z = 6*2 + 6  = 18
        //   w = 6*6 + 18 = 54
        float value = 2.0f;
        program.eval(1, &value);
        REPORTER_ASSERT(r, value == 54.0f);
    });
}
556
DEF_TEST(SkVM_floor, r) {
    // Program: arg[i] = floor(arg[i])
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<float>();
        skvm::F32 floored = b.floor(b.loadF(arg));
        b.storeF(arg, floored);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float values[]   = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
        float expected[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };

        program.eval(SK_ARRAY_COUNT(values), values);

        for (int i = 0; i < (int)SK_ARRAY_COUNT(values); i++) {
            REPORTER_ASSERT(r, values[i] == expected[i]);
        }
    });
}
573
DEF_TEST(SkVM_round, r) {
    // Program: dst[i] = round(src[i]), float in and int out.
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<float>();
        skvm::Ptr dst = b.varying<int>();
        skvm::I32 rounded = b.round(b.loadF(src));
        b.store32(dst, rounded);
    }

    // The cases that sit exactly on a 0.5f boundary assume the current rounding mode is
    // nearest-even.  Nothing here guarantees that... it just probably is.
    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float src[]  = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
        int   want[] = { -2   ,  0   , 0   , 0   , 0   , 1   , 1   , 1   , 2   , 2    };
        int   got[SK_ARRAY_COUNT(src)];

        program.eval(SK_ARRAY_COUNT(src), src, got);

        for (int i = 0; i < (int)SK_ARRAY_COUNT(got); i++) {
            REPORTER_ASSERT(r, got[i] == want[i]);
        }
    });
}
595
DEF_TEST(SkVM_min, r) {
    // Program: dst[i] = min(src1[i], src2[i])
    skvm::Builder b;
    {
        skvm::Ptr src1 = b.varying<float>();
        skvm::Ptr src2 = b.varying<float>();
        skvm::Ptr dst  = b.varying<float>();

        skvm::F32 smaller = b.min(b.loadF(src1), b.loadF(src2));
        b.storeF(dst, smaller);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float lhs[]      = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
        float rhs[]      = { 0.0f, 2.0f, 3.0f,  1.0f, -2.0f};
        float expected[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
        float got[SK_ARRAY_COUNT(lhs)];

        program.eval(SK_ARRAY_COUNT(got), lhs, rhs, got);

        for (int i = 0; i < (int)SK_ARRAY_COUNT(got); i++) {
            REPORTER_ASSERT(r, got[i] == expected[i]);
        }
    });
}
617
DEF_TEST(SkVM_max, r) {
    // Program: dst[i] = max(src1[i], src2[i])
    skvm::Builder b;
    {
        skvm::Ptr src1 = b.varying<float>();
        skvm::Ptr src2 = b.varying<float>();
        skvm::Ptr dst  = b.varying<float>();

        skvm::F32 larger = b.max(b.loadF(src1), b.loadF(src2));
        b.storeF(dst, larger);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float lhs[]      = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
        float rhs[]      = { 0.0f, 2.0f, 3.0f,  1.0f, -2.0f};
        float expected[] = { 0.0f, 2.0f, 4.0f,  1.0f, -1.0f};
        float got[SK_ARRAY_COUNT(lhs)];

        program.eval(SK_ARRAY_COUNT(got), lhs, rhs, got);

        for (int i = 0; i < (int)SK_ARRAY_COUNT(got); i++) {
            REPORTER_ASSERT(r, got[i] == expected[i]);
        }
    });
}
639
DEF_TEST(SkVM_hoist, r) {
    // This program uses so many constants that it will fail to JIT if they are all hoisted.
    // The JIT then retries without hoisting, which needs only 2 registers.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();
        skvm::I32 sum = b.load32(arg);

        int addend = 0;
        while (addend < 32) {
            sum = b.add(sum, b.splat(addend));
            addend++;
        }

        b.store32(arg, sum);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // The program adds 0 + 1 + 2 + ... + 31 = 496 to the input, so 4 becomes 500.
        int value = 4;
        program.eval(1, &value);
        REPORTER_ASSERT(r, value == 500);
    });
}
661
DEF_TEST(SkVM_select, r) {
    // Program: buf[i] = buf[i] > 4 ? buf[i] : 42
    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();

        skvm::I32 x      = b.load32(buf);
        skvm::I32 isBig  = b.gt(x, b.splat(4));
        skvm::I32 chosen = b.select(isBig, x, b.splat(42));

        b.store32(buf, chosen);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = { 0,1,2,3,4,5,6,7,8 };
        program.eval(SK_ARRAY_COUNT(buf), buf);

        for (int i = 0; i < (int)SK_ARRAY_COUNT(buf); i++) {
            const int want = i > 4 ? i : 42;
            REPORTER_ASSERT(r, buf[i] == want);
        }
    });
}
682
DEF_TEST(SkVM_swap, r) {
    skvm::Builder b;
    {
        // The program below is equivalent to
        //     x = *X
        //     y = *Y
        //     *X = y
        //     *Y = x
        // A rescheduling driven purely by the data flow of Op arguments could produce
        //     x = *X
        //     *Y = x
        //     y = *Y
        //     *X = y
        // but that ordering gives different results, so it is invalid.
        skvm::Ptr X = b.varying<int>();
        skvm::Ptr Y = b.varying<int>();

        skvm::I32 x = b.load32(X);
        skvm::I32 y = b.load32(Y);

        b.store32(X, y);
        b.store32(Y, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int first [] = { 0,1,2,3 };
        int second[] = { 4,5,6,7 };

        program.eval(SK_ARRAY_COUNT(first), first, second);

        // After the swap each buffer holds what the other one started with.
        for (int i = 0; i < (int)SK_ARRAY_COUNT(first); i++) {
            REPORTER_ASSERT(r, first [i] == 4 + i);
            REPORTER_ASSERT(r, second[i] == i);
        }
    });
}
717
DEF_TEST(SkVM_NewOps, r) {
    // Exercises a somewhat arbitrary mix of ops: load16/store16, uniform32,
    // integer math, select and gather8.
    skvm::Builder b;
    {
        skvm::Ptr  buf      = b.varying<int16_t>();
        skvm::UPtr uniforms = b.uniform();

        skvm::I32 x = b.load16(buf);

        // The uniforms begin with a pointer; the four ints are read right after it.
        const size_t kPtr = sizeof(const int*);

        x = b.add(x, b.uniform32(uniforms, kPtr+0));
        x = b.mul(x, b.uniform32(uniforms, kPtr+4));
        x = b.sub(x, b.uniform32(uniforms, kPtr+8));

        // Pin x to [0, limit] before using it as a gather index.
        skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
        x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
        x = b.select(b.gt(x, limit     ), limit     , x);

        x = b.gather8(uniforms,0, x);

        b.store16(buf, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int N = 31;
        int16_t buf[N];
        for (int i = 0; i < N; i++) {
            buf[i] = i;
        }

        const int M = 16;
        uint8_t img[M];
        for (int i = 0; i < M; i++) {
            img[i] = i*i;
        }

        struct {
            const uint8_t* img;
            int      add   = 5;
            int      mul   = 3;
            int      sub   = 18;
            int      limit = M-1;
        } uniforms{img};

        program.eval(N, buf, &uniforms);

        for (int i = 0; i < N; i++) {
            // The arithmetic works out to x = (i+5)*3 - 18, a.k.a. 3*(i-1)...
            int x = 3*(i-1);

            // ...which is then pinned to the valid indices of img.
            if (x <  0) { x =  0; }   // i == 1 lands on x == 0 exactly...
            if (x > 15) { x = 15; }   // ...and i == 6 lands on x == 15 exactly.
            REPORTER_ASSERT(r, buf[i] == img[x]);
        }
    });
}
776
DEF_TEST(SKVM_array32, r) {
    // Reads int and float arrays through uniforms, and checks that edits made to the
    // arrays between evals show up in the results.
    skvm::Builder b;
    skvm::Uniforms uniforms(b.uniform(), 0);
    // Occupy the first slot so the other uniforms do not sit at offset 0.
    uniforms.push(0);
    int i[] = {3, 7};
    skvm::Uniform array = uniforms.pushArray(i);
    float f[] = {5, 9};
    skvm::Uniform arrayF = uniforms.pushArrayF(f);
    {
        skvm::Ptr buf0 = b.varying<int32_t>();
        skvm::Ptr buf1 = b.varying<int32_t>();
        skvm::Ptr buf2 = b.varying<int32_t>();

        skvm::I32 j = b.array32(array, 0);
        b.store32(buf0, j);
        skvm::I32 k = b.array32(array, 1);
        b.store32(buf1, k);

        skvm::F32 x = b.arrayF(arrayF, 0);
        skvm::F32 y = b.arrayF(arrayF, 1);
        b.store32(buf2, b.trunc(b.add(x, y)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int K = 10;
        int32_t buf0[K],
                buf1[K],
                buf2[K];

        // Evaluates once and checks that each output buffer is filled with one value.
        auto eval_and_expect = [&](int want0, int want1, int want2) {
            program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
            for (int n = 0; n < K; n++) {
                REPORTER_ASSERT(r, buf0[n] == want0);
            }
            for (int n = 0; n < K; n++) {
                REPORTER_ASSERT(r, buf1[n] == want1);
            }
            for (int n = 0; n < K; n++) {
                REPORTER_ASSERT(r, buf2[n] == want2);
            }
        };

        // This lambda may run twice, so put back the values the previous pass changed.
        i[0] = 3;
        f[1] = 9;
        eval_and_expect(3, 7, 14);

        i[0] = 4;
        f[1] = 10;
        eval_and_expect(4, 7, 15);
    });
}
837
DEF_TEST(SkVM_sqrt, r) {
    // Program: ptr[i] = sqrt(ptr[i])
    skvm::Builder b;
    // NOTE(review): this varying is declared with int elements although the program
    // loads and stores floats through it.  Presumably harmless because the two types
    // have the same size; confirm, or switch to varying<float>().
    skvm::Ptr ptr = b.varying<int>();
    b.storeF(ptr, b.sqrt(b.loadF(ptr)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        constexpr int K = 17;
        float squares[K];
        for (int i = 0; i < K; i++) {
            squares[i] = (float)(i*i);
        }

        // x^2 -> x
        program.eval(K, squares);

        for (int i = 0; i < K; i++) {
            REPORTER_ASSERT(r, squares[i] == (float)i);
        }
    });
}
858
DEF_TEST(SkVM_MSAN, r) {
    // This small memset32() program should be able to JIT.  In an MSAN build, though,
    // writes made by JIT code would not be seen as initializing buf, so this test
    // checks that the interpreter is used instead.
    skvm::Builder b;
    skvm::Ptr dst = b.varying<int>();
    b.store32(dst, b.splat(42));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        constexpr int K = 17;
        int buf[K];                 // Left uninitialized on purpose.

        program.eval(K, buf);
        sk_msan_assert_initialized(buf, buf+K);

        for (int i = 0; i < K; i++) {
            REPORTER_ASSERT(r, buf[i] == 42);
        }
    });
}
876
DEF_TEST(SkVM_assert, r) {
    // Program: assert_true(src[i] < 42).  Every input used below satisfies that.
    skvm::Builder b;
    skvm::I32 value = b.load32(b.varying<int>());
    b.assert_true(b.lt(value, b.splat(42)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int inputs[] = { 0,1,2,3,4,5,6,7,8,9 };
        program.eval(SK_ARRAY_COUNT(inputs), inputs);
    });
}
887
DEF_TEST(SkVM_trace_line, r) {
    // Records traced line numbers.  Any other callback records a sentinel value,
    // which makes the final comparison fail.
    class LineRecorder : public skvm::TraceHook {
    public:
        void line(int lineNum) override { fBuffer.push_back(lineNum); }

        void var(int, int32_t) override { this->unexpected(); }
        void enter(int) override        { this->unexpected(); }
        void exit(int) override         { this->unexpected(); }
        void scope(int) override        { this->unexpected(); }

        std::vector<int> fBuffer;

    private:
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    LineRecorder hook;
    int hookID = b.attachTraceHook(&hook);

    skvm::I32 on  = b.splat(0xFFFFFFFF),
              off = b.splat(0x00000000);

    // Only the calls whose two masks are both fully set are expected to be reported.
    b.trace_line(hookID, on,  on,  123);
    b.trace_line(hookID, off, on,  456);
    b.trace_line(hookID, on,  off, 567);
    b.trace_line(hookID, off, off, 678);
    b.trace_line(hookID, on,  on,  789);

    skvm::Program p = b.done();
    p.eval(1);

    REPORTER_ASSERT(r, (hook.fBuffer == std::vector<int>{123, 789}));
}
913
DEF_TEST(SkVM_trace_var, r) {
    // Records (slot, value) pairs from var().  Any other callback records a sentinel
    // value, which makes the final comparison fail.
    class VarRecorder : public skvm::TraceHook {
    public:
        void var(int slot, int32_t val) override {
            fBuffer.push_back(slot);
            fBuffer.push_back(val);
        }

        void line(int) override  { this->unexpected(); }
        void enter(int) override { this->unexpected(); }
        void exit(int) override  { this->unexpected(); }
        void scope(int) override { this->unexpected(); }

        std::vector<int> fBuffer;

    private:
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    VarRecorder hook;
    int hookID = b.attachTraceHook(&hook);

    skvm::I32 on  = b.splat(0xFFFFFFFF),
              off = b.splat(0x00000000);

    // Only the calls whose two masks are both fully set are expected to be reported.
    b.trace_var(hookID, off, on,  2, b.splat(333));
    b.trace_var(hookID, on,  on,  4, b.splat(555));
    b.trace_var(hookID, off, off, 5, b.splat(666));
    b.trace_var(hookID, on,  on,  6, b.splat(777));
    b.trace_var(hookID, on,  off, 8, b.splat(999));

    skvm::Program p = b.done();
    p.eval(1);

    REPORTER_ASSERT(r, (hook.fBuffer == std::vector<int>{4, 555, 6, 777}));
}
942
DEF_TEST(SkVM_trace_enter_exit, r) {
    // Records (fnIdx, 1) for enter() and (fnIdx, 0) for exit().  Any other callback
    // records a sentinel value, which makes the final comparison fail.
    class EnterExitRecorder : public skvm::TraceHook {
    public:
        void enter(int fnIdx) override { this->record(fnIdx, 1); }
        void exit(int fnIdx) override  { this->record(fnIdx, 0); }

        void line(int) override         { this->unexpected(); }
        void var(int, int32_t) override { this->unexpected(); }
        void scope(int) override        { this->unexpected(); }

        std::vector<int> fBuffer;

    private:
        void record(int fnIdx, int isEnter) {
            fBuffer.push_back(fnIdx);
            fBuffer.push_back(isEnter);
        }
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    EnterExitRecorder hook;
    int hookID = b.attachTraceHook(&hook);

    skvm::I32 on  = b.splat(0xFFFFFFFF),
              off = b.splat(0x00000000);

    // Only the calls whose two masks are both fully set are expected to be reported.
    b.trace_enter(hookID, off, off, 99);
    b.trace_enter(hookID, on,  on,  12);
    b.trace_enter(hookID, off, on,  34);
    b.trace_exit (hookID, on,  on,  56);
    b.trace_exit (hookID, on,  off, 78);
    b.trace_exit (hookID, off, off, 90);

    skvm::Program p = b.done();
    p.eval(1);

    REPORTER_ASSERT(r, (hook.fBuffer == std::vector<int>{12, 1, 56, 0}));
}
975
DEF_TEST(SkVM_trace_scope, r) {
    // Records the delta handed to scope().  Any other callback records a sentinel
    // value, which makes the final comparison fail.
    class ScopeRecorder : public skvm::TraceHook {
    public:
        void scope(int delta) override { fBuffer.push_back(delta); }

        void var(int, int32_t) override { this->unexpected(); }
        void enter(int) override        { this->unexpected(); }
        void exit(int) override         { this->unexpected(); }
        void line(int) override         { this->unexpected(); }

        std::vector<int> fBuffer;

    private:
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    ScopeRecorder hook;
    int hookID = b.attachTraceHook(&hook);

    skvm::I32 on  = b.splat(0xFFFFFFFF),
              off = b.splat(0x00000000);

    // Only the calls whose two masks are both fully set are expected to be reported.
    b.trace_scope(hookID, on,  on,   1);
    b.trace_scope(hookID, on,  off, -2);
    b.trace_scope(hookID, off, off,  3);
    b.trace_scope(hookID, off, on,   4);
    b.trace_scope(hookID, on,  on,  -5);

    skvm::Program p = b.done();
    p.eval(1);

    REPORTER_ASSERT(r, (hook.fBuffer == std::vector<int>{1, -5}));
}
1001
DEF_TEST(SkVM_trace_multiple_hooks, r) {
    // Three independent hooks on one builder; each should see only the lines traced
    // with its own ID.  Any callback other than line() records a sentinel value.
    class LineRecorder : public skvm::TraceHook {
    public:
        void line(int lineNum) override { fBuffer.push_back(lineNum); }

        void var(int, int32_t) override { this->unexpected(); }
        void enter(int) override        { this->unexpected(); }
        void exit(int) override         { this->unexpected(); }
        void scope(int) override        { this->unexpected(); }

        std::vector<int> fBuffer;

    private:
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    LineRecorder hookA, hookB, hookC;
    int idA = b.attachTraceHook(&hookA);
    int idB = b.attachTraceHook(&hookB);
    int idC = b.attachTraceHook(&hookC);

    skvm::I32 on  = b.splat(0xFFFFFFFF),
              off = b.splat(0x00000000);

    // Only the calls whose two masks are both fully set are expected to be reported.
    b.trace_line(idC, on,  on,  111);
    b.trace_line(idA, on,  on,  222);
    b.trace_line(idC, off, off, 333);
    b.trace_line(idB, on,  off, 444);
    b.trace_line(idA, off, on,  555);
    b.trace_line(idB, on,  on,  666);

    skvm::Program p = b.done();
    p.eval(1);

    REPORTER_ASSERT(r, (hookA.fBuffer == std::vector<int>{222}));
    REPORTER_ASSERT(r, (hookB.fBuffer == std::vector<int>{666}));
    REPORTER_ASSERT(r, (hookC.fBuffer == std::vector<int>{111}));
}
1032
DEF_TEST(SkVM_premul, reporter) {
    // Every scope below builds a small program that loads red, runs premul() or unpremul() on
    // it with some alpha, and stores red back. The assertions then look at how many
    // instructions the finished program contains.

    // premul with alpha loaded from memory: the multiply has to stay.
    {
        skvm::Builder builder;
        skvm::Ptr redPtr   = builder.varying<int>();
        skvm::Ptr alphaPtr = builder.varying<int>();

        skvm::F32 red   = builder.loadF(redPtr);
        skvm::F32 green = builder.splat(0.0f);
        skvm::F32 blue  = builder.splat(0.0f);
        skvm::F32 alpha = builder.loadF(alphaPtr);

        builder.premul(&red, &green, &blue, alpha);
        builder.storeF(redPtr, red);

        // Expected: load red, load alpha, red *= alpha, store red.
        const auto instructionCount = builder.done().instructions().size();
        REPORTER_ASSERT(reporter, instructionCount == 4);
    }

    // premul with alpha known to be 1.0: the multiply should be short-circuited away.
    {
        skvm::Builder builder;
        skvm::Ptr redPtr = builder.varying<int>();

        skvm::F32 red   = builder.loadF(redPtr);
        skvm::F32 green = builder.splat(0.0f);
        skvm::F32 blue  = builder.splat(0.0f);
        skvm::F32 alpha = builder.splat(1.0f);

        builder.premul(&red, &green, &blue, alpha);
        builder.storeF(redPtr, red);

        // Expected: load red, store red.
        const auto instructionCount = builder.done().instructions().size();
        REPORTER_ASSERT(reporter, instructionCount == 2);
    }

    // unpremul with alpha loaded from memory: at least the loads and the store remain, plus
    // however many instructions unpremul itself needs.
    {
        skvm::Builder builder;
        skvm::Ptr redPtr   = builder.varying<int>();
        skvm::Ptr alphaPtr = builder.varying<int>();

        skvm::F32 red   = builder.loadF(redPtr);
        skvm::F32 green = builder.splat(0.0f);
        skvm::F32 blue  = builder.splat(0.0f);
        skvm::F32 alpha = builder.loadF(alphaPtr);

        builder.unpremul(&red, &green, &blue, alpha);
        builder.storeF(redPtr, red);

        // Expected: load red, load alpha, a bunch of unpremul instructions, store red.
        const auto instructionCount = builder.done().instructions().size();
        REPORTER_ASSERT(reporter, instructionCount >= 4);
    }

    // unpremul with alpha known to be 1.0: should collapse just like the opaque premul case.
    {
        skvm::Builder builder;
        skvm::Ptr redPtr = builder.varying<int>();

        skvm::F32 red   = builder.loadF(redPtr);
        skvm::F32 green = builder.splat(0.0f);
        skvm::F32 blue  = builder.splat(0.0f);
        skvm::F32 alpha = builder.splat(1.0f);

        builder.unpremul(&red, &green, &blue, alpha);
        builder.storeF(redPtr, red);

        // Expected: load red, store red.
        const auto instructionCount = builder.done().instructions().size();
        REPORTER_ASSERT(reporter, instructionCount == 2);
    }
}
1102
1103 template <typename Fn>
test_asm(skiatest::Reporter * r,Fn && fn,std::initializer_list<uint8_t> expected)1104 static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
1105 uint8_t buf[4096];
1106 skvm::Assembler a{buf};
1107 fn(a);
1108
1109 REPORTER_ASSERT(r, a.size() == expected.size());
1110
1111 auto got = (const uint8_t*)buf,
1112 want = expected.begin();
1113 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
1114 REPORTER_ASSERT(r, got[i] == want[i],
1115 "byte %d was %02x, want %02x", i, got[i], want[i]);
1116 }
1117 }
1118
// Byte-level checks of skvm::Assembler. Each test_asm() call pairs a lambda that emits a few
// instructions with the exact byte sequence those instructions are expected to assemble to;
// test_asm() compares the emitted size and then the bytes one by one. The first groups use
// the x86-64 style registers (rax, ymm0, ...); the later groups use the arm64 style
// registers (x2, v4, sp, ...).
DEF_TEST(SkVM_Assembler,r)1119 DEF_TEST(SkVM_Assembler, r) {
1120 // Easiest way to generate test cases is
1121 //
1122 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
1123 //
1124 // The -x86-asm-syntax=intel bit is optional, controlling the
1125 // input syntax only; the output will always be AT&T op x,y,dst style.
1126 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
1127 // that a bit easier to use here, despite maybe favoring AT&T overall.
1128
1129 using A = skvm::Assembler;
1130 // Our exit strategy from AVX code.
1131 test_asm(r, [&](A& a) {
1132 a.int3();
1133 a.vzeroupper();
1134 a.ret();
1135 },{
1136 0xcc,
1137 0xc5, 0xf8, 0x77,
1138 0xc3,
1139 });
1140
1141 // Align should pad with zero
1142 test_asm(r, [&](A& a) {
1143 a.ret();
1144 a.align(4);
1145 },{
1146 0xc3,
1147 0x00, 0x00, 0x00,
1148 });
1149
// 64-bit add/sub with immediate, memory and register operands (see the per-line AT&T forms).
1150 test_asm(r, [&](A& a) {
1151 a.add(A::rax, 8); // Always good to test rax.
1152 a.sub(A::rax, 32);
1153
1154 a.add(A::rdi, 12); // Last 0x48 REX
1155 a.sub(A::rdi, 8);
1156
1157 a.add(A::r8 , 7); // First 0x49 REX
1158 a.sub(A::r8 , 4);
1159
1160 a.add(A::rsi, 128); // Requires 4 byte immediate.
1161 a.sub(A::r8 , 1000000);
1162
1163 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
1164 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
1165 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
1166 a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12)
1167 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
1168 a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4)
1169 a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4)
1170 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
1171 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
1172 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)
1173
1174 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)
1175
1176 a.add( A::rax , A::rcx); // addq %rcx, %rax
1177 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
1178 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
1179 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx
1180
1181 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
1182 },{
1183 0x48, 0x83, 0b11'000'000, 0x08,
1184 0x48, 0x83, 0b11'101'000, 0x20,
1185
1186 0x48, 0x83, 0b11'000'111, 0x0c,
1187 0x48, 0x83, 0b11'101'111, 0x08,
1188
1189 0x49, 0x83, 0b11'000'000, 0x07,
1190 0x49, 0x83, 0b11'101'000, 0x04,
1191
1192 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
1193 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
1194
1195 0x48,0x83,0x06,0x07,
1196 0x48,0x83,0x46,0x0c,0x07,
1197 0x48,0x83,0x44,0x24,0x0c,0x07,
1198 0x49,0x83,0x44,0x24,0x0c,0x07,
1199 0x48,0x83,0x44,0x84,0x0c,0x07,
1200 0x49,0x83,0x44,0x84,0x0c,0x07,
1201 0x4a,0x83,0x44,0xa0,0x0c,0x07,
1202 0x4b,0x83,0x44,0x43,0x0c,0x07,
1203 0x49,0x83,0x44,0x03,0x0c,0x07,
1204 0x4a,0x83,0x44,0x18,0x0c,0x07,
1205
1206 0x4a,0x83,0x6c,0x18,0x0c,0x07,
1207
1208 0x48,0x01,0xc8,
1209 0x48,0x01,0x08,
1210 0x48,0x01,0x48,0x0c,
1211 0x48,0x03,0x48,0x0c,
1212 0x48,0x2b,0x48,0x0c,
1213 });
1214
1215
1216 test_asm(r, [&](A& a) {
1217 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
1218 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
1219 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
1220 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
1221 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
1222 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
1223 },{
1224 /* VEX */ /*op*/ /*modRM*/
1225 0xc5, 0xf5, 0xfe, 0xc2,
1226 0xc5, 0x75, 0xfe, 0xc2,
1227 0xc5, 0xbd, 0xfe, 0xc2,
1228 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
1229 0xc4, 0xe2, 0x75, 0x40, 0xc2,
1230 0xc5, 0xf5, 0xfa, 0xc2,
1231 });
1232
1233 test_asm(r, [&](A& a) {
1234 a.vpaddw (A::ymm4, A::ymm3, A::ymm2);
1235 a.vpavgw (A::ymm4, A::ymm3, A::ymm2);
1236 a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
1237 a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);
1238
1239 a.vpminsw (A::ymm4, A::ymm3, A::ymm2);
1240 a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2);
1241 a.vpminuw (A::ymm4, A::ymm3, A::ymm2);
1242 a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2);
1243
1244 a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
1245 a.vpabsw (A::ymm4, A::ymm3);
1246 a.vpsllw (A::ymm4, A::ymm3, 12);
1247 a.vpsraw (A::ymm4, A::ymm3, 12);
1248 },{
1249 0xc5, 0xe5, 0xfd, 0xe2,
1250 0xc5, 0xe5, 0xe3, 0xe2,
1251 0xc5, 0xe5, 0x75, 0xe2,
1252 0xc5, 0xe5, 0x65, 0xe2,
1253
1254 0xc5, 0xe5, 0xea, 0xe2,
1255 0xc5, 0xe5, 0xee, 0xe2,
1256 0xc4,0xe2,0x65, 0x3a, 0xe2,
1257 0xc4,0xe2,0x65, 0x3e, 0xe2,
1258
1259 0xc4,0xe2,0x65, 0x0b, 0xe2,
1260 0xc4,0xe2,0x7d, 0x1d, 0xe3,
1261 0xc5,0xdd,0x71, 0xf3, 0x0c,
1262 0xc5,0xdd,0x71, 0xe3, 0x0c,
1263 });
1264
1265 test_asm(r, [&](A& a) {
1266 A::Label l;
1267 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
1268 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
1269 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
1270 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
1271 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
1272 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
1273 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
1274 a.label(&l); // 28 bytes after the vcmpeqps that uses it.
1275 },{
1276 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
1277 0xc5,0xf5,0x76,0xc2,
1278 0xc5,0xf5,0x66,0xc2,
1279 0xc5,0xf4,0xc2,0xc2,0x00,
1280 0xc5,0xf4,0xc2,0xc2,0x01,
1281 0xc5,0xf4,0xc2,0xc2,0x02,
1282 0xc5,0xf4,0xc2,0xc2,0x04,
1283 });
1284
1285 test_asm(r, [&](A& a) {
1286 a.vminps(A::ymm0, A::ymm1, A::ymm2);
1287 a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
1288 },{
1289 0xc5,0xf4,0x5d,0xc2,
1290 0xc5,0xf4,0x5f,0xc2,
1291 });
1292
1293 test_asm(r, [&](A& a) {
1294 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1295 },{
1296 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1297 });
1298
1299 test_asm(r, [&](A& a) {
1300 a.vpsrld(A::ymm15, A::ymm2, 8);
1301 a.vpsrld(A::ymm0 , A::ymm8, 5);
1302 },{
1303 0xc5, 0x85, 0x72,0xd2, 0x08,
1304 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1305 });
1306
1307 test_asm(r, [&](A& a) {
1308 A::Label l;
1309 a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
1310 a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
1311 a.vpermq(A::ymm1, A::ymm2, 5);
1312 a.label(&l); // 6 bytes after vperm2f128
1313 },{
1314 0xc4,0xe2,0x6d,0x16,0x4f,0x20,
1315 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
1316 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1317 });
1318
1319 test_asm(r, [&](A& a) {
1320 a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
1321 a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
1322 },{
1323 0xc5,0xed,0x62,0x0f,
1324 0xc5,0xed,0x6a,0xcb,
1325 });
1326
1327 test_asm(r, [&](A& a) {
1328 a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
1329 a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
1330 a.vroundps(A::ymm1, A::ymm2, A::CEIL);
1331 a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
1332 },{
1333 0xc4,0xe3,0x7d,0x08,0xca,0x00,
1334 0xc4,0xe3,0x7d,0x08,0xca,0x01,
1335 0xc4,0xe3,0x7d,0x08,0xca,0x02,
1336 0xc4,0xe3,0x7d,0x08,0xca,0x03,
1337 });
1338
1339 test_asm(r, [&](A& a) {
1340 A::Label l;
1341 a.label(&l);
1342 a.byte(1);
1343 a.byte(2);
1344 a.byte(3);
1345 a.byte(4);
1346
1347 a.vbroadcastss(A::ymm0 , &l);
1348 a.vbroadcastss(A::ymm1 , &l);
1349 a.vbroadcastss(A::ymm8 , &l);
1350 a.vbroadcastss(A::ymm15, &l);
1351
1352 a.vpshufb(A::ymm4, A::ymm3, &l);
1353 a.vpaddd (A::ymm4, A::ymm3, &l);
1354 a.vpsubd (A::ymm4, A::ymm3, &l);
1355
1356 a.vptest(A::ymm4, &l);
1357
1358 a.vmulps (A::ymm4, A::ymm3, &l);
1359 },{
1360 0x01, 0x02, 0x03, 0x4,
1361
1362 /* VEX */ /*op*/ /* ModRM */ /* offset */
1363 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1364 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1365 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1366 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
1367
1368 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
1369
1370 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
1371 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65
1372
1373 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74
1374
1375 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
1376 });
1377
1378 test_asm(r, [&](A& a) {
1379 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
1380 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
1381 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
1382 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});
1383
1384 a.vbroadcastss(A::ymm8, A::xmm0);
1385 a.vbroadcastss(A::ymm0, A::xmm13);
1386 },{
1387 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1388 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1389 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1390 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1391 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
1392
1393 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1394 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
1395 });
1396
1397 test_asm(r, [&](A& a) {
1398 A::Label l;
1399 a.label(&l);
1400 a.jne(&l);
1401 a.jne(&l);
1402 a.je (&l);
1403 a.jmp(&l);
1404 a.jl (&l);
1405 a.jc (&l);
1406
1407 a.cmp(A::rdx, 1);
1408 a.cmp(A::rax, 12);
1409 a.cmp(A::r14, 2000000000);
1410 },{
1411 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1412 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1413 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1414 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1415 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
1416 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes
1417
1418 0x48,0x83,0xfa,0x01,
1419 0x48,0x83,0xf8,0x0c,
1420 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
1421 });
1422
1423 test_asm(r, [&](A& a) {
1424 a.vmovups(A::ymm5, A::Mem{A::rsi});
1425 a.vmovups(A::Mem{A::rsi}, A::ymm5);
1426
1427 a.vmovups(A::xmm5, A::Mem{A::rsi});
1428 a.vmovups(A::Mem{A::rsi}, A::xmm5);
1429
1430 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
1431 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});
1432
1433 a.vmovq(A::Mem{A::rdx}, A::xmm15);
1434 },{
1435 /* VEX */ /*Op*/ /* ModRM */
1436 0xc5, 0xfc, 0x10, 0b00'101'110,
1437 0xc5, 0xfc, 0x11, 0b00'101'110,
1438
1439 0xc5, 0xf8, 0x10, 0b00'101'110,
1440 0xc5, 0xf8, 0x11, 0b00'101'110,
1441
1442 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
1443 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
1444
1445 0xc5, 0x79, 0xd6, 0b00'111'010,
1446 });
1447
1448 test_asm(r, [&](A& a) {
1449 a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
1450 a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
1451 a.vmovups(A::ymm5, A::Mem{A::rsp,128});
1452
1453 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
1454 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
1455 a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
1456 },{
1457 0xc5,0xfc,0x10,0x2c,0x24,
1458 0xc5,0xfc,0x10,0x6c,0x24,0x40,
1459 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,
1460
1461 0xc5,0xfc,0x11,0x2c,0x24,
1462 0xc5,0xfc,0x11,0x6c,0x24,0x40,
1463 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
1464 });
1465
1466 test_asm(r, [&](A& a) {
1467 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1468 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
1469 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
1470 a.movzbq(A::r8, A::Mem{A::rsi, 12});
1471 a.movzbq(A::r8, A::Mem{A::rsi, 400});
1472
1473 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
1474 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
1475 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
1476 a.movzwq(A::r8, A::Mem{A::rsi, 12});
1477 a.movzwq(A::r8, A::Mem{A::rsi, 400});
1478
1479 a.vmovd(A::Mem{A::rax}, A::xmm0);
1480 a.vmovd(A::Mem{A::rax}, A::xmm8);
1481 a.vmovd(A::Mem{A::r8 }, A::xmm0);
1482
1483 a.vmovd(A::xmm0, A::Mem{A::rax});
1484 a.vmovd(A::xmm8, A::Mem{A::rax});
1485 a.vmovd(A::xmm0, A::Mem{A::r8 });
1486
1487 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
1488 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
1489 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});
1490
1491 a.vmovd(A::rax, A::xmm0);
1492 a.vmovd(A::rax, A::xmm8);
1493 a.vmovd(A::r8 , A::xmm0);
1494
1495 a.vmovd(A::xmm0, A::rax);
1496 a.vmovd(A::xmm8, A::rax);
1497 a.vmovd(A::xmm0, A::r8 );
1498
1499 a.movb(A::Mem{A::rdx}, A::rax);
1500 a.movb(A::Mem{A::rdx}, A::r8 );
1501 a.movb(A::Mem{A::r8 }, A::rax);
1502
1503 a.movb(A::rdx, A::Mem{A::rax});
1504 a.movb(A::rdx, A::Mem{A::r8 });
1505 a.movb(A::r8 , A::Mem{A::rax});
1506
1507 a.movb(A::rdx, 12);
1508 a.movb(A::rax, 4);
1509 a.movb(A::r8 , -1);
1510
1511 a.movb(A::Mem{A::rdx}, 12);
1512 a.movb(A::Mem{A::rax}, 4);
1513 a.movb(A::Mem{A::r8 }, -1);
1514 },{
1515 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
1516 0x49,0x0f,0xb6,0x00,
1517 0x4c,0x0f,0xb6,0x06,
1518 0x4c,0x0f,0xb6,0x46, 12,
1519 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1520
1521 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
1522 0x49,0x0f,0xb7,0x00,
1523 0x4c,0x0f,0xb7,0x06,
1524 0x4c,0x0f,0xb7,0x46, 12,
1525 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,
1526
1527 0xc5,0xf9,0x7e,0x00,
1528 0xc5,0x79,0x7e,0x00,
1529 0xc4,0xc1,0x79,0x7e,0x00,
1530
1531 0xc5,0xf9,0x6e,0x00,
1532 0xc5,0x79,0x6e,0x00,
1533 0xc4,0xc1,0x79,0x6e,0x00,
1534
1535 0xc5,0xf9,0x6e,0x04,0x88,
1536 0xc4,0x21,0x79,0x6e,0x3c,0x40,
1537 0xc4,0xc1,0x79,0x6e,0x04,0x08,
1538
1539 0xc5,0xf9,0x7e,0xc0,
1540 0xc5,0x79,0x7e,0xc0,
1541 0xc4,0xc1,0x79,0x7e,0xc0,
1542
1543 0xc5,0xf9,0x6e,0xc0,
1544 0xc5,0x79,0x6e,0xc0,
1545 0xc4,0xc1,0x79,0x6e,0xc0,
1546
1547 0x48 ,0x88, 0x02,
1548 0x4c, 0x88, 0x02,
1549 0x49, 0x88, 0x00,
1550
1551 0x48 ,0x8a, 0x10,
1552 0x49, 0x8a, 0x10,
1553 0x4c, 0x8a, 0x00,
1554
1555 0x48, 0xc6, 0xc2, 0x0c,
1556 0x48, 0xc6, 0xc0, 0x04,
1557 0x49, 0xc6, 0xc0, 0xff,
1558
1559 0x48, 0xc6, 0x02, 0x0c,
1560 0x48, 0xc6, 0x00, 0x04,
1561 0x49, 0xc6, 0x00, 0xff,
1562 });
1563
1564 test_asm(r, [&](A& a) {
1565 a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1
1566 a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8
1567
1568 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
1569 a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8
1570
1571 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
1572 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8
1573
1574 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
1575 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8
1576
1577 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
1578 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)
1579
1580 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
1581 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);
1582
1583 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
1584 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
1585 },{
1586 0xc4,0xe3,0x39, 0x22, 0x0e, 1,
1587 0xc4,0x43,0x71, 0x22, 0x00, 3,
1588
1589 0xc5,0xb9, 0xc4, 0x0e, 4,
1590 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1591
1592 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1593 0xc4,0x43,0x71, 0x20, 0x00, 12,
1594
1595 0xc4,0x63,0x7d,0x39,0xc1, 1,
1596 0xc4,0xc3,0x7d,0x39,0xc8, 0,
1597
1598 0xc4,0x63,0x79,0x16,0x06, 3,
1599 0xc4,0xc3,0x79,0x16,0x08, 2,
1600
1601 0xc4,0x63,0x79, 0x15, 0x06, 7,
1602 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1603
1604 0xc4,0x63,0x79, 0x14, 0x06, 7,
1605 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1606 });
1607
1608 test_asm(r, [&](A& a) {
1609 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1610 },{
1611 0xc5, 0x9d, 0xdf, 0xda,
1612 });
1613
1614 test_asm(r, [&](A& a) {
1615 A::Label l;
1616 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3
1617
1618 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
1619 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
1620 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3
1621
1622 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
1623 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3
1624
1625 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
1626 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
1627 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
1628 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
1629 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3
1630
1631 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
1632 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
1633 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3
1634
1635 a.vcvttps2dq(A::ymm3, A::ymm2);
1636 a.vcvtdq2ps (A::ymm3, A::ymm2);
1637 a.vcvtps2dq (A::ymm3, A::ymm2);
1638 a.vsqrtps (A::ymm3, A::ymm2);
1639 a.label(&l);
1640 },{
1641 0xc5,0xfd,0x6f,0xda,
1642
1643 0xc5,0xfd,0x6f,0x1e,
1644 0xc5,0xfd,0x6f,0x1c,0x24,
1645 0xc4,0xc1,0x7d,0x6f,0x1b,
1646
1647 0xc5,0xfd,0x6f,0x5e,0x04,
1648 0xc5,0xfd,0x6f,0x5c,0x24,0x04,
1649
1650 0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
1651 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
1652 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
1653 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1654 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
1655
1656 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
1657 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,
1658
1659 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,
1660
1661 0xc5,0xfe,0x5b,0xda,
1662 0xc5,0xfc,0x5b,0xda,
1663 0xc5,0xfd,0x5b,0xda,
1664 0xc5,0xfc,0x51,0xda,
1665 });
1666
1667 test_asm(r, [&](A& a) {
1668 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
1669 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);
1670
1671 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
1672 a.vcvtph2ps(A::ymm2, A::xmm3);
1673 },{
1674 0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
1675 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,
1676
1677 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
1678 0xc4,0xe2,0x7d,0x13,0xd3,
1679 });
1680
1681 test_asm(r, [&](A& a) {
1682 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
1683 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
1684 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
1685 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
1686 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
1687 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
1688 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
1689 },{
1690 0xc4,0xe2,0x6d,0x92,0x0c,0x87,
1691 0xc4,0xe2,0x75,0x92,0x04,0x10,
1692 0xc4,0x62,0x75,0x92,0x14,0x10,
1693 0xc4,0xa2,0x75,0x92,0x04,0x20,
1694 0xc4,0xc2,0x75,0x92,0x04,0x11,
1695 0xc4,0xe2,0x1d,0x92,0x04,0x10,
1696 0xc4,0xe2,0x1d,0x92,0x04,0xd0,
1697 });
1698
1699 test_asm(r, [&](A& a) {
1700 a.mov(A::rax, A::Mem{A::rdi, 0});
1701 a.mov(A::rax, A::Mem{A::rdi, 1});
1702 a.mov(A::rax, A::Mem{A::rdi, 512});
1703 a.mov(A::r15, A::Mem{A::r13, 42});
1704 a.mov(A::rax, A::Mem{A::r13, 42});
1705 a.mov(A::r15, A::Mem{A::rax, 42});
1706 a.mov(A::rax, 1);
1707 a.mov(A::rax, A::rcx);
1708 },{
1709 0x48, 0x8b, 0x07,
1710 0x48, 0x8b, 0x47, 0x01,
1711 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
1712 0x4d, 0x8b, 0x7d, 0x2a,
1713 0x49, 0x8b, 0x45, 0x2a,
1714 0x4c, 0x8b, 0x78, 0x2a,
1715 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
1716 0x48, 0x89, 0xc8,
1717 });
1718
// The remaining groups use the arm64 registers (v*, x*, sp); reference encodings for them
// can be produced with llvm-mc as in the example on the next line.
1719 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1720
1721 test_asm(r, [&](A& a) {
1722 a.and16b(A::v4, A::v3, A::v1);
1723 a.orr16b(A::v4, A::v3, A::v1);
1724 a.eor16b(A::v4, A::v3, A::v1);
1725 a.bic16b(A::v4, A::v3, A::v1);
1726 a.bsl16b(A::v4, A::v3, A::v1);
1727 a.not16b(A::v4, A::v3);
1728
1729 a.add4s(A::v4, A::v3, A::v1);
1730 a.sub4s(A::v4, A::v3, A::v1);
1731 a.mul4s(A::v4, A::v3, A::v1);
1732
1733 a.cmeq4s(A::v4, A::v3, A::v1);
1734 a.cmgt4s(A::v4, A::v3, A::v1);
1735
1736 a.sub8h(A::v4, A::v3, A::v1);
1737 a.mul8h(A::v4, A::v3, A::v1);
1738
1739 a.fadd4s(A::v4, A::v3, A::v1);
1740 a.fsub4s(A::v4, A::v3, A::v1);
1741 a.fmul4s(A::v4, A::v3, A::v1);
1742 a.fdiv4s(A::v4, A::v3, A::v1);
1743 a.fmin4s(A::v4, A::v3, A::v1);
1744 a.fmax4s(A::v4, A::v3, A::v1);
1745
1746 a.fneg4s (A::v4, A::v3);
1747 a.fsqrt4s(A::v4, A::v3);
1748
1749 a.fmla4s(A::v4, A::v3, A::v1);
1750 a.fmls4s(A::v4, A::v3, A::v1);
1751
1752 a.fcmeq4s(A::v4, A::v3, A::v1);
1753 a.fcmgt4s(A::v4, A::v3, A::v1);
1754 a.fcmge4s(A::v4, A::v3, A::v1);
1755 },{
1756 0x64,0x1c,0x21,0x4e,
1757 0x64,0x1c,0xa1,0x4e,
1758 0x64,0x1c,0x21,0x6e,
1759 0x64,0x1c,0x61,0x4e,
1760 0x64,0x1c,0x61,0x6e,
1761 0x64,0x58,0x20,0x6e,
1762
1763 0x64,0x84,0xa1,0x4e,
1764 0x64,0x84,0xa1,0x6e,
1765 0x64,0x9c,0xa1,0x4e,
1766
1767 0x64,0x8c,0xa1,0x6e,
1768 0x64,0x34,0xa1,0x4e,
1769
1770 0x64,0x84,0x61,0x6e,
1771 0x64,0x9c,0x61,0x4e,
1772
1773 0x64,0xd4,0x21,0x4e,
1774 0x64,0xd4,0xa1,0x4e,
1775 0x64,0xdc,0x21,0x6e,
1776 0x64,0xfc,0x21,0x6e,
1777 0x64,0xf4,0xa1,0x4e,
1778 0x64,0xf4,0x21,0x4e,
1779
1780 0x64,0xf8,0xa0,0x6e,
1781 0x64,0xf8,0xa1,0x6e,
1782
1783 0x64,0xcc,0x21,0x4e,
1784 0x64,0xcc,0xa1,0x4e,
1785
1786 0x64,0xe4,0x21,0x4e,
1787 0x64,0xe4,0xa1,0x6e,
1788 0x64,0xe4,0x21,0x6e,
1789 });
1790
1791 test_asm(r, [&](A& a) {
1792 a.shl4s(A::v4, A::v3, 0);
1793 a.shl4s(A::v4, A::v3, 1);
1794 a.shl4s(A::v4, A::v3, 8);
1795 a.shl4s(A::v4, A::v3, 16);
1796 a.shl4s(A::v4, A::v3, 31);
1797
1798 a.sshr4s(A::v4, A::v3, 1);
1799 a.sshr4s(A::v4, A::v3, 8);
1800 a.sshr4s(A::v4, A::v3, 31);
1801
1802 a.ushr4s(A::v4, A::v3, 1);
1803 a.ushr4s(A::v4, A::v3, 8);
1804 a.ushr4s(A::v4, A::v3, 31);
1805
1806 a.ushr8h(A::v4, A::v3, 1);
1807 a.ushr8h(A::v4, A::v3, 8);
1808 a.ushr8h(A::v4, A::v3, 15);
1809 },{
1810 0x64,0x54,0x20,0x4f,
1811 0x64,0x54,0x21,0x4f,
1812 0x64,0x54,0x28,0x4f,
1813 0x64,0x54,0x30,0x4f,
1814 0x64,0x54,0x3f,0x4f,
1815
1816 0x64,0x04,0x3f,0x4f,
1817 0x64,0x04,0x38,0x4f,
1818 0x64,0x04,0x21,0x4f,
1819
1820 0x64,0x04,0x3f,0x6f,
1821 0x64,0x04,0x38,0x6f,
1822 0x64,0x04,0x21,0x6f,
1823
1824 0x64,0x04,0x1f,0x6f,
1825 0x64,0x04,0x18,0x6f,
1826 0x64,0x04,0x11,0x6f,
1827 });
1828
1829 test_asm(r, [&](A& a) {
1830 a.sli4s(A::v4, A::v3, 0);
1831 a.sli4s(A::v4, A::v3, 1);
1832 a.sli4s(A::v4, A::v3, 8);
1833 a.sli4s(A::v4, A::v3, 16);
1834 a.sli4s(A::v4, A::v3, 31);
1835 },{
1836 0x64,0x54,0x20,0x6f,
1837 0x64,0x54,0x21,0x6f,
1838 0x64,0x54,0x28,0x6f,
1839 0x64,0x54,0x30,0x6f,
1840 0x64,0x54,0x3f,0x6f,
1841 });
1842
1843 test_asm(r, [&](A& a) {
1844 a.scvtf4s (A::v4, A::v3);
1845 a.fcvtzs4s(A::v4, A::v3);
1846 a.fcvtns4s(A::v4, A::v3);
1847 a.frintp4s(A::v4, A::v3);
1848 a.frintm4s(A::v4, A::v3);
1849 a.fcvtn (A::v4, A::v3);
1850 a.fcvtl (A::v4, A::v3);
1851 },{
1852 0x64,0xd8,0x21,0x4e,
1853 0x64,0xb8,0xa1,0x4e,
1854 0x64,0xa8,0x21,0x4e,
1855 0x64,0x88,0xa1,0x4e,
1856 0x64,0x98,0x21,0x4e,
1857 0x64,0x68,0x21,0x0e,
1858 0x64,0x78,0x21,0x0e,
1859 });
1860
1861 test_asm(r, [&](A& a) {
1862 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
1863 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
1864 a.strq(A::v1, A::sp); // str q1, [sp]
1865 a.strd(A::v0, A::sp, 6); // str d0, [sp, #48]
1866 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
1867 a.strh(A::v0, A::sp, 10); // str h0, [sp, #20]
1868 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
1869 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
1870 a.ldrh(A::v9, A::sp, 47); // ldr h9, [sp, #94]
1871 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
1872 a.ldrd(A::v7, A::sp, 1); // ldr d7, [sp, #8]
1873 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
1874 a.add (A::sp, A::sp, 32); // add sp, sp, #32
1875 },{
1876 0xff,0x83,0x00,0xd1,
1877 0xe0,0x07,0x80,0x3d,
1878 0xe1,0x03,0x80,0x3d,
1879 0xe0,0x1b,0x00,0xfd,
1880 0xe0,0x1b,0x00,0xbd,
1881 0xe0,0x2b,0x00,0x7d,
1882 0xe0,0xbf,0x00,0x3d,
1883 0xe9,0xab,0x40,0x3d,
1884 0xe9,0xbf,0x40,0x7d,
1885 0xe7,0x2b,0x40,0xbd,
1886 0xe7,0x07,0x40,0xfd,
1887 0xe5,0x03,0xc2,0x3d,
1888 0xff,0x83,0x00,0x91,
1889 });
1890
1891 test_asm(r, [&](A& a) {
1892 a.brk(0);
1893 a.brk(65535);
1894
1895 a.ret(A::x30); // Conventional ret using link register.
1896 a.ret(A::x13); // Can really return using any register if we like.
1897
1898 a.add(A::x2, A::x2, 4);
1899 a.add(A::x3, A::x2, 32);
1900
1901 a.sub(A::x2, A::x2, 4);
1902 a.sub(A::x3, A::x2, 32);
1903
1904 a.subs(A::x2, A::x2, 4);
1905 a.subs(A::x3, A::x2, 32);
1906
1907 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1908 a.cmp(A::x2, 4);
1909
1910 A::Label l;
1911 a.label(&l);
1912 a.bne(&l);
1913 a.bne(&l);
1914 a.blt(&l);
1915 a.b(&l);
1916 a.cbnz(A::x2, &l);
1917 a.cbz(A::x2, &l);
1918
1919 a.add(A::x3, A::x2, A::x1); // add x3,x2,x1
1920 a.add(A::x3, A::x2, A::x1, A::ASR, 3); // add x3,x2,x1, asr #3
1921 },{
1922 0x00,0x00,0x20,0xd4,
1923 0xe0,0xff,0x3f,0xd4,
1924
1925 0xc0,0x03,0x5f,0xd6,
1926 0xa0,0x01,0x5f,0xd6,
1927
1928 0x42,0x10,0x00,0x91,
1929 0x43,0x80,0x00,0x91,
1930
1931 0x42,0x10,0x00,0xd1,
1932 0x43,0x80,0x00,0xd1,
1933
1934 0x42,0x10,0x00,0xf1,
1935 0x43,0x80,0x00,0xf1,
1936
1937 0x5f,0x10,0x00,0xf1,
1938 0x5f,0x10,0x00,0xf1,
1939
1940 0x01,0x00,0x00,0x54, // b.ne #0
1941 0xe1,0xff,0xff,0x54, // b.ne #-4
1942 0xcb,0xff,0xff,0x54, // b.lt #-8
1943 0xae,0xff,0xff,0x54, // b.al #-12
1944 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1945 0x62,0xff,0xff,0xb4, // cbz x2, #-20
1946
1947 0x43,0x00,0x01,0x8b,
1948 0x43,0x0c,0x81,0x8b,
1949 });
1950
1951 // Can we cbz() to a not-yet-defined label?
1952 test_asm(r, [&](A& a) {
1953 A::Label l;
1954 a.cbz(A::x2, &l);
1955 a.add(A::x3, A::x2, 32);
1956 a.label(&l);
1957 a.ret(A::x30);
1958 },{
1959 0x42,0x00,0x00,0xb4, // cbz x2, #8
1960 0x43,0x80,0x00,0x91, // add x3, x2, #32
1961 0xc0,0x03,0x5f,0xd6, // ret
1962 });
1963
1964 // If we start a label as a backward label,
1965 // can we redefine it to be a future label?
1966 // (Not sure this is useful... just want to test it works.)
1967 test_asm(r, [&](A& a) {
1968 A::Label l1;
1969 a.label(&l1);
1970 a.add(A::x3, A::x2, 32);
1971 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1972
1973 A::Label l2; // Start off the same...
1974 a.label(&l2);
1975 a.add(A::x3, A::x2, 32);
1976 a.cbz(A::x2, &l2); // Looks like this will go backward...
1977 a.add(A::x2, A::x2, 4);
1978 a.add(A::x3, A::x2, 32);
1979 a.label(&l2); // But no... actually forward! What a switcheroo!
1980 },{
1981 0x43,0x80,0x00,0x91, // add x3, x2, #32
1982 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1983
1984 0x43,0x80,0x00,0x91, // add x3, x2, #32
1985 0x62,0x00,0x00,0xb4, // cbz x2, #12
1986 0x42,0x10,0x00,0x91, // add x2, x2, #4
1987 0x43,0x80,0x00,0x91, // add x3, x2, #32
1988 });
1989
1990 // Loading from a label on ARM.
1991 test_asm(r, [&](A& a) {
1992 A::Label fore,aft;
1993 a.label(&fore);
1994 a.word(0x01234567);
1995 a.ldrq(A::v1, &fore);
1996 a.ldrq(A::v2, &aft);
1997 a.label(&aft);
1998 a.word(0x76543210);
1999 },{
2000 0x67,0x45,0x23,0x01,
2001 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
2002 0x22,0x00,0x00,0x9c, // ldr q2, #4
2003 0x10,0x32,0x54,0x76,
2004 });
2005
2006 test_asm(r, [&](A& a) {
2007 a.ldrq(A::v0, A::x8);
2008 a.strq(A::v0, A::x8);
2009 },{
2010 0x00,0x01,0xc0,0x3d,
2011 0x00,0x01,0x80,0x3d,
2012 });
2013
2014 test_asm(r, [&](A& a) {
2015 a.dup4s (A::v0, A::x8);
2016 a.ld1r4s (A::v0, A::x8); // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding
2017 a.ld1r8h (A::v0, A::x8);
2018 a.ld1r16b(A::v0, A::x8);
2019 },{
2020 0x00,0x0d,0x04,0x4e,
2021 0x00,0xc9,0x40,0x4d,
2022 0x00,0xc5,0x40,0x4d,
2023 0x00,0xc1,0x40,0x4d,
2024 });
2025
2026 test_asm(r, [&](A& a) {
2027 a.ld24s(A::v0, A::x8); // echo 'ld2.4s {v0,v1}, [x8]' | llvm-mc --show-encoding
2028 a.ld44s(A::v0, A::x8);
2029 a.st24s(A::v0, A::x8);
2030 a.st44s(A::v0, A::x8); // echo 'st4.4s {v0,v1,v2,v3}, [x8]' | llvm-mc --show-encoding
2031
2032 a.ld24s(A::v0, A::x8, 0); //echo 'ld2 {v0.s,v1.s}[0], [x8]' | llvm-mc --show-encoding
2033 a.ld24s(A::v0, A::x8, 1);
2034 a.ld24s(A::v0, A::x8, 2);
2035 a.ld24s(A::v0, A::x8, 3);
2036
2037 a.ld44s(A::v0, A::x8, 0); // ld4 {v0.s,v1.s,v2.s,v3.s}[0], [x8]
2038 a.ld44s(A::v0, A::x8, 1);
2039 a.ld44s(A::v0, A::x8, 2);
2040 a.ld44s(A::v0, A::x8, 3);
2041 },{
2042 0x00,0x89,0x40,0x4c,
2043 0x00,0x09,0x40,0x4c,
2044 0x00,0x89,0x00,0x4c,
2045 0x00,0x09,0x00,0x4c,
2046
2047 0x00,0x81,0x60,0x0d,
2048 0x00,0x91,0x60,0x0d,
2049 0x00,0x81,0x60,0x4d,
2050 0x00,0x91,0x60,0x4d,
2051
2052 0x00,0xa1,0x60,0x0d,
2053 0x00,0xb1,0x60,0x0d,
2054 0x00,0xa1,0x60,0x4d,
2055 0x00,0xb1,0x60,0x4d,
2056 });
2057
2058 test_asm(r, [&](A& a) {
2059 a.xtns2h(A::v0, A::v0);
2060 a.xtnh2b(A::v0, A::v0);
2061 a.strs (A::v0, A::x0);
2062
2063 a.ldrs (A::v0, A::x0);
2064 a.uxtlb2h(A::v0, A::v0);
2065 a.uxtlh2s(A::v0, A::v0);
2066
2067 a.uminv4s(A::v3, A::v4);
2068 a.movs (A::x3, A::v4,0); // mov.s w3,v4[0]
2069 a.movs (A::x3, A::v4,1); // mov.s w3,v4[1]
2070 a.inss (A::v4, A::x3,3); // ins.s v4[3],w3
2071 },{
2072 0x00,0x28,0x61,0x0e,
2073 0x00,0x28,0x21,0x0e,
2074 0x00,0x00,0x00,0xbd,
2075
2076 0x00,0x00,0x40,0xbd,
2077 0x00,0xa4,0x08,0x2f,
2078 0x00,0xa4,0x10,0x2f,
2079
2080 0x83,0xa8,0xb1,0x6e,
2081 0x83,0x3c,0x04,0x0e,
2082 0x83,0x3c,0x0c,0x0e,
2083 0x64,0x1c,0x1c,0x4e,
2084 });
2085
2086 test_asm(r, [&](A& a) {
2087 a.ldrb(A::v0, A::x8);
2088 a.strb(A::v0, A::x8);
2089 },{
2090 0x00,0x01,0x40,0x3d,
2091 0x00,0x01,0x00,0x3d,
2092 });
2093
2094 test_asm(r, [&](A& a) {
2095 a.ldrd(A::x0, A::x1, 3); // ldr x0, [x1, #24]
2096 a.ldrs(A::x0, A::x1, 3); // ldr w0, [x1, #12]
2097 a.ldrh(A::x0, A::x1, 3); // ldrh w0, [x1, #6]
2098 a.ldrb(A::x0, A::x1, 3); // ldrb w0, [x1, #3]
2099
2100 a.strs(A::x0, A::x1, 3); // str w0, [x1, #12]
2101 },{
2102 0x20,0x0c,0x40,0xf9,
2103 0x20,0x0c,0x40,0xb9,
2104 0x20,0x0c,0x40,0x79,
2105 0x20,0x0c,0x40,0x39,
2106
2107 0x20,0x0c,0x00,0xb9,
2108 });
2109
2110 test_asm(r, [&](A& a) {
2111 a.tbl (A::v0, A::v1, A::v2);
2112 a.uzp14s(A::v0, A::v1, A::v2);
2113 a.uzp24s(A::v0, A::v1, A::v2);
2114 a.zip14s(A::v0, A::v1, A::v2);
2115 a.zip24s(A::v0, A::v1, A::v2);
2116 },{
2117 0x20,0x00,0x02,0x4e,
2118 0x20,0x18,0x82,0x4e,
2119 0x20,0x58,0x82,0x4e,
2120 0x20,0x38,0x82,0x4e,
2121 0x20,0x78,0x82,0x4e,
2122 });
2123 }
2124
// Exercises skvm's approximate math routines. approx_log2, approx_pow2 and approx_powf are
// compared against hand-written expectations; the trig and inverse-trig approximations are
// compared against the scalar reference functions within a fixed tolerance.
DEF_TEST(SkVM_approx_math, r) {
    // Builds a program that loads each float of values[], applies fn to it, and stores the
    // result back in place; then runs that program over N values.
    auto eval = [](int N, float values[], auto fn) {
        skvm::Builder b;
        skvm::Ptr inout = b.varying<float>();

        b.storeF(inout, fn(&b, b.loadF(inout)));

        b.done().eval(N, values);
    };

    // Asserts that values[i] matches expected[i] for i in [0,N): either exactly
    // (the exact compare lets equal infinities pass) or nearly, with tolerance 0.001.
    auto compare = [r](int N, const float values[], const float expected[]) {
        for (int i = 0; i < N; ++i) {
            REPORTER_ASSERT(r, (values[i] == expected[i]) ||
                               SkScalarNearlyEqual(values[i], expected[i], 0.001f),
                            "evaluated to %g, but expected %g", values[i], expected[i]);
        }
    };

    // log2
    {
        float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
        constexpr int N = SK_ARRAY_COUNT(values);
        eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
            return b->approx_log2(v);
        });
        const float expected[] = {-2, -1, 0, 1, 2, 3};
        compare(N, values, expected);
    }

    // pow2
    {
        float values[] = {-80, -5, -2, -1, 0, 1, 2, 3, 5, 160};
        constexpr int N = SK_ARRAY_COUNT(values);
        eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
            return b->approx_pow2(v);
        });
        const float expected[] = {0, 0.03125f, 0.25f, 0.5f, 1, 2, 4, 8, 32, INFINITY};
        compare(N, values, expected);
    }
    // powf -- 1^x
    {
        float exps[] = {-2, -1, 0, 1, 2};
        constexpr int N = SK_ARRAY_COUNT(exps);
        eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
            return b->approx_powf(b->splat(1.0f), exp);
        });
        const float expected[] = {1, 1, 1, 1, 1};
        compare(N, exps, expected);
    }
    // powf -- 2^x
    {
        float exps[] = {-80, -5, -2, -1, 0, 1, 2, 3, 5, 160};
        constexpr int N = SK_ARRAY_COUNT(exps);
        eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
            return b->approx_powf(2.0, exp);
        });
        const float expected[] = {0, 0.03125f, 0.25f, 0.5f, 1, 2, 4, 8, 32, INFINITY};
        compare(N, exps, expected);
    }
    // powf -- 3^x
    {
        float exps[] = {-2, -1, 0, 1, 2};
        constexpr int N = SK_ARRAY_COUNT(exps);
        eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
            return b->approx_powf(b->splat(3.0f), exp);
        });
        const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
        compare(N, exps, expected);
    }
    // powf -- x^0.5
    {
        float bases[] = {0, 1, 4, 9, 16};
        constexpr int N = SK_ARRAY_COUNT(bases);
        eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
            return b->approx_powf(base, b->splat(0.5f));
        });
        const float expected[] = {0, 1, 2, 3, 4};
        compare(N, bases, expected);
    }
    // powf -- x^1
    {
        float bases[] = {0, 1, 2, 3, 4};
        constexpr int N = SK_ARRAY_COUNT(bases);
        eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
            return b->approx_powf(base, b->splat(1.0f));
        });
        const float expected[] = {0, 1, 2, 3, 4};
        compare(N, bases, expected);
    }
    // powf -- x^2
    {
        float bases[] = {0, 1, 2, 3, 4};
        constexpr int N = SK_ARRAY_COUNT(bases);
        eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
            return b->approx_powf(base, b->splat(2.0f));
        });
        const float expected[] = {0, 1, 4, 9, 16};
        compare(N, bases, expected);
    }

    // Runs the one-argument program prog on arg and returns |actual - expected|.
    // Fails the test when that error exceeds tolerance.
    auto test = [r](float arg, float expected, float tolerance, auto prog) {
        skvm::Builder b;
        skvm::Ptr inout = b.varying<float>();
        b.storeF(inout, prog(b.loadF(inout)));
        float actual = arg;
        b.done().eval(1, &actual);

        float err = std::abs(actual - expected);

        if (err > tolerance) {
            // This must be a failing assertion: asserting `true` here would make the
            // tolerance check a no-op.  (A NaN err compares false above and so is
            // not reported.)
            REPORTER_ASSERT(r, false,
                            "arg %g, expected %g, actual %g", arg, expected, actual);
        }
        return err;
    };

    // Two-argument flavor of test(): runs prog on (arg0, arg1) and returns
    // |actual - expected|, failing the test when that exceeds tolerance.
    auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
        skvm::Builder b;
        skvm::Ptr in0 = b.varying<float>();
        skvm::Ptr in1 = b.varying<float>();
        skvm::Ptr out = b.varying<float>();
        b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
        float actual;
        b.done().eval(1, &arg0, &arg1, &actual);

        float err = std::abs(actual - expected);

        if (err > tolerance) {
            // As in test(): fail for real rather than asserting `true`.
            // A NaN err is not reported.
            REPORTER_ASSERT(r, false,
                            "[%g, %g]: expected %g, actual %g", arg0, arg1, expected, actual);
        }
        return err;
    };

    // sine, cosine, tangent
    {
        constexpr float P = SK_ScalarPI;
        constexpr float tol = 0.00175f;
        for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
            test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
                return approx_sin(x);
            });
            test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
                return approx_cos(x);
            });
        }

        // Our tangent diverges more as we get near infinities (x near +- Pi/2),
        // so bring in the domain a little.
        constexpr float eps = 0.16f;
        float err = 0;
        for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
            err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
                return approx_tan(x);
            });
            // try again with some multiples of P, to check our periodicity
            test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
                return approx_tan(x + 3*P);
            });
            test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
                return approx_tan(x - 3*P);
            });
        }
        // Flip (false) to print the accumulated error while tuning.
        if ((false)) { SkDebugf("tan error %g\n", err); }
    }

    // asin, acos, atan
    {
        constexpr float tol = 0.00175f;
        float err = 0;
        for (float x = -1; x <= 1; x += 1.0f/64) {
            err += test(x, asin(x), tol, [](skvm::F32 x) {
                return approx_asin(x);
            });
            test(x, acos(x), tol, [](skvm::F32 x) {
                return approx_acos(x);
            });
        }
        if ((false)) { SkDebugf("asin error %g\n", err); }

        err = 0;
        for (float x = -10; x <= 10; x += 1.0f/16) {
            err += test(x, atan(x), tol, [](skvm::F32 x) {
                return approx_atan(x);
            });
        }
        if ((false)) { SkDebugf("atan error %g\n", err); }

        // Note: err is not reset here, so the atan2 total below also includes
        // the atan error accumulated above.
        for (float y = -3; y <= 3; y += 1) {
            for (float x = -3; x <= 3; x += 1) {
                err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
                    return approx_atan2(y,x);
                });
            }
        }
        if ((false)) { SkDebugf("atan2 error %g\n", err); }
    }
}
2323
DEF_TEST(SkVM_min_max, r) {
    // When exactly one operand is NaN, min() and max() are sensitive to operand
    // order, so blindly swapping their arguments is not a sound transformation.
    //
    // Every backend is expected to match std::min() and std::max(), i.e.
    //
    //    min(x,y) = y<x ? y : x
    //    max(x,y) = x<y ? y : x

    constexpr int kLanes = 8;

    // ±NaN, ±0, ±1, ±inf
    const uint32_t bits[kLanes] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
                                   0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};

    float f[kLanes];
    memcpy(f, bits, sizeof(bits));

    // Bit-for-bit float comparison, so NaN payloads and the sign of zero count.
    auto identical = [](float lhs, float rhs) {
        uint32_t lhsBits,
                 rhsBits;
        memcpy(&lhsBits, &lhs, 4);
        memcpy(&rhsBits, &rhs, 4);
        return lhsBits == rhsBits;
    };

    // Case 1: neither operand is a constant (x is varying, y is a uniform).
    // (Varying versus uniform operands should not behave differently.)
    {
        skvm::Builder b;
        {
            skvm::Ptr src = b.varying<float>();
            skvm::Ptr mn  = b.varying<float>();
            skvm::Ptr mx  = b.varying<float>();

            skvm::F32 x = b.loadF(src);
            skvm::F32 y = b.uniformF(b.uniform(), 0);

            b.storeF(mn, b.min(x,y));
            b.storeF(mx, b.max(x,y));
        }

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float mn[kLanes],
                  mx[kLanes];
            for (int i = 0; i < kLanes; i++) {
                // Pair every f[j] (as x) with f[i] (as the uniform y).
                program.eval(kLanes, f,mn,mx, &f[i]);

                for (int j = 0; j < kLanes; j++) {
                    REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
                    REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
                }
            }
        });
    }

    // Case 2: the right-hand operand is a constant.
    for (int i = 0; i < kLanes; i++) {
        skvm::Builder b;
        {
            skvm::Ptr src = b.varying<float>();
            skvm::Ptr mn  = b.varying<float>();
            skvm::Ptr mx  = b.varying<float>();

            skvm::F32 x = b.loadF(src);
            skvm::F32 y = b.splat(f[i]);

            b.storeF(mn, b.min(x,y));
            b.storeF(mx, b.max(x,y));
        }

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float mn[kLanes],
                  mx[kLanes];
            program.eval(kLanes, f,mn,mx);
            for (int j = 0; j < kLanes; j++) {
                REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
                REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
            }
        });
    }

    // Case 3: the left-hand operand is a constant.
    for (int i = 0; i < kLanes; i++) {
        skvm::Builder b;
        {
            skvm::Ptr src = b.varying<float>();
            skvm::Ptr mn  = b.varying<float>();
            skvm::Ptr mx  = b.varying<float>();

            skvm::F32 x = b.splat(f[i]);
            skvm::F32 y = b.loadF(src);

            b.storeF(mn, b.min(x,y));
            b.storeF(mx, b.max(x,y));
        }

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float mn[kLanes],
                  mx[kLanes];
            program.eval(kLanes, f,mn,mx);
            for (int j = 0; j < kLanes; j++) {
                REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
                REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
            }
        });
    }
}
2427
DEF_TEST(SkVM_halfs, r) {
    // Parallel tables: hs[i] is the 16-bit pattern this test expects to convert
    // to and from the float fs[i].
    constexpr int kCount = 8;
    const uint16_t hs[kCount] = {0x0000,0x3800,0x3c00,0x4000,
                                 0xc400,0xb800,0xbc00,0xc000};
    const float    fs[kCount] = {+0.0f,+0.5f,+1.0f,+2.0f,
                                 -4.0f,-0.5f,-1.0f,-2.0f};

    {   // 16-bit -> float via from_fp16().
        skvm::Builder b;
        skvm::Ptr src = b.varying<uint16_t>();
        skvm::Ptr dst = b.varying<float>();
        b.storeF(dst, b.from_fp16(b.load16(src)));

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float widened[kCount];
            program.eval(kCount, hs, widened);
            for (int lane = 0; lane < kCount; lane++) {
                REPORTER_ASSERT(r, widened[lane] == fs[lane]);
            }
        });
    }

    {   // float -> 16-bit via to_fp16().
        skvm::Builder b;
        skvm::Ptr src = b.varying<float>();
        skvm::Ptr dst = b.varying<uint16_t>();
        b.store16(dst, b.to_fp16(b.loadF(src)));

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            uint16_t narrowed[kCount];
            program.eval(kCount, fs, narrowed);
            for (int lane = 0; lane < kCount; lane++) {
                REPORTER_ASSERT(r, narrowed[lane] == hs[lane]);
            }
        });
    }
}
2462
DEF_TEST(SkVM_64bit, r) {
    // Reference data: wide[i] packs lo[i] into its low 32 bits and hi[i] into
    // its high 32 bits.
    constexpr int kCount = 65;
    uint32_t lo[kCount];
    uint32_t hi[kCount];
    uint64_t wide[kCount];
    for (int i = 0; i < kCount; i++) {
        lo[i] = 2*i+0;
        hi[i] = 2*i+1;

        const uint64_t lowHalf  = lo[i],
                       highHalf = hi[i];
        wide[i] = (lowHalf << 0) | (highHalf << 32);
    }

    {   // Split: load64 each half of wide[] and store them as 32-bit values.
        skvm::Builder b;
        {
            skvm::Ptr widePtr = b.varying<uint64_t>();
            skvm::Ptr loPtr   = b.varying<int>();
            skvm::Ptr hiPtr   = b.varying<int>();
            b.store32(loPtr, b.load64(widePtr, 0));
            b.store32(hiPtr, b.load64(widePtr, 1));
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            uint32_t l[kCount],
                     h[kCount];
            program.eval(kCount, wide,l,h);
            for (int i = 0; i < kCount; i++) {
                REPORTER_ASSERT(r, l[i] == lo[i]);
                REPORTER_ASSERT(r, h[i] == hi[i]);
            }
        });
    }

    {   // Join: load the two 32-bit halves and store64 them into one wide value.
        skvm::Builder b;
        {
            skvm::Ptr widePtr = b.varying<uint64_t>();
            skvm::Ptr loPtr   = b.varying<int>();
            skvm::Ptr hiPtr   = b.varying<int>();
            b.store64(widePtr, b.load32(loPtr), b.load32(hiPtr));
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            uint64_t w[kCount];
            program.eval(kCount, w,lo,hi);
            for (int i = 0; i < kCount; i++) {
                REPORTER_ASSERT(r, w[i] == wide[i]);
            }
        });
    }
}
2510
DEF_TEST(SkVM_128bit, r) {
    // 63 pixels of 4 channels each; channel i holds i/255 as a float, which the
    // checks below expect to round-trip with the byte value i.
    constexpr int kPixels   = 63;
    constexpr int kChannels = 4*kPixels;

    float   floats[kChannels];
    uint8_t packed[kChannels];

    for (int i = 0; i < kChannels; i++) {
        floats[i] = i * (1/255.0f);
    }

    skvm::PixelFormat rgba_ffff = skvm::SkColorType_to_PixelFormat(kRGBA_F32_SkColorType);
    skvm::PixelFormat rgba_8888 = skvm::SkColorType_to_PixelFormat(kRGBA_8888_SkColorType);

    {   // RGBA F32 -> RGBA 8888: exercises 128-bit loads.
        skvm::Builder b;
        {
            skvm::Ptr dst = b.varying(4);
            skvm::Ptr src = b.varying(16);

            skvm::Color c = b.load(rgba_ffff, src);
            b.store(rgba_8888, dst, c);
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            // Clear the output first so each run is checked on its own.
            memset(packed, 0, sizeof(packed));
            program.eval(kPixels, packed, floats);
            for (int i = 0; i < kChannels; i++) {
                REPORTER_ASSERT(r, packed[i] == i);
            }
        });
    }

    {   // RGBA 8888 -> RGBA F32: exercises 128-bit stores.
        // This reads the packed[] bytes produced by the conversion above.
        skvm::Builder b;
        {
            skvm::Ptr dst = b.varying(16);
            skvm::Ptr src = b.varying(4);

            skvm::Color c = b.load(rgba_8888, src);
            b.store(rgba_ffff, dst, c);
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            memset(floats, 0, sizeof(floats));
            program.eval(kPixels, floats, packed);
            for (int i = 0; i < kChannels; i++) {
                REPORTER_ASSERT(r, floats[i] == i * (1/255.0f));
            }
        });
    }
}
2560
DEF_TEST(SkVM_is_NaN_is_finite, r) {
    // One program writes both masks: is_NaN(x) to `nan` and is_finite(x) to `fin`.
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<float>();
        skvm::Ptr nan = b.varying<int>();
        skvm::Ptr fin = b.varying<int>();
        b.store32(nan, is_NaN   (b.loadF(src)));
        b.store32(fin, is_finite(b.loadF(src)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        constexpr int kCount = 8;
        // ±NaN, ±0, ±1, ±inf
        const uint32_t bits[kCount] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
                                       0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
        uint32_t nan[kCount],
                 fin[kCount];
        program.eval(kCount, bits, nan,fin);

        for (int i = 0; i < kCount; i++) {
            // Inputs 0-1 are the NaNs; inputs 2-5 (±0, ±1) are the finite ones.
            const bool expectNaN    = (i == 0 || i == 1);
            const bool expectFinite = (i == 2 || i == 3 || i == 4 || i == 5);

            REPORTER_ASSERT(r, nan[i] == (expectNaN    ? 0xffffffff : 0));
            REPORTER_ASSERT(r, fin[i] == (expectFinite ? 0xffffffff : 0));
        }
    });
}
2584
DEF_TEST(SkVM_args, r) {
    // Make sure a program can take at least six arguments:
    // one destination plus five sources that get summed into it.
    skvm::Builder b;
    {
        skvm::Ptr dst = b.varying<float>();
        skvm::Ptr A   = b.varying<float>();
        skvm::Ptr B   = b.varying<float>();
        skvm::Ptr C   = b.varying<float>();
        skvm::Ptr D   = b.varying<float>();
        skvm::Ptr E   = b.varying<float>();
        storeF(dst, b.loadF(A)
                  + b.loadF(B)
                  + b.loadF(C)
                  + b.loadF(D)
                  + b.loadF(E));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        constexpr int kCount = 17;
        float dst[kCount],
              A[kCount], B[kCount], C[kCount], D[kCount], E[kCount];
        // Every source holds its own index, so the sum at index i is 5*i.
        for (int i = 0; i < kCount; i++) {
            const float value = (float)i;
            A[i] = B[i] = C[i] = D[i] = E[i] = value;
        }
        program.eval(kCount, dst,A,B,C,D,E);
        for (int i = 0; i < kCount; i++) {
            REPORTER_ASSERT(r, dst[i] == 5.0f*i);
        }
    });
}
2613
DEF_TEST(SkVM_badpack, reporter) {
    // Reduced from a real draw that failed because of a bad arm64
    // implementation of pack().
    skvm::Builder p;
    {
        skvm::UPtr uniforms = p.uniform();
        skvm::Ptr  dst      = p.varying<uint16_t>();

        // r is read from the uniforms at byte offset 8 and scaled by 15; a is fixed at 0xf.
        skvm::I32 r = round(p.uniformF(uniforms, 8) * 15);
        skvm::I32 a = p.splat(0xf);

        skvm::I32 _4444 = p.splat(0);
        _4444 = pack(_4444, r, 12);
        _4444 = pack(_4444, a,  0);
        store16(dst, _4444);
    }

    test_jit_and_interpreter(p, [&](const skvm::Program& program){
        constexpr int kCount = 17;
        const float uniforms[] = { 0.0f, 0.0f,
                                   1.0f, 0.0f, 0.0f, 1.0f };

        uint16_t dst[kCount] = {0};
        program.eval(kCount, uniforms,dst);
        for (int i = 0; i < kCount; i++) {
            REPORTER_ASSERT(reporter, dst[i] == 0xf00f, "got %04x, want %04x\n", dst[i], 0xf00f);
        }
    });
}
2642
DEF_TEST(SkVM_features, r) {
    // Emits load, x*x+x, store into the given builder.
    auto build_program = [](skvm::Builder* b) {
        skvm::F32 x = b->loadF(b->varying<float>());
        b->storeF(b->varying<float>(), x*x+x);
    };

    {   // FMA available: expect load-fma-store, 3 instructions.
        skvm::Features withFMA;
        withFMA.fma = true;

        skvm::Builder b(withFMA);
        build_program(&b);
        REPORTER_ASSERT(r, b.optimize().size() == 3);
    }

    {   // FMA unavailable: expect load-mul-add-store, 4 instructions.
        skvm::Features withoutFMA;
        withoutFMA.fma = false;

        skvm::Builder b(withoutFMA);
        build_program(&b);
        REPORTER_ASSERT(r, b.optimize().size() == 4);
    }

    {   // Default-constructed builder auto-detects, so either form is acceptable.
        skvm::Builder b;
        build_program(&b);

        const bool fused = (b.optimize().size() == 3);
        REPORTER_ASSERT(r, fused || b.optimize().size() == 4);
    }
}
2672
DEF_TEST(SkVM_gather_can_hoist, r) {
    // Whether a gather is varying depends entirely on its index:
    // a gather with a uniform index can be hoisted out of the loop.

    {   // Typical case: the index is loaded from a varying buffer.
        skvm::Builder b;
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf      = b.varying<int>();

        skvm::I32 ix     = b.load32(buf);
        skvm::I32 result = b.gather32(uniforms,0, ix);
        b.store32(buf, result);

        skvm::Program p = b.done();

        // The index varies, so the gather stays inside the loop.
        //
        // loop:
        //     v0 = load32 buf
        //     v1 = gather32 uniforms+0 v0
        //     store32 buf v1
        REPORTER_ASSERT(r, p.instructions().size() == 3);
        REPORTER_ASSERT(r, p.loop() == 0);
    }

    {   // Same program, except the index is itself a uniform.
        skvm::Builder b;
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf      = b.varying<int>();

        skvm::I32 ix     = b.uniform32(uniforms,8);
        skvm::I32 result = b.gather32(uniforms,0, ix);
        b.store32(buf, result);

        skvm::Program p = b.done();

        // The index is uniform, so the gather is hoisted ahead of the loop.
        //
        // v0 = uniform32 uniforms+8
        // v1 = gather32 uniforms+0 v0
        // loop:
        //     store32 buf v1
        REPORTER_ASSERT(r, p.instructions().size() == 3);
        REPORTER_ASSERT(r, p.loop() == 2);
    }
}
2715
DEF_TEST(SkVM_dont_dedup_loads, r) {
    // Ops with identical arguments are normally assumed to produce identical values and
    // are deduplicated, which acts as a simple common subexpression eliminator.
    //
    // That is unsound for two identical loads separated by a store.  Were the loads
    // below deduplicated, the program would always increment by 1 rather than by K.
    constexpr int K = 2;

    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();
        for (int pass = 0; pass < K; pass++) {
            skvm::I32 bumped = b.load32(buf) + 1;
            b.store32(buf, bumped);
        }
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        int buf[] = { 0,1,2,3,4 };
        const int count = (int)SK_ARRAY_COUNT(buf);

        program.eval(SK_ARRAY_COUNT(buf), buf);
        for (int i = 0; i < count; i++) {
            REPORTER_ASSERT(r, buf[i] == i+K);
        }
    });
}
2739
DEF_TEST(SkVM_dont_dedup_stores, r) {
    // The reasoning of SkVM_dont_dedup_loads applies to stores too.  A different store
    // sitting between two identical stores invalidates the first one, so the second
    // identical store still has to be issued.
    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();
        b.store32(buf, b.splat(4));
        b.store32(buf, b.splat(5));
        b.store32(buf, b.splat(4));   // Deduplicating would drop this store and leave 5.
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        int buf[42];
        program.eval(SK_ARRAY_COUNT(buf), buf);

        // The final store must win in every lane.
        for (size_t i = 0; i < SK_ARRAY_COUNT(buf); i++) {
            REPORTER_ASSERT(r, buf[i] == 4);
        }
    });
}
2760
DEF_TEST(SkVM_fast_mul, r) {
    // Multiply each input by zero two ways: fast_mul(0,x) and the ordinary 0*x.
    skvm::Builder b;
    {
        skvm::Ptr src  = b.varying<float>();
        skvm::Ptr fast = b.varying<float>();
        skvm::Ptr slow = b.varying<float>();

        skvm::F32 x = b.loadF(src);
        b.storeF(fast, fast_mul(0.0f, x));
        b.storeF(slow, 0.0f * x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        constexpr int kCount = 8;
        const uint32_t bits[kCount] = {
            0x0000'0000, 0x8000'0000, //±0
            0x3f80'0000, 0xbf80'0000, //±1
            0x7f80'0000, 0xff80'0000, //±inf
            0x7f80'0001, 0xff80'0001, //±NaN
        };
        float fast[kCount];
        float slow[kCount];
        program.eval(kCount,bits,fast,slow);

        for (int i = 0; i < kCount; i++) {
            // fast_mul() by zero is expected to give zero for every input ...
            REPORTER_ASSERT(r, fast[i] == 0.0f);

            // ... while the regular multiply gives zero only for the first four
            // inputs (±0, ±1) and NaN for ±inf and ±NaN.
            const bool zeroOrOne = (i < 4);
            if (zeroOrOne) {
                REPORTER_ASSERT(r, slow[i] == 0.0f);
            } else {
                REPORTER_ASSERT(r, isnan(slow[i]));
            }
        }
    });
}
2793
DEF_TEST(SkVM_duplicates, reporter) {
    {   // Builder constructed with `true`: finalize() is expected to yield
        // Op::duplicate instructions, and eliminate_dead_code() to yield none.
        skvm::Builder p(true);
        auto rptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr);
        skvm::F32 g = p.splat(0.0f);
        skvm::F32 b = p.splat(0.0f);
        skvm::F32 a = p.splat(1.0f);

        p.unpremul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // NOTE(review): `b` is the blue channel value, not a Builder; `b->` presumably
        // reaches the builder that owns it (p) -- confirm.
        std::vector<skvm::Instruction> program = b->program();

        int duplicates = 0;
        auto withDuplicates = skvm::finalize(program);
        for (const auto& instr : withDuplicates) {
            duplicates += (instr.op == skvm::Op::duplicate) ? 1 : 0;
        }
        REPORTER_ASSERT(reporter, duplicates > 0);

        auto eliminatedAsDeadCode = skvm::eliminate_dead_code(program);
        for (const auto& instr : eliminatedAsDeadCode) {
            REPORTER_ASSERT(reporter, instr.op != skvm::Op::duplicate);
        }
    }

    {   // Builder constructed with `false`: the finished program is expected
        // to contain no Op::duplicate instructions.
        skvm::Builder p(false);
        auto rptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr);
        skvm::F32 g = p.splat(0.0f);
        skvm::F32 b = p.splat(0.0f);
        skvm::F32 a = p.splat(1.0f);

        p.unpremul(&r, &g, &b, a);
        p.storeF(rptr, r);

        auto withoutDuplicates = p.done().instructions();
        for (const auto& instr : withoutDuplicates) {
            REPORTER_ASSERT(reporter, instr.op != skvm::Op::duplicate);
        }
    }
}
2842
DEF_TEST(SkVM_Visualizer, r) {
    // Small SkSL program that is compiled to SkVM with tracing and duplicates
    // enabled, then dumped through the visualizer.
    const char* src =
            "int main(int x, int y) {\n"
            "   int a = 99;\n"
            "   if (x > 0) a += 100;\n"
            "   if (y > 0) a += 101;\n"
            "   a = 102;\n"
            "   return a;\n"
            "}";

    GrShaderCaps caps;
    SkSL::Compiler compiler(&caps);
    SkSL::Program::Settings settings;
    auto program = compiler.convertProgram(SkSL::ProgramKind::kGeneric,
                                           std::string(src), settings);
    const SkSL::FunctionDefinition* main = SkSL::Program_GetFunction(*program, "main");

    SkSL::SkVMDebugTrace d;
    d.setSource(src);
    auto v = std::make_unique<skvm::viz::Visualizer>(&d);

    skvm::Builder b(skvm::Features{}, /*createDuplicates=*/true);
    SkSL::ProgramToSkVM(*program, *main, &b, &d, /*uniforms=*/{});

    skvm::Program p = b.done(nullptr, true, std::move(v));

    // The disassembly is only produced in JIT builds; otherwise it stays empty.
#if defined(SKVM_JIT)
    SkDynamicMemoryWStream asmFile;
    p.disassemble(&asmFile);
    auto dumpData = asmFile.detachAsData();
    std::string dumpString((const char*)dumpData->data(), dumpData->size());
#else
    std::string dumpString;
#endif

    SkDynamicMemoryWStream vizFile;
    p.visualizer()->dump(&vizFile, dumpString.c_str());
    auto vizData = vizFile.detachAsData();
    std::string html((const char*)vizData->data(), vizData->size());
    //b.dump();
    //std::printf(html.c_str());

    // True when the generated HTML contains the given fragment.
    auto contains = [&](const char* needle) {
        return std::strstr(html.c_str(), needle) != nullptr;
    };

    // The HTML should carry every kind of information.
    // Machine code rows are only expected when there is a real disassembly.
    const bool haveDisassembly =
            !dumpString.empty() && !std::strstr(dumpString.c_str(), "Program not JIT'd.");
    if (haveDisassembly) {
        REPORTER_ASSERT(r, contains("<tr class='machine'>"));       // machine commands
    }
    REPORTER_ASSERT(r, contains("<tr class='normal'>"));            // SkVM byte code
    REPORTER_ASSERT(r, contains("<tr class='source'>"));            // SkSL source
    REPORTER_ASSERT(r, contains("<tr class='dead'>"));              // dead code
    REPORTER_ASSERT(r, contains("<tr class='dead deduped'>"));      // deduped removed
    REPORTER_ASSERT(r, contains(                                    // deduped origins
            "<tr class='normal origin'>"
            "<td>&#8593;&#8593;&#8593; *13</td>"
            "<td>v2 = splat 0 (0)</td></tr>"));
    REPORTER_ASSERT(r, contains(                                    // trace enter
            "<tr class='source'><td class='mask'>&#8618;v9</td>"
            "<td colspan=2>int main(int x, int y)</td></tr>"));
    REPORTER_ASSERT(r, contains(                                    // trace exit
            "<tr class='source'><td class='mask'>&#8617;v9</td>"
            "<td colspan=2>int main(int x, int y)</td></tr>"));
}
2898