1 /*
2 * Copyright 2019 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkColorType.h"
9 #include "include/core/SkData.h"
10 #include "include/core/SkRefCnt.h"
11 #include "include/core/SkScalar.h"
12 #include "include/core/SkSpan.h"
13 #include "include/core/SkStream.h"
14 #include "include/core/SkTypes.h"
15 #include "include/private/SkSLProgramKind.h"
16 #include "include/private/base/SkDebug.h"
17 #include "include/private/base/SkFloatingPoint.h"
18 #include "src/base/SkMSAN.h"
19 #include "src/core/SkVM.h"
20 #include "src/sksl/SkSLCompiler.h"
21 #include "src/sksl/SkSLProgramSettings.h"
22 #include "src/sksl/SkSLUtil.h"
23 #include "src/sksl/codegen/SkSLVMCodeGenerator.h"
24 #include "src/sksl/ir/SkSLFunctionDeclaration.h"
25 #include "src/sksl/ir/SkSLProgram.h"
26 #include "src/sksl/tracing/SkVMDebugTrace.h"
27 #include "src/utils/SkVMVisualizer.h"
28 #include "tests/Test.h"
29
30 #include <algorithm>
31 #include <cmath>
32 #include <cstdint>
33 #include <cstring>
34 #include <initializer_list>
35 #include <memory>
36 #include <string>
37 #include <utility>
38 #include <vector>
39
40 template <typename Fn>
test_jit_and_interpreter(const skvm::Builder & b,Fn && test)41 static void test_jit_and_interpreter(const skvm::Builder& b, Fn&& test) {
42 skvm::Program p = b.done();
43 test(p);
44 if (p.hasJIT()) {
45 test(b.done(/*debug_name=*/nullptr, /*allow_jit=*/false));
46 }
47 }
48
// Builds four instructions whose results are never stored and checks that
// skvm::eliminate_dead_code() removes all of them.
DEF_TEST(SkVM_eliminate_dead_code, r) {
    skvm::Builder b;
    {
        skvm::Ptr arg    = b.varying<int>();
        skvm::I32 loaded = b.load32(arg);
        skvm::I32 twice  = b.add(loaded, loaded);
        b.add(twice, b.splat(7));
    }

    // load32, add, splat, add.
    std::vector<skvm::Instruction> program = b.program();
    REPORTER_ASSERT(r, program.size() == 4);

    // Nothing is stored, so nothing should survive.
    program = skvm::eliminate_dead_code(program);
    REPORTER_ASSERT(r, program.size() == 0);
}
64
DEF_TEST(SkVM_Pointless, r) {
    // A program with no memory arguments: all of it should be marked as dead code,
    // yet we should still be able to "run" it for any N.
    skvm::Builder b;
    {
        b.add(b.splat(5.0f),
              b.splat(4.0f));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int n = 0;
        while (n < 64) {
            program.eval(n);
            n++;
        }
    });

    // Every optimized instruction is expected to die immediately and be hoistable.
    const std::vector<skvm::OptimizedInstruction> optimized = b.optimize();
    for (const skvm::OptimizedInstruction& inst : optimized) {
        REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true);
    }
}
84
// Stores the constant 42 to a varying int buffer and checks that exactly the
// requested number of elements is written.
DEF_TEST(SkVM_memset, r) {
    skvm::Builder b;
    b.store32(b.varying<int>(), b.splat(42));

    test_jit_and_interpreter(b, [&](const skvm::Program& p) {
        constexpr int kCount = 17;

        // One extra slot past the evaluated range acts as a sentinel.
        int buf[kCount + 1];
        buf[kCount] = 47;

        p.eval(kCount, buf);

        for (int i = 0; i < kCount; i++) {
            REPORTER_ASSERT(r, buf[i] == 42);
        }
        REPORTER_ASSERT(r, buf[kCount] == 47);
    });
}
100
// Copies one varying int buffer to another and checks that only the requested
// prefix is copied.
DEF_TEST(SkVM_memcpy, r) {
    skvm::Builder b;
    {
        auto src = b.varying<int>(),
             dst = b.varying<int>();
        b.store32(dst, b.load32(src));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& p) {
        int src[] = {1,2,3,4,5,6,7,8,9},
            dst[] = {0,0,0,0,0,0,0,0,0};

        // Copy everything except the final element.
        const size_t copied = std::size(src) - 1;
        p.eval(copied, src, dst);

        for (size_t i = 0; i < copied; i++) {
            REPORTER_ASSERT(r, dst[i] == src[i]);
        }
        // The last destination element must be left untouched.
        REPORTER_ASSERT(r, dst[copied] == 0);
    });
}
121
// When a JIT is available for this program, building it again with
// allow_jit=false must yield a Program without a JIT.
DEF_TEST(SkVM_allow_jit, r) {
    skvm::Builder b;
    {
        auto src = b.varying<int>(),
             dst = b.varying<int>();
        b.store32(dst, b.load32(src));
    }

    const bool jitAvailable = b.done("test-allow_jit", /*allow_jit=*/true).hasJIT();
    if (jitAvailable) {
        REPORTER_ASSERT(r, !b.done("", false).hasJIT());
    }
}
134
DEF_TEST(SkVM_LoopCounts, r) {
    // Evaluate the program for every N from 0 up to the buffer size and make
    // sure exactly the first N elements are touched each time.

    // buf[i] += 1
    skvm::Builder b;
    skvm::Ptr arg = b.varying<int>();
    b.store32(arg,
              b.add(b.splat(1),
                    b.load32(arg)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        constexpr int kLen = 64;
        int buf[kLen];

        for (int N = 0; N <= kLen; N++) {
            // Reset to the identity pattern before each run.
            for (int i = 0; i < kLen; i++) {
                buf[i] = i;
            }
            program.eval(N, buf);

            // The first N entries are incremented; the rest are unchanged.
            for (int i = 0; i < kLen; i++) {
                if (i < N) {
                    REPORTER_ASSERT(r, buf[i] == i+1);
                } else {
                    REPORTER_ASSERT(r, buf[i] == i);
                }
            }
        }
    });
}
162
// Replaces each buf[i] with a 32-bit gather from an 8-entry table, indexed by
// the low three bits of the value loaded from buf.
DEF_TEST(SkVM_gather32, r) {
    skvm::Builder b;
    {
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf      = b.varying<int>();
        skvm::I32  x        = b.load32(buf);
        b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7))));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int img[] = {12,34,56,78, 90,98,76,54};

        constexpr int kCount = 20;
        int buf[kCount];
        for (int i = 0; i < kCount; i++) {
            buf[i] = i;
        }

        struct Uniforms {
            const int* img;
        } uniforms{img};

        program.eval(kCount, &uniforms, buf);

        // buf[i] started out as i, so the gathered table entry is img[i & 7].
        for (int i = 0; i < kCount; i++) {
            REPORTER_ASSERT(r, buf[i] == img[i & 7]);
        }
    });
}
210
// Exercises gather32, gather16 and gather8 from the same table, with the index
// masked to 7, 15 and 31 respectively.
DEF_TEST(SkVM_gathers, r) {
    skvm::Builder b;
    {
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf32    = b.varying<int>(),
                   buf16    = b.varying<uint16_t>(),
                   buf8     = b.varying<uint8_t>();

        skvm::I32 x = b.load32(buf32);

        b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7))));
        b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15))));
        b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31))));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int img[] = {12,34,56,78, 90,98,76,54};

        constexpr int N = 20;
        int      buf32[N];
        uint16_t buf16[N];
        uint8_t  buf8 [N];

        for (int i = 0; i < N; i++) {
            buf32[i] = i;
        }

        struct Uniforms {
            const int* img;
        } uniforms{img};

        program.eval(N, &uniforms, buf32, buf16, buf8);

        // Expected results per element for each gather width.
        const int want32[N] = {12,34,56,78, 90,98,76,54,
                               12,34,56,78, 90,98,76,54,
                               12,34,56,78};
        const int want16[N] = {12, 0,34, 0, 56, 0,78, 0,
                               90, 0,98, 0, 76, 0,54, 0,
                               12, 0,34, 0};
        const int want8 [N] = {12, 0, 0, 0, 34, 0, 0, 0,
                               56, 0, 0, 0, 78, 0, 0, 0,
                               90, 0, 0, 0};

        for (int i = 0; i < N; i++) {
            REPORTER_ASSERT(r, buf32[i] == want32[i] &&
                               buf16[i] == want16[i] &&
                               buf8 [i] == want8 [i]);
        }
    });
}
268
// Gathers 32-, 16- and 8-bit values from a byte table using unmasked indices
// loaded from buf32.
DEF_TEST(SkVM_gathers2, r) {
    skvm::Builder b;
    {
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf32    = b.varying<int>(),
                   buf16    = b.varying<uint16_t>(),
                   buf8     = b.varying<uint8_t>();

        skvm::I32 x = b.load32(buf32);

        b.store32(buf32, b.gather32(uniforms,0, x));
        b.store16(buf16, b.gather16(uniforms,0, x));
        b.store8 (buf8 , b.gather8 (uniforms,0, x));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // img[k] == k, so gathered values reveal which bytes were read.
        uint8_t img[256];
        for (int k = 0; k < 256; k++) {
            img[k] = k;
        }

        constexpr int kCount = 64;
        int      buf32[kCount];
        uint16_t buf16[kCount];
        uint8_t  buf8 [kCount];

        for (int k = 0; k < kCount; k++) {
            buf32[k] = (k*47)&63;
            buf16[k] = 0;
            buf8 [k] = 0;
        }

        struct Uniforms {
            const uint8_t* img;
        } uniforms{img};

        program.eval(kCount, &uniforms, buf32, buf16, buf8);

        for (int k = 0; k < kCount; k++) {
            REPORTER_ASSERT(r, buf8[k] == ((k*47)&63));  // 0,47,30,13,60,...
        }

        REPORTER_ASSERT(r, buf16[ 0] == 0x0100);
        REPORTER_ASSERT(r, buf16[63] == 0x2322);

        REPORTER_ASSERT(r, buf32[ 0] == 0x03020100);
        REPORTER_ASSERT(r, buf32[63] == 0x47464544);
    });
}
317
// Chains the bitwise and shift ops on a single value; the trailing comments
// track the running value for an input of 0x42.
DEF_TEST(SkVM_bitops, r) {
    skvm::Builder b;
    {
        skvm::Ptr ptr = b.varying<int>();

        skvm::I32 v = b.load32(ptr);

        v = b.bit_and  (v, b.splat(0xf1));  // 0x40
        v = b.bit_or   (v, b.splat(0x80));  // 0xc0
        v = b.bit_xor  (v, b.splat(0xfe));  // 0x3e
        v = b.bit_clear(v, b.splat(0x30));  // 0x0e

        v = b.shl(v, 28);  // 0xe000'0000
        v = b.sra(v, 28);  // 0xffff'fffe
        v = b.shr(v,  1);  // 0x7fff'ffff

        b.store32(ptr, v);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int value = 0x42;
        program.eval(1, &value);
        REPORTER_ASSERT(r, value == 0x7fff'ffff);
    });
}
343
// select(is_NaN(x), 0, x) is expected to optimize down to four instructions,
// and to replace NaN inputs with zero while passing everything else through.
DEF_TEST(SkVM_select_is_NaN, r) {
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<float>(),
                  dst = b.varying<float>();

        skvm::F32 x = b.loadF(src);
        x = select(is_NaN(x), b.splat(0.0f)
                            , x);
        b.storeF(dst, x);
    }

    std::vector<skvm::OptimizedInstruction> program = b.optimize();
    REPORTER_ASSERT(r, program.size() == 4);

    // Expected instruction sequence after optimization.
    const skvm::Op wantOps[] = {
        skvm::Op::load32,
        skvm::Op::neq_f32,
        skvm::Op::bit_clear,
        skvm::Op::store32,
    };
    for (size_t i = 0; i < std::size(wantOps); i++) {
        REPORTER_ASSERT(r, program[i].op == wantOps[i]);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // ±NaN, ±0, ±1, ±inf
        uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
                          0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
        uint32_t dst[std::size(src)];
        program.eval(std::size(src), src, dst);

        // Only the first two inputs (the NaNs) are rewritten to zero.
        const int count = (int)std::size(src);
        for (int i = 0; i < count; i++) {
            REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i]));
        }
    });
}
375
// Runs add, sub and div on floats so that every non-zero input ends up as 1.
DEF_TEST(SkVM_f32, r) {
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<float>();

        skvm::F32 x       = b.loadF(arg),
                  doubled = b.add(x,x),        // 2x
                  same    = b.sub(doubled,x),  // 2x-x = x
                  one     = b.div(same,x);     // x/x = 1
        b.storeF(arg, one);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float buf[] = { 1,2,3,4,5,6,7,8,9 };
        program.eval(std::size(buf), buf);
        for (size_t i = 0; i < std::size(buf); i++) {
            REPORTER_ASSERT(r, buf[i] == 1.0f);
        }
    });
}
396
// Packs the results of the six integer comparisons into bits 0..5 of the output.
DEF_TEST(SkVM_cmp_i32, r) {
    skvm::Builder b;
    {
        skvm::I32 x = b.load32(b.varying<int>());

        // Reduce a comparison mask to a single bit at position `shift`.
        auto to_bit = [&](int shift, skvm::I32 mask) {
            return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
        };

        skvm::I32 bits = b.splat(0);
        bits = b.bit_or(bits, to_bit(0, b. eq(x, b.splat(0))));
        bits = b.bit_or(bits, to_bit(1, b.neq(x, b.splat(1))));
        bits = b.bit_or(bits, to_bit(2, b. lt(x, b.splat(2))));
        bits = b.bit_or(bits, to_bit(3, b.lte(x, b.splat(3))));
        bits = b.bit_or(bits, to_bit(4, b. gt(x, b.splat(4))));
        bits = b.bit_or(bits, to_bit(5, b.gte(x, b.splat(5))));

        b.store32(b.varying<int>(), bits);
    }
    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int in[] = { 0,1,2,3,4,5,6,7,8,9 };
        int out[std::size(in)];

        program.eval(std::size(in), in, out);

        // Inputs 5 and above all produce the same bit pattern.
        const int want[] = { 0b001111, 0b001100, 0b001010, 0b001010, 0b000010,
                             0b110010, 0b110010, 0b110010, 0b110010, 0b110010 };
        for (int i = 0; i < (int)std::size(want); i++) {
            REPORTER_ASSERT(r, out[i] == want[i]);
        }
    });
}
432
// Packs the results of the six float comparisons into bits 0..5 of the output.
DEF_TEST(SkVM_cmp_f32, r) {
    skvm::Builder b;
    {
        skvm::F32 x = b.loadF(b.varying<float>());

        // Reduce a comparison mask to a single bit at position `shift`.
        auto to_bit = [&](int shift, skvm::I32 mask) {
            return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
        };

        skvm::I32 bits = b.splat(0);
        bits = b.bit_or(bits, to_bit(0, b. eq(x, b.splat(0.0f))));
        bits = b.bit_or(bits, to_bit(1, b.neq(x, b.splat(1.0f))));
        bits = b.bit_or(bits, to_bit(2, b. lt(x, b.splat(2.0f))));
        bits = b.bit_or(bits, to_bit(3, b.lte(x, b.splat(3.0f))));
        bits = b.bit_or(bits, to_bit(4, b. gt(x, b.splat(4.0f))));
        bits = b.bit_or(bits, to_bit(5, b.gte(x, b.splat(5.0f))));

        b.store32(b.varying<int>(), bits);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float in[] = { 0,1,2,3,4,5,6,7,8,9 };
        int out[std::size(in)];

        program.eval(std::size(in), in, out);

        // Inputs 5 and above all produce the same bit pattern.
        const int want[] = { 0b001111, 0b001100, 0b001010, 0b001010, 0b000010,
                             0b110010, 0b110010, 0b110010, 0b110010, 0b110010 };
        for (int i = 0; i < (int)std::size(want); i++) {
            REPORTER_ASSERT(r, out[i] == want[i]);
        }
    });
}
469
// Stores b.index() to each element; the test expects element i to hold
// (buffer length - i).
DEF_TEST(SkVM_index, r) {
    skvm::Builder b;
    b.store32(b.varying<int>(), b.index());

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[23];
        const int len = (int)std::size(buf);

        program.eval(std::size(buf), buf);

        for (int i = 0; i < len; i++) {
            REPORTER_ASSERT(r, buf[i] == len-i);
        }
    });
}
482
DEF_TEST(SkVM_mad, r) {
    // A chain of mads arranged to hit the tricky corners of instruction
    // and register selection for Op::mad_f32.

    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();

        skvm::F32 x = b.to_F32(b.load32(arg)),
                  y = b.mad(x,x,x),   // x is needed in the future, so r[x] != r[y].
                  z = b.mad(y,y,x),   // y is needed in the future, but r[z] = r[x] is ok.
                  w = b.mad(z,z,y),   // w can alias z but not y.
                  v = b.mad(w,y,w);   // Got to stop somewhere.
        b.store32(arg, b.trunc(v));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // Worked through for an input of 2:
        //   x = 2
        //   y = 2*2 + 2       = 6
        //   z = 6*6 + 2       = 38
        //   w = 38*38 + 6     = 1450
        //   v = 1450*6 + 1450 = 10150
        int value = 2;
        program.eval(1, &value);
        REPORTER_ASSERT(r, value == 10150);
    });
}
510
DEF_TEST(SkVM_fms, r) {
    // Create a pattern that can be peepholed into an Op::fms_f32:
    // v = x*2 - 1, truncated back to int and stored in place.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();

        skvm::F32 x = b.to_F32(b.load32(arg)),
                  v = b.sub(b.mul(x, b.splat(2.0f)),
                            b.splat(1.0f));
        b.store32(arg, b.trunc(v));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
        program.eval((int)std::size(buf), buf);

        for (int i = 0; i < (int)std::size(buf); i++) {
            // Compare with ==. The previous `buf[i] = 2*i-1` assigned an odd,
            // always non-zero value, so the assertion could never fail.
            REPORTER_ASSERT(r, buf[i] == 2*i-1);
        }
    });
}
532
DEF_TEST(SkVM_fnma, r) {
    // Create a pattern that can be peepholed into an Op::fnma_f32:
    // v = 1 - x*2, truncated back to int and stored in place.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();

        skvm::F32 x = b.to_F32(b.load32(arg)),
                  v = b.sub(b.splat(1.0f),
                            b.mul(x, b.splat(2.0f)));
        b.store32(arg, b.trunc(v));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = {0,1,2,3,4,5,6,7,8,9,10};
        program.eval((int)std::size(buf), buf);

        for (int i = 0; i < (int)std::size(buf); i++) {
            // Compare with ==. The previous `buf[i] = 1-2*i` assigned an odd,
            // always non-zero value, so the assertion could never fail.
            REPORTER_ASSERT(r, buf[i] == 1-2*i);
        }
    });
}
554
// Another mad chain, this time on floats, with different register-reuse
// constraints between the operands.
DEF_TEST(SkVM_madder, r) {
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<float>();

        skvm::F32 x = b.loadF(arg),
                  y = b.mad(x,x,x),   // x is needed in the future, so r[x] != r[y].
                  z = b.mad(y,x,y),   // r[x] can be reused after this instruction, but not r[y].
                  w = b.mad(y,y,z);
        b.storeF(arg, w);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        // Worked through for an input of 2:
        //   y = 2*2 + 2  = 6
        //   z = 6*2 + 6  = 18
        //   w = 6*6 + 18 = 54
        float value = 2.0f;
        program.eval(1, &value);
        REPORTER_ASSERT(r, value == 54.0f);
    });
}
576
// Applies floor() in place to a float buffer and compares against a table.
DEF_TEST(SkVM_floor, r) {
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<float>();
        b.storeF(arg, b.floor(b.loadF(arg)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float       buf[]  = { -2.0f, -1.5f, -1.0f, 0.0f, 1.0f, 1.5f, 2.0f };
        const float want[] = { -2.0f, -2.0f, -1.0f, 0.0f, 1.0f, 1.0f, 2.0f };

        program.eval(std::size(buf), buf);

        const int count = (int)std::size(buf);
        for (int i = 0; i < count; i++) {
            REPORTER_ASSERT(r, buf[i] == want[i]);
        }
    });
}
593
// Rounds floats to ints and compares against a table of expected values.
DEF_TEST(SkVM_round, r) {
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<float>();
        skvm::Ptr dst = b.varying<int>();
        b.store32(dst, b.round(b.loadF(src)));
    }

    // The test cases on exact 0.5f boundaries assume the current rounding mode is nearest even.
    // We haven't explicitly guaranteed that here... it just probably is.
    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float     buf[]  = { -1.5f, -0.5f, 0.0f, 0.5f, 0.2f, 0.6f, 1.0f, 1.4f, 1.5f, 2.0f };
        const int want[] = { -2   ,  0   , 0   , 0   , 0   , 1   , 1   , 1   , 2   , 2    };
        int       dst[std::size(buf)];

        program.eval(std::size(buf), buf, dst);

        const int count = (int)std::size(dst);
        for (int i = 0; i < count; i++) {
            REPORTER_ASSERT(r, dst[i] == want[i]);
        }
    });
}
615
// Element-wise float min of two source buffers into a destination buffer.
DEF_TEST(SkVM_min, r) {
    skvm::Builder b;
    {
        skvm::Ptr src1 = b.varying<float>();
        skvm::Ptr src2 = b.varying<float>();
        skvm::Ptr dst  = b.varying<float>();

        b.storeF(dst, b.min(b.loadF(src1), b.loadF(src2)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float       s1[]   = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
        float       s2[]   = { 0.0f, 2.0f, 3.0f,  1.0f, -2.0f};
        const float want[] = { 0.0f, 1.0f, 3.0f, -1.0f, -2.0f};
        float       d[std::size(s1)];

        program.eval(std::size(d), s1, s2, d);

        const int count = (int)std::size(d);
        for (int i = 0; i < count; i++) {
            REPORTER_ASSERT(r, d[i] == want[i]);
        }
    });
}
637
// Element-wise float max of two source buffers into a destination buffer.
DEF_TEST(SkVM_max, r) {
    skvm::Builder b;
    {
        skvm::Ptr src1 = b.varying<float>();
        skvm::Ptr src2 = b.varying<float>();
        skvm::Ptr dst  = b.varying<float>();

        b.storeF(dst, b.max(b.loadF(src1), b.loadF(src2)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        float       s1[]   = { 0.0f, 1.0f, 4.0f, -1.0f, -1.0f};
        float       s2[]   = { 0.0f, 2.0f, 3.0f,  1.0f, -2.0f};
        const float want[] = { 0.0f, 2.0f, 4.0f,  1.0f, -1.0f};
        float       d[std::size(s1)];

        program.eval(std::size(d), s1, s2, d);

        const int count = (int)std::size(d);
        for (int i = 0; i < count; i++) {
            REPORTER_ASSERT(r, d[i] == want[i]);
        }
    });
}
659
DEF_TEST(SkVM_hoist, r) {
    // This program uses enough constants that it will fail to JIT if we hoist them.
    // The JIT will try again without hoisting, and that'll just need 2 registers.
    skvm::Builder b;
    {
        skvm::Ptr arg = b.varying<int>();
        skvm::I32 sum = b.load32(arg);
        for (int k = 0; k < 32; k++) {
            sum = b.add(sum, b.splat(k));
        }
        b.store32(arg, sum);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int value = 4;
        program.eval(1, &value);
        // value += 0 + 1 + 2 + 3 + ... + 30 + 31
        // value += 496
        REPORTER_ASSERT(r, value == 500);
    });
}
681
// Keeps values greater than 4 and replaces everything else with 42.
DEF_TEST(SkVM_select, r) {
    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();

        skvm::I32 x = b.load32(buf);

        x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );

        b.store32(buf, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int buf[] = { 0,1,2,3,4,5,6,7,8 };
        program.eval(std::size(buf), buf);

        const int count = (int)std::size(buf);
        for (int i = 0; i < count; i++) {
            const int want = (i > 4) ? i : 42;
            REPORTER_ASSERT(r, buf[i] == want);
        }
    });
}
702
// Swaps the contents of two buffers; guards against a reordering of loads and
// stores that would only respect Op-argument data flow.
DEF_TEST(SkVM_swap, r) {
    skvm::Builder b;
    {
        // This program is the equivalent of
        //    x = *X
        //    y = *Y
        //    *X = y
        //    *Y = x
        // One rescheduling of the program based only on data flow of Op arguments is
        //    x = *X
        //    *Y = x
        //    y = *Y
        //    *X = y
        // but this reordering does not produce the same results and is invalid.
        skvm::Ptr X = b.varying<int>(),
                  Y = b.varying<int>();

        skvm::I32 x = b.load32(X),
                  y = b.load32(Y);

        b.store32(X, y);
        b.store32(Y, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int first [] = { 0,1,2,3 };
        int second[] = { 4,5,6,7 };

        program.eval(std::size(first), first, second);

        // After the swap, `first` holds 4..7 and `second` holds 0..3.
        const int count = (int)std::size(first);
        for (int i = 0; i < count; i++) {
            REPORTER_ASSERT(r, first [i] == 4 + i);
            REPORTER_ASSERT(r, second[i] == i);
        }
    });
}
737
DEF_TEST(SkVM_NewOps, r) {
    // Exercise a somewhat arbitrary set of new ops.
    skvm::Builder b;
    {
        skvm::Ptr  buf      = b.varying<int16_t>();
        skvm::UPtr uniforms = b.uniform();

        skvm::I32 x = b.load16(buf);

        // The int uniforms are read at offsets just past the leading pointer.
        const size_t kPtr = sizeof(const int*);

        x = b.add(x, b.uniform32(uniforms, kPtr+0));
        x = b.mul(x, b.uniform32(uniforms, kPtr+4));
        x = b.sub(x, b.uniform32(uniforms, kPtr+8));

        // Pin x to [0, limit].
        skvm::I32 limit = b.uniform32(uniforms, kPtr+12);
        x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
        x = b.select(b.gt(x, limit     ), limit     , x);

        x = b.gather8(uniforms,0, x);

        b.store16(buf, x);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int N = 31;
        int16_t buf[N];
        for (int k = 0; k < N; k++) {
            buf[k] = k;
        }

        // Table of squares to gather from.
        const int M = 16;
        uint8_t img[M];
        for (int k = 0; k < M; k++) {
            img[k] = k*k;
        }

        struct {
            const uint8_t* img;
            int      add   = 5;
            int      mul   = 3;
            int      sub   = 18;
            int      limit = M-1;
        } uniforms{img};

        program.eval(N, buf, &uniforms);

        for (int k = 0; k < N; k++) {
            // Our first math calculates x = (k+5)*3 - 18 a.k.a 3*(k-1).
            int x = 3*(k-1);

            // Then that's pinned to the limits of img.
            if (k < 2) { x =  0; }  // Notice k == 1 hits x ==  0 exactly...
            if (k > 5) { x = 15; }  // ...and k == 6 hits x == 15 exactly
            REPORTER_ASSERT(r, buf[k] == img[x]);
        }
    });
}
796
// Reads int and float arrays pushed as uniforms and checks the values stored,
// both before and after the backing arrays are modified.
DEF_TEST(SKVM_array32, r) {
    skvm::Builder b;
    skvm::Uniforms uniforms(b.uniform(), 0);
    // Take up the first slot, so other uniforms are not at 0 offset.
    uniforms.push(0);
    int ints[] = {3, 7};
    skvm::Uniform array = uniforms.pushArray(ints);
    float floats[] = {5, 9};
    skvm::Uniform arrayF = uniforms.pushArrayF(floats);
    {
        skvm::Ptr buf0 = b.varying<int32_t>(),
                  buf1 = b.varying<int32_t>(),
                  buf2 = b.varying<int32_t>();

        skvm::I32 j = b.array32(array, 0);
        b.store32(buf0, j);
        skvm::I32 k = b.array32(array, 1);
        b.store32(buf1, k);

        skvm::F32 x = b.arrayF(arrayF, 0);
        skvm::F32 y = b.arrayF(arrayF, 1);
        b.store32(buf2, b.trunc(b.add(x, y)));
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        const int K = 10;
        int32_t buf0[K],
                buf1[K],
                buf2[K];

        // Checks that each of the three output buffers is filled with one value.
        auto expect_filled = [&](int32_t want0, int32_t want1, int32_t want2) {
            for (auto v : buf0) {
                REPORTER_ASSERT(r, v == want0);
            }
            for (auto v : buf1) {
                REPORTER_ASSERT(r, v == want1);
            }
            for (auto v : buf2) {
                REPORTER_ASSERT(r, v == want2);
            }
        };

        // Reset the arrays, since this lambda may run twice.
        ints[0]   = 3;
        floats[1] = 9;
        program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
        expect_filled(3, 7, 14);

        // Modify the backing arrays and evaluate again.
        ints[0]   = 4;
        floats[1] = 10;
        program.eval(K, uniforms.buf.data(), buf0, buf1, buf2);
        expect_filled(4, 7, 15);
    });
}
857
// Takes the square root in place of a buffer of perfect squares.
DEF_TEST(SkVM_sqrt, r) {
    skvm::Builder b;
    auto buf = b.varying<int>();
    b.storeF(buf, b.sqrt(b.loadF(buf)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        constexpr int K = 17;
        float vals[K];
        for (int k = 0; k < K; k++) {
            vals[k] = (float)(k*k);
        }

        // x^2 -> x
        program.eval(K, vals);

        for (int k = 0; k < K; k++) {
            REPORTER_ASSERT(r, vals[k] == (float)k);
        }
    });
}
878
DEF_TEST(SkVM_MSAN, r) {
    // This little memset32() program should be able to JIT, but if we run that
    // JIT code in an MSAN build, it won't see the writes initialize buf.  So
    // this tests that we're using the interpreter instead.
    skvm::Builder b;
    b.store32(b.varying<int>(), b.splat(42));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        constexpr int K = 17;
        int buf[K];                 // Intentionally uninitialized.
        program.eval(K, buf);
        sk_msan_assert_initialized(buf, buf+K);
        for (int k = 0; k < K; k++) {
            REPORTER_ASSERT(r, buf[k] == 42);
        }
    });
}
896
// Builds a program containing assert_true(x < 42) and runs it on inputs that
// all satisfy the condition.
DEF_TEST(SkVM_assert, r) {
    skvm::Builder b;
    b.assert_true(b.lt(b.load32(b.varying<int>()),
                       b.splat(42)));

    test_jit_and_interpreter(b, [&](const skvm::Program& program) {
        int values[] = { 0,1,2,3,4,5,6,7,8,9 };
        program.eval(std::size(values), values);
    });
}
907
// Only trace_line ops whose two mask arguments are both all-ones are expected
// to reach the hook.
DEF_TEST(SkVM_trace_line, r) {
    class LineRecorder : public skvm::TraceHook {
    public:
        void var(int, int32_t) override { this->unexpected(); }
        void enter(int) override        { this->unexpected(); }
        void exit(int) override         { this->unexpected(); }
        void scope(int) override        { this->unexpected(); }
        void line(int lineNum) override { fBuffer.push_back(lineNum); }

        std::vector<int> fBuffer;

    private:
        // Any callback other than line() poisons the buffer with a sentinel.
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    LineRecorder recorder;
    const int hookID = b.attachTraceHook(&recorder);

    auto on  = [&] { return b.splat(0xFFFFFFFF); };
    auto off = [&] { return b.splat(0x00000000); };

    b.trace_line(hookID, on(),  on(),  123);
    b.trace_line(hookID, off(), on(),  456);
    b.trace_line(hookID, on(),  off(), 567);
    b.trace_line(hookID, off(), off(), 678);
    b.trace_line(hookID, on(),  on(),  789);

    skvm::Program program = b.done();
    program.eval(1);

    REPORTER_ASSERT(r, (recorder.fBuffer == std::vector<int>{123, 789}));
}
933
// Only trace_var ops whose two mask arguments are both all-ones are expected
// to reach the hook; each one records its slot followed by its value.
DEF_TEST(SkVM_trace_var, r) {
    class VarRecorder : public skvm::TraceHook {
    public:
        void line(int) override  { this->unexpected(); }
        void enter(int) override { this->unexpected(); }
        void exit(int) override  { this->unexpected(); }
        void scope(int) override { this->unexpected(); }
        void var(int slot, int32_t val) override {
            fBuffer.push_back(slot);
            fBuffer.push_back(val);
        }

        std::vector<int> fBuffer;

    private:
        // Any callback other than var() poisons the buffer with a sentinel.
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    VarRecorder recorder;
    const int hookID = b.attachTraceHook(&recorder);

    auto on  = [&] { return b.splat(0xFFFFFFFF); };
    auto off = [&] { return b.splat(0x00000000); };

    b.trace_var(hookID, off(), on(),  2, b.splat(333));
    b.trace_var(hookID, on(),  on(),  4, b.splat(555));
    b.trace_var(hookID, off(), off(), 5, b.splat(666));
    b.trace_var(hookID, on(),  on(),  6, b.splat(777));
    b.trace_var(hookID, on(),  off(), 8, b.splat(999));

    skvm::Program program = b.done();
    program.eval(1);

    REPORTER_ASSERT(r, (recorder.fBuffer == std::vector<int>{4, 555, 6, 777}));
}
962
// Only trace_enter/trace_exit ops whose two mask arguments are both all-ones
// are expected to reach the hook. enter records (fnIdx, 1); exit records (fnIdx, 0).
DEF_TEST(SkVM_trace_enter_exit, r) {
    class CallRecorder : public skvm::TraceHook {
    public:
        void line(int) override         { this->unexpected(); }
        void var(int, int32_t) override { this->unexpected(); }
        void scope(int) override        { this->unexpected(); }
        void enter(int fnIdx) override  { this->record(fnIdx, 1); }
        void exit(int fnIdx) override   { this->record(fnIdx, 0); }

        std::vector<int> fBuffer;

    private:
        void record(int fnIdx, int entering) {
            fBuffer.push_back(fnIdx);
            fBuffer.push_back(entering);
        }
        // Any callback other than enter()/exit() poisons the buffer with a sentinel.
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    CallRecorder recorder;
    const int hookID = b.attachTraceHook(&recorder);

    auto on  = [&] { return b.splat(0xFFFFFFFF); };
    auto off = [&] { return b.splat(0x00000000); };

    b.trace_enter(hookID, off(), off(), 99);
    b.trace_enter(hookID, on(),  on(),  12);
    b.trace_enter(hookID, off(), on(),  34);
    b.trace_exit (hookID, on(),  on(),  56);
    b.trace_exit (hookID, on(),  off(), 78);
    b.trace_exit (hookID, off(), off(), 90);

    skvm::Program program = b.done();
    program.eval(1);

    REPORTER_ASSERT(r, (recorder.fBuffer == std::vector<int>{12, 1, 56, 0}));
}
995
// Only trace_scope ops whose two mask arguments are both all-ones are expected
// to reach the hook.
DEF_TEST(SkVM_trace_scope, r) {
    class ScopeRecorder : public skvm::TraceHook {
    public:
        void var(int, int32_t) override { this->unexpected(); }
        void enter(int) override        { this->unexpected(); }
        void exit(int) override         { this->unexpected(); }
        void line(int) override         { this->unexpected(); }
        void scope(int delta) override  { fBuffer.push_back(delta); }

        std::vector<int> fBuffer;

    private:
        // Any callback other than scope() poisons the buffer with a sentinel.
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    ScopeRecorder recorder;
    const int hookID = b.attachTraceHook(&recorder);

    auto on  = [&] { return b.splat(0xFFFFFFFF); };
    auto off = [&] { return b.splat(0x00000000); };

    b.trace_scope(hookID, on(),  on(),   1);
    b.trace_scope(hookID, on(),  off(), -2);
    b.trace_scope(hookID, off(), off(),  3);
    b.trace_scope(hookID, off(), on(),   4);
    b.trace_scope(hookID, on(),  on(),  -5);

    skvm::Program program = b.done();
    program.eval(1);

    REPORTER_ASSERT(r, (recorder.fBuffer == std::vector<int>{1, -5}));
}
1021
// Attaches three hooks to one Builder and checks that each trace_line is
// delivered only to the hook whose ID it names.
DEF_TEST(SkVM_trace_multiple_hooks, r) {
    class LineRecorder : public skvm::TraceHook {
    public:
        void var(int, int32_t) override { this->unexpected(); }
        void enter(int) override        { this->unexpected(); }
        void exit(int) override         { this->unexpected(); }
        void scope(int) override        { this->unexpected(); }
        void line(int lineNum) override { fBuffer.push_back(lineNum); }

        std::vector<int> fBuffer;

    private:
        // Any callback other than line() poisons the buffer with a sentinel.
        void unexpected() { fBuffer.push_back(-9999999); }
    };

    skvm::Builder b;
    LineRecorder hookA, hookB, hookC;
    const int idA = b.attachTraceHook(&hookA);
    const int idB = b.attachTraceHook(&hookB);
    const int idC = b.attachTraceHook(&hookC);

    auto on  = [&] { return b.splat(0xFFFFFFFF); };
    auto off = [&] { return b.splat(0x00000000); };

    b.trace_line(idC, on(),  on(),  111);
    b.trace_line(idA, on(),  on(),  222);
    b.trace_line(idC, off(), off(), 333);
    b.trace_line(idB, on(),  off(), 444);
    b.trace_line(idA, off(), on(),  555);
    b.trace_line(idB, on(),  on(),  666);

    skvm::Program program = b.done();
    program.eval(1);

    REPORTER_ASSERT(r, (hookA.fBuffer == std::vector<int>{222}));
    REPORTER_ASSERT(r, (hookB.fBuffer == std::vector<int>{666}));
    REPORTER_ASSERT(r, (hookC.fBuffer == std::vector<int>{111}));
}
1052
DEF_TEST(SkVM_premul, reporter) {
    // Each scope below builds a tiny program around premul()/unpremul() and checks how many
    // instructions survive in the finished program.

    // Test that premul is short-circuited when alpha is known opaque.
    // First, the baseline: alpha is loaded from memory, so the multiply has to stay.
    {
        skvm::Builder p;
        skvm::Ptr redPtr   = p.varying<int>();
        skvm::Ptr alphaPtr = p.varying<int>();

        skvm::F32 red   = p.loadF(redPtr);
        skvm::F32 green = p.splat(0.0f);
        skvm::F32 blue  = p.splat(0.0f);
        skvm::F32 alpha = p.loadF(alphaPtr);

        p.premul(&red, &green, &blue, alpha);
        p.storeF(redPtr, red);

        // load red, load alpha, red *= alpha, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() == 4);
    }

    // Now alpha is the constant 1.0f, and only the load and the store should remain.
    {
        skvm::Builder p;
        skvm::Ptr redPtr = p.varying<int>();

        skvm::F32 red   = p.loadF(redPtr);
        skvm::F32 green = p.splat(0.0f);
        skvm::F32 blue  = p.splat(0.0f);
        skvm::F32 alpha = p.splat(1.0f);

        p.premul(&red, &green, &blue, alpha);
        p.storeF(redPtr, red);

        // load red, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
    }

    // Same deal for unpremul.
    {
        skvm::Builder p;
        skvm::Ptr redPtr   = p.varying<int>();
        skvm::Ptr alphaPtr = p.varying<int>();

        skvm::F32 red   = p.loadF(redPtr);
        skvm::F32 green = p.splat(0.0f);
        skvm::F32 blue  = p.splat(0.0f);
        skvm::F32 alpha = p.loadF(alphaPtr);

        p.unpremul(&red, &green, &blue, alpha);
        p.storeF(redPtr, red);

        // load red, load alpha, a bunch of unpremul instructions, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() >= 4);
    }

    {
        skvm::Builder p;
        skvm::Ptr redPtr = p.varying<int>();

        skvm::F32 red   = p.loadF(redPtr);
        skvm::F32 green = p.splat(0.0f);
        skvm::F32 blue  = p.splat(0.0f);
        skvm::F32 alpha = p.splat(1.0f);

        p.unpremul(&red, &green, &blue, alpha);
        p.storeF(redPtr, red);

        // load red, store red
        REPORTER_ASSERT(reporter, p.done().instructions().size() == 2);
    }
}
1122
1123 template <typename Fn>
test_asm(skiatest::Reporter * r,Fn && fn,std::initializer_list<uint8_t> expected)1124 static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
1125 uint8_t buf[4096];
1126 skvm::Assembler a{buf};
1127 fn(a);
1128
1129 REPORTER_ASSERT(r, a.size() == expected.size());
1130
1131 auto got = (const uint8_t*)buf,
1132 want = expected.begin();
1133 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
1134 REPORTER_ASSERT(r, got[i] == want[i],
1135 "byte %d was %02x, want %02x", i, got[i], want[i]);
1136 }
1137 }
1138
// Byte-for-byte checks of skvm::Assembler's encoders. Each test_asm() call below runs a lambda
// that emits a few instructions (or raw bytes and labels) and then compares what was written
// against the expected byte list that follows the lambda. Within each call, the expected bytes
// are listed in the same order as the emitting calls.
DEF_TEST(SkVM_Assembler, r) {
    // Easiest way to generate test cases is
    //
    // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
    //
    // The -x86-asm-syntax=intel bit is optional, controlling the
    // input syntax only; the output will always be AT&T op x,y,dst style.
    // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
    // that a bit easier to use here, despite maybe favoring AT&T overall.

    using A = skvm::Assembler;
    // Our exit strategy from AVX code.
    test_asm(r, [&](A& a) {
        a.int3();
        a.vzeroupper();
        a.ret();
    },{
        0xcc,
        0xc5, 0xf8, 0x77,
        0xc3,
    });

    // Align should pad with zero
    test_asm(r, [&](A& a) {
        a.ret();
        a.align(4);
    },{
        0xc3,
        0x00, 0x00, 0x00,
    });

    // 64-bit add/sub: immediates, memory operands with the various addressing forms, and
    // register/memory operands.
    test_asm(r, [&](A& a) {
        a.add(A::rax, 8); // Always good to test rax.
        a.sub(A::rax, 32);

        a.add(A::rdi, 12); // Last 0x48 REX
        a.sub(A::rdi, 8);

        a.add(A::r8 , 7); // First 0x49 REX
        a.sub(A::r8 , 4);

        a.add(A::rsi, 128); // Requires 4 byte immediate.
        a.sub(A::r8 , 1000000);

        a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi)
        a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi)
        a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp)
        a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12)
        a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4)
        a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4)
        a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4)
        a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2)
        a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax)
        a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11)

        a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11)

        a.add( A::rax , A::rcx); // addq %rcx, %rax
        a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax)
        a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax)
        a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx

        a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx
    },{
        0x48, 0x83, 0b11'000'000, 0x08,
        0x48, 0x83, 0b11'101'000, 0x20,

        0x48, 0x83, 0b11'000'111, 0x0c,
        0x48, 0x83, 0b11'101'111, 0x08,

        0x49, 0x83, 0b11'000'000, 0x07,
        0x49, 0x83, 0b11'101'000, 0x04,

        0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
        0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,

        0x48,0x83,0x06,0x07,
        0x48,0x83,0x46,0x0c,0x07,
        0x48,0x83,0x44,0x24,0x0c,0x07,
        0x49,0x83,0x44,0x24,0x0c,0x07,
        0x48,0x83,0x44,0x84,0x0c,0x07,
        0x49,0x83,0x44,0x84,0x0c,0x07,
        0x4a,0x83,0x44,0xa0,0x0c,0x07,
        0x4b,0x83,0x44,0x43,0x0c,0x07,
        0x49,0x83,0x44,0x03,0x0c,0x07,
        0x4a,0x83,0x44,0x18,0x0c,0x07,

        0x4a,0x83,0x6c,0x18,0x0c,0x07,

        0x48,0x01,0xc8,
        0x48,0x01,0x08,
        0x48,0x01,0x48,0x0c,
        0x48,0x03,0x48,0x0c,
        0x48,0x2b,0x48,0x0c,
    });


    test_asm(r, [&](A& a) {
        a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
        a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
        a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
        a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
        a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
        a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
    },{
        /* VEX */ /*op*/ /*modRM*/
        0xc5, 0xf5, 0xfe, 0xc2,
        0xc5, 0x75, 0xfe, 0xc2,
        0xc5, 0xbd, 0xfe, 0xc2,
        0xc4, 0xc1, 0x75, 0xfe, 0xc0,
        0xc4, 0xe2, 0x75, 0x40, 0xc2,
        0xc5, 0xf5, 0xfa, 0xc2,
    });

    test_asm(r, [&](A& a) {
        a.vpaddw (A::ymm4, A::ymm3, A::ymm2);
        a.vpavgw (A::ymm4, A::ymm3, A::ymm2);
        a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2);
        a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2);

        a.vpminsw (A::ymm4, A::ymm3, A::ymm2);
        a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2);
        a.vpminuw (A::ymm4, A::ymm3, A::ymm2);
        a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2);

        a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2);
        a.vpabsw (A::ymm4, A::ymm3);
        a.vpsllw (A::ymm4, A::ymm3, 12);
        a.vpsraw (A::ymm4, A::ymm3, 12);
    },{
        0xc5, 0xe5, 0xfd, 0xe2,
        0xc5, 0xe5, 0xe3, 0xe2,
        0xc5, 0xe5, 0x75, 0xe2,
        0xc5, 0xe5, 0x65, 0xe2,

        0xc5, 0xe5, 0xea, 0xe2,
        0xc5, 0xe5, 0xee, 0xe2,
        0xc4,0xe2,0x65, 0x3a, 0xe2,
        0xc4,0xe2,0x65, 0x3e, 0xe2,

        0xc4,0xe2,0x65, 0x0b, 0xe2,
        0xc4,0xe2,0x7d, 0x1d, 0xe3,
        0xc5,0xdd,0x71, 0xf3, 0x0c,
        0xc5,0xdd,0x71, 0xe3, 0x0c,
    });

    test_asm(r, [&](A& a) {
        A::Label l;
        a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0
        a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2);
        a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2);
        a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2);
        a.vcmpltps (A::ymm0, A::ymm1, A::ymm2);
        a.vcmpleps (A::ymm0, A::ymm1, A::ymm2);
        a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2);
        a.label(&l); // 28 bytes after the vcmpeqps that uses it.
    },{
        0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00,
        0xc5,0xf5,0x76,0xc2,
        0xc5,0xf5,0x66,0xc2,
        0xc5,0xf4,0xc2,0xc2,0x00,
        0xc5,0xf4,0xc2,0xc2,0x01,
        0xc5,0xf4,0xc2,0xc2,0x02,
        0xc5,0xf4,0xc2,0xc2,0x04,
    });

    test_asm(r, [&](A& a) {
        a.vminps(A::ymm0, A::ymm1, A::ymm2);
        a.vmaxps(A::ymm0, A::ymm1, A::ymm2);
    },{
        0xc5,0xf4,0x5d,0xc2,
        0xc5,0xf4,0x5f,0xc2,
    });

    test_asm(r, [&](A& a) {
        a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
    },{
        0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
    });

    test_asm(r, [&](A& a) {
        a.vpsrld(A::ymm15, A::ymm2, 8);
        a.vpsrld(A::ymm0 , A::ymm8, 5);
    },{
        0xc5, 0x85, 0x72,0xd2, 0x08,
        0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
    });

    test_asm(r, [&](A& a) {
        A::Label l;
        a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32});
        a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20);
        a.vpermq(A::ymm1, A::ymm2, 5);
        a.label(&l); // 6 bytes after vperm2f128
    },{
        0xc4,0xe2,0x6d,0x16,0x4f,0x20,
        0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20,
        0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
    });

    test_asm(r, [&](A& a) {
        a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi});
        a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3);
    },{
        0xc5,0xed,0x62,0x0f,
        0xc5,0xed,0x6a,0xcb,
    });

    test_asm(r, [&](A& a) {
        a.vroundps(A::ymm1, A::ymm2, A::NEAREST);
        a.vroundps(A::ymm1, A::ymm2, A::FLOOR);
        a.vroundps(A::ymm1, A::ymm2, A::CEIL);
        a.vroundps(A::ymm1, A::ymm2, A::TRUNC);
    },{
        0xc4,0xe3,0x7d,0x08,0xca,0x00,
        0xc4,0xe3,0x7d,0x08,0xca,0x01,
        0xc4,0xe3,0x7d,0x08,0xca,0x02,
        0xc4,0xe3,0x7d,0x08,0xca,0x03,
    });

    // Label-relative operands: the label sits at offset 0, ahead of four raw data bytes, so
    // every offset below is negative and grows with each instruction emitted.
    test_asm(r, [&](A& a) {
        A::Label l;
        a.label(&l);
        a.byte(1);
        a.byte(2);
        a.byte(3);
        a.byte(4);

        a.vbroadcastss(A::ymm0 , &l);
        a.vbroadcastss(A::ymm1 , &l);
        a.vbroadcastss(A::ymm8 , &l);
        a.vbroadcastss(A::ymm15, &l);

        a.vpshufb(A::ymm4, A::ymm3, &l);
        a.vpaddd (A::ymm4, A::ymm3, &l);
        a.vpsubd (A::ymm4, A::ymm3, &l);

        a.vptest(A::ymm4, &l);

        a.vmulps (A::ymm4, A::ymm3, &l);
    },{
        0x01, 0x02, 0x03, 0x4,

        /* VEX */ /*op*/ /* ModRM */ /* offset */
        0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
        0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
        0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
        0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40

        0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49

        0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57
        0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65

        0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74

        0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82
    });

    test_asm(r, [&](A& a) {
        a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0});
        a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7});
        a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12});
        a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400});

        a.vbroadcastss(A::ymm8, A::xmm0);
        a.vbroadcastss(A::ymm0, A::xmm13);
    },{
        /* VEX */ /*op*/ /*ModRM*/ /*offset*/
        0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
        0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
        0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
        0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,

        0xc4,0x62,0x7d, 0x18, 0b11'000'000,
        0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
    });

    // Backward jumps to a label at offset 0, followed by cmp against immediates.
    test_asm(r, [&](A& a) {
        A::Label l;
        a.label(&l);
        a.jne(&l);
        a.jne(&l);
        a.je (&l);
        a.jmp(&l);
        a.jl (&l);
        a.jc (&l);

        a.cmp(A::rdx, 1);
        a.cmp(A::rax, 12);
        a.cmp(A::r14, 2000000000);
    },{
        0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
        0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
        0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
        0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
        0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
        0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes

        0x48,0x83,0xfa,0x01,
        0x48,0x83,0xf8,0x0c,
        0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
    });

    test_asm(r, [&](A& a) {
        a.vmovups(A::ymm5, A::Mem{A::rsi});
        a.vmovups(A::Mem{A::rsi}, A::ymm5);

        a.vmovups(A::xmm5, A::Mem{A::rsi});
        a.vmovups(A::Mem{A::rsi}, A::xmm5);

        a.vpmovzxwd(A::ymm4, A::Mem{A::rsi});
        a.vpmovzxbd(A::ymm4, A::Mem{A::rsi});

        a.vmovq(A::Mem{A::rdx}, A::xmm15);
    },{
        /* VEX */ /*Op*/ /* ModRM */
        0xc5, 0xfc, 0x10, 0b00'101'110,
        0xc5, 0xfc, 0x11, 0b00'101'110,

        0xc5, 0xf8, 0x10, 0b00'101'110,
        0xc5, 0xf8, 0x11, 0b00'101'110,

        0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
        0xc4,0xe2,0x7d, 0x31, 0b00'100'110,

        0xc5, 0x79, 0xd6, 0b00'111'010,
    });

    // rsp-relative loads and stores with offsets 0, 64 and 128; the expected encodings grow
    // from no displacement to a 1-byte and then a 4-byte displacement.
    test_asm(r, [&](A& a) {
        a.vmovups(A::ymm5, A::Mem{A::rsp, 0});
        a.vmovups(A::ymm5, A::Mem{A::rsp, 64});
        a.vmovups(A::ymm5, A::Mem{A::rsp,128});

        a.vmovups(A::Mem{A::rsp, 0}, A::ymm5);
        a.vmovups(A::Mem{A::rsp, 64}, A::ymm5);
        a.vmovups(A::Mem{A::rsp,128}, A::ymm5);
    },{
        0xc5,0xfc,0x10,0x2c,0x24,
        0xc5,0xfc,0x10,0x6c,0x24,0x40,
        0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00,

        0xc5,0xfc,0x11,0x2c,0x24,
        0xc5,0xfc,0x11,0x6c,0x24,0x40,
        0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00,
    });

    test_asm(r, [&](A& a) {
        a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
        a.movzbq(A::rax, A::Mem{A::r8,}); // High src register.
        a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register.
        a.movzbq(A::r8, A::Mem{A::rsi, 12});
        a.movzbq(A::r8, A::Mem{A::rsi, 400});

        a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst.
        a.movzwq(A::rax, A::Mem{A::r8,}); // High src register.
        a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register.
        a.movzwq(A::r8, A::Mem{A::rsi, 12});
        a.movzwq(A::r8, A::Mem{A::rsi, 400});

        a.vmovd(A::Mem{A::rax}, A::xmm0);
        a.vmovd(A::Mem{A::rax}, A::xmm8);
        a.vmovd(A::Mem{A::r8 }, A::xmm0);

        a.vmovd(A::xmm0, A::Mem{A::rax});
        a.vmovd(A::xmm8, A::Mem{A::rax});
        a.vmovd(A::xmm0, A::Mem{A::r8 });

        a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR});
        a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO });
        a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx});

        a.vmovd(A::rax, A::xmm0);
        a.vmovd(A::rax, A::xmm8);
        a.vmovd(A::r8 , A::xmm0);

        a.vmovd(A::xmm0, A::rax);
        a.vmovd(A::xmm8, A::rax);
        a.vmovd(A::xmm0, A::r8 );

        a.movb(A::Mem{A::rdx}, A::rax);
        a.movb(A::Mem{A::rdx}, A::r8 );
        a.movb(A::Mem{A::r8 }, A::rax);

        a.movb(A::rdx, A::Mem{A::rax});
        a.movb(A::rdx, A::Mem{A::r8 });
        a.movb(A::r8 , A::Mem{A::rax});

        a.movb(A::rdx, 12);
        a.movb(A::rax, 4);
        a.movb(A::r8 , -1);

        a.movb(A::Mem{A::rdx}, 12);
        a.movb(A::Mem{A::rax}, 4);
        a.movb(A::Mem{A::r8 }, -1);
    },{
        0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax
        0x49,0x0f,0xb6,0x00,
        0x4c,0x0f,0xb6,0x06,
        0x4c,0x0f,0xb6,0x46, 12,
        0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,

        0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax
        0x49,0x0f,0xb7,0x00,
        0x4c,0x0f,0xb7,0x06,
        0x4c,0x0f,0xb7,0x46, 12,
        0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00,

        0xc5,0xf9,0x7e,0x00,
        0xc5,0x79,0x7e,0x00,
        0xc4,0xc1,0x79,0x7e,0x00,

        0xc5,0xf9,0x6e,0x00,
        0xc5,0x79,0x6e,0x00,
        0xc4,0xc1,0x79,0x6e,0x00,

        0xc5,0xf9,0x6e,0x04,0x88,
        0xc4,0x21,0x79,0x6e,0x3c,0x40,
        0xc4,0xc1,0x79,0x6e,0x04,0x08,

        0xc5,0xf9,0x7e,0xc0,
        0xc5,0x79,0x7e,0xc0,
        0xc4,0xc1,0x79,0x7e,0xc0,

        0xc5,0xf9,0x6e,0xc0,
        0xc5,0x79,0x6e,0xc0,
        0xc4,0xc1,0x79,0x6e,0xc0,

        0x48 ,0x88, 0x02,
        0x4c, 0x88, 0x02,
        0x49, 0x88, 0x00,

        0x48 ,0x8a, 0x10,
        0x49, 0x8a, 0x10,
        0x4c, 0x8a, 0x00,

        0x48, 0xc6, 0xc2, 0x0c,
        0x48, 0xc6, 0xc0, 0x04,
        0x49, 0xc6, 0xc0, 0xff,

        0x48, 0xc6, 0x02, 0x0c,
        0x48, 0xc6, 0x00, 0x04,
        0x49, 0xc6, 0x00, 0xff,
    });

    test_asm(r, [&](A& a) {
        a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1
        a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8

        a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1
        a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8

        a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1
        a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8

        a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1
        a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8

        a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi)
        a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8)

        a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7);
        a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15);

        a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7);
        a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15);
    },{
        0xc4,0xe3,0x39, 0x22, 0x0e, 1,
        0xc4,0x43,0x71, 0x22, 0x00, 3,

        0xc5,0xb9, 0xc4, 0x0e, 4,
        0xc4,0x41,0x71, 0xc4, 0x00, 12,

        0xc4,0xe3,0x39, 0x20, 0x0e, 4,
        0xc4,0x43,0x71, 0x20, 0x00, 12,

        0xc4,0x63,0x7d,0x39,0xc1, 1,
        0xc4,0xc3,0x7d,0x39,0xc8, 0,

        0xc4,0x63,0x79,0x16,0x06, 3,
        0xc4,0xc3,0x79,0x16,0x08, 2,

        0xc4,0x63,0x79, 0x15, 0x06, 7,
        0xc4,0xc3,0x79, 0x15, 0x08, 15,

        0xc4,0x63,0x79, 0x14, 0x06, 7,
        0xc4,0xc3,0x79, 0x14, 0x08, 15,
    });

    test_asm(r, [&](A& a) {
        a.vpandn(A::ymm3, A::ymm12, A::ymm2);
    },{
        0xc5, 0x9d, 0xdf, 0xda,
    });

    test_asm(r, [&](A& a) {
        A::Label l;
        a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3

        a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3
        a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3
        a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3

        a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3
        a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3

        a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3
        a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3
        a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3
        a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3
        a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3

        a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3
        a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3
        a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3

        a.vcvttps2dq(A::ymm3, A::ymm2);
        a.vcvtdq2ps (A::ymm3, A::ymm2);
        a.vcvtps2dq (A::ymm3, A::ymm2);
        a.vsqrtps (A::ymm3, A::ymm2);
        a.label(&l);
    },{
        0xc5,0xfd,0x6f,0xda,

        0xc5,0xfd,0x6f,0x1e,
        0xc5,0xfd,0x6f,0x1c,0x24,
        0xc4,0xc1,0x7d,0x6f,0x1b,

        0xc5,0xfd,0x6f,0x5e,0x04,
        0xc5,0xfd,0x6f,0x5c,0x24,0x04,

        0xc5,0xfd,0x6f,0x5c,0xc6,0x04,
        0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04,
        0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04,
        0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,
        0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04,

        0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40,
        0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00,

        0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00,

        0xc5,0xfe,0x5b,0xda,
        0xc5,0xfc,0x5b,0xda,
        0xc5,0xfd,0x5b,0xda,
        0xc5,0xfc,0x51,0xda,
    });

    test_asm(r, [&](A& a) {
        a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT);
        a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL);

        a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE});
        a.vcvtph2ps(A::ymm2, A::xmm3);
    },{
        0xc4,0xe3,0x7d,0x1d,0xd3,0x04,
        0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02,

        0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c,
        0xc4,0xe2,0x7d,0x13,0xd3,
    });

    test_asm(r, [&](A& a) {
        a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 );
        a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 );
        a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 );
        a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 );
        a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 );
        a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12);
        a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12);
    },{
        0xc4,0xe2,0x6d,0x92,0x0c,0x87,
        0xc4,0xe2,0x75,0x92,0x04,0x10,
        0xc4,0x62,0x75,0x92,0x14,0x10,
        0xc4,0xa2,0x75,0x92,0x04,0x20,
        0xc4,0xc2,0x75,0x92,0x04,0x11,
        0xc4,0xe2,0x1d,0x92,0x04,0x10,
        0xc4,0xe2,0x1d,0x92,0x04,0xd0,
    });

    test_asm(r, [&](A& a) {
        a.mov(A::rax, A::Mem{A::rdi, 0});
        a.mov(A::rax, A::Mem{A::rdi, 1});
        a.mov(A::rax, A::Mem{A::rdi, 512});
        a.mov(A::r15, A::Mem{A::r13, 42});
        a.mov(A::rax, A::Mem{A::r13, 42});
        a.mov(A::r15, A::Mem{A::rax, 42});
        a.mov(A::rax, 1);
        a.mov(A::rax, A::rcx);
    },{
        0x48, 0x8b, 0x07,
        0x48, 0x8b, 0x47, 0x01,
        0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00,
        0x4d, 0x8b, 0x7d, 0x2a,
        0x49, 0x8b, 0x45, 0x2a,
        0x4c, 0x8b, 0x78, 0x2a,
        0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00,
        0x48, 0x89, 0xc8,
    });

    // NOTE(review): the cases from here on use v*/x*/sp register names and 4-byte expected
    // words, so they appear to exercise the arm64 encoders; the command below shows how to
    // generate such encodings.
    // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64

    test_asm(r, [&](A& a) {
        a.and16b(A::v4, A::v3, A::v1);
        a.orr16b(A::v4, A::v3, A::v1);
        a.eor16b(A::v4, A::v3, A::v1);
        a.bic16b(A::v4, A::v3, A::v1);
        a.bsl16b(A::v4, A::v3, A::v1);
        a.not16b(A::v4, A::v3);

        a.add4s(A::v4, A::v3, A::v1);
        a.sub4s(A::v4, A::v3, A::v1);
        a.mul4s(A::v4, A::v3, A::v1);

        a.cmeq4s(A::v4, A::v3, A::v1);
        a.cmgt4s(A::v4, A::v3, A::v1);

        a.sub8h(A::v4, A::v3, A::v1);
        a.mul8h(A::v4, A::v3, A::v1);

        a.fadd4s(A::v4, A::v3, A::v1);
        a.fsub4s(A::v4, A::v3, A::v1);
        a.fmul4s(A::v4, A::v3, A::v1);
        a.fdiv4s(A::v4, A::v3, A::v1);
        a.fmin4s(A::v4, A::v3, A::v1);
        a.fmax4s(A::v4, A::v3, A::v1);

        a.fneg4s (A::v4, A::v3);
        a.fsqrt4s(A::v4, A::v3);

        a.fmla4s(A::v4, A::v3, A::v1);
        a.fmls4s(A::v4, A::v3, A::v1);

        a.fcmeq4s(A::v4, A::v3, A::v1);
        a.fcmgt4s(A::v4, A::v3, A::v1);
        a.fcmge4s(A::v4, A::v3, A::v1);
    },{
        0x64,0x1c,0x21,0x4e,
        0x64,0x1c,0xa1,0x4e,
        0x64,0x1c,0x21,0x6e,
        0x64,0x1c,0x61,0x4e,
        0x64,0x1c,0x61,0x6e,
        0x64,0x58,0x20,0x6e,

        0x64,0x84,0xa1,0x4e,
        0x64,0x84,0xa1,0x6e,
        0x64,0x9c,0xa1,0x4e,

        0x64,0x8c,0xa1,0x6e,
        0x64,0x34,0xa1,0x4e,

        0x64,0x84,0x61,0x6e,
        0x64,0x9c,0x61,0x4e,

        0x64,0xd4,0x21,0x4e,
        0x64,0xd4,0xa1,0x4e,
        0x64,0xdc,0x21,0x6e,
        0x64,0xfc,0x21,0x6e,
        0x64,0xf4,0xa1,0x4e,
        0x64,0xf4,0x21,0x4e,

        0x64,0xf8,0xa0,0x6e,
        0x64,0xf8,0xa1,0x6e,

        0x64,0xcc,0x21,0x4e,
        0x64,0xcc,0xa1,0x4e,

        0x64,0xe4,0x21,0x4e,
        0x64,0xe4,0xa1,0x6e,
        0x64,0xe4,0x21,0x6e,
    });

    test_asm(r, [&](A& a) {
        a.shl4s(A::v4, A::v3, 0);
        a.shl4s(A::v4, A::v3, 1);
        a.shl4s(A::v4, A::v3, 8);
        a.shl4s(A::v4, A::v3, 16);
        a.shl4s(A::v4, A::v3, 31);

        a.sshr4s(A::v4, A::v3, 1);
        a.sshr4s(A::v4, A::v3, 8);
        a.sshr4s(A::v4, A::v3, 31);

        a.ushr4s(A::v4, A::v3, 1);
        a.ushr4s(A::v4, A::v3, 8);
        a.ushr4s(A::v4, A::v3, 31);

        a.ushr8h(A::v4, A::v3, 1);
        a.ushr8h(A::v4, A::v3, 8);
        a.ushr8h(A::v4, A::v3, 15);
    },{
        0x64,0x54,0x20,0x4f,
        0x64,0x54,0x21,0x4f,
        0x64,0x54,0x28,0x4f,
        0x64,0x54,0x30,0x4f,
        0x64,0x54,0x3f,0x4f,

        0x64,0x04,0x3f,0x4f,
        0x64,0x04,0x38,0x4f,
        0x64,0x04,0x21,0x4f,

        0x64,0x04,0x3f,0x6f,
        0x64,0x04,0x38,0x6f,
        0x64,0x04,0x21,0x6f,

        0x64,0x04,0x1f,0x6f,
        0x64,0x04,0x18,0x6f,
        0x64,0x04,0x11,0x6f,
    });

    test_asm(r, [&](A& a) {
        a.sli4s(A::v4, A::v3, 0);
        a.sli4s(A::v4, A::v3, 1);
        a.sli4s(A::v4, A::v3, 8);
        a.sli4s(A::v4, A::v3, 16);
        a.sli4s(A::v4, A::v3, 31);
    },{
        0x64,0x54,0x20,0x6f,
        0x64,0x54,0x21,0x6f,
        0x64,0x54,0x28,0x6f,
        0x64,0x54,0x30,0x6f,
        0x64,0x54,0x3f,0x6f,
    });

    test_asm(r, [&](A& a) {
        a.scvtf4s (A::v4, A::v3);
        a.fcvtzs4s(A::v4, A::v3);
        a.fcvtns4s(A::v4, A::v3);
        a.frintp4s(A::v4, A::v3);
        a.frintm4s(A::v4, A::v3);
        a.fcvtn (A::v4, A::v3);
        a.fcvtl (A::v4, A::v3);
    },{
        0x64,0xd8,0x21,0x4e,
        0x64,0xb8,0xa1,0x4e,
        0x64,0xa8,0x21,0x4e,
        0x64,0x88,0xa1,0x4e,
        0x64,0x98,0x21,0x4e,
        0x64,0x68,0x21,0x0e,
        0x64,0x78,0x21,0x0e,
    });

    // Per the inline disassembly, the immediate passed to the str*/ldr* calls is scaled by the
    // access size (e.g. strq with 1 encodes #16, ldrh with 47 encodes #94).
    test_asm(r, [&](A& a) {
        a.sub (A::sp, A::sp, 32); // sub sp, sp, #32
        a.strq(A::v0, A::sp, 1); // str q0, [sp, #16]
        a.strq(A::v1, A::sp); // str q1, [sp]
        a.strd(A::v0, A::sp, 6); // str d0, [sp, #48]
        a.strs(A::v0, A::sp, 6); // str s0, [sp, #24]
        a.strh(A::v0, A::sp, 10); // str h0, [sp, #20]
        a.strb(A::v0, A::sp, 47); // str b0, [sp, #47]
        a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42]
        a.ldrh(A::v9, A::sp, 47); // ldr h9, [sp, #94]
        a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40]
        a.ldrd(A::v7, A::sp, 1); // ldr d7, [sp, #8]
        a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048]
        a.add (A::sp, A::sp, 32); // add sp, sp, #32
    },{
        0xff,0x83,0x00,0xd1,
        0xe0,0x07,0x80,0x3d,
        0xe1,0x03,0x80,0x3d,
        0xe0,0x1b,0x00,0xfd,
        0xe0,0x1b,0x00,0xbd,
        0xe0,0x2b,0x00,0x7d,
        0xe0,0xbf,0x00,0x3d,
        0xe9,0xab,0x40,0x3d,
        0xe9,0xbf,0x40,0x7d,
        0xe7,0x2b,0x40,0xbd,
        0xe7,0x07,0x40,0xfd,
        0xe5,0x03,0xc2,0x3d,
        0xff,0x83,0x00,0x91,
    });

    test_asm(r, [&](A& a) {
        a.brk(0);
        a.brk(65535);

        a.ret(A::x30); // Conventional ret using link register.
        a.ret(A::x13); // Can really return using any register if we like.

        a.add(A::x2, A::x2, 4);
        a.add(A::x3, A::x2, 32);

        a.sub(A::x2, A::x2, 4);
        a.sub(A::x3, A::x2, 32);

        a.subs(A::x2, A::x2, 4);
        a.subs(A::x3, A::x2, 32);

        a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
        a.cmp(A::x2, 4);

        A::Label l;
        a.label(&l);
        a.bne(&l);
        a.bne(&l);
        a.blt(&l);
        a.b(&l);
        a.cbnz(A::x2, &l);
        a.cbz(A::x2, &l);

        a.add(A::x3, A::x2, A::x1); // add x3,x2,x1
        a.add(A::x3, A::x2, A::x1, A::ASR, 3); // add x3,x2,x1, asr #3
    },{
        0x00,0x00,0x20,0xd4,
        0xe0,0xff,0x3f,0xd4,

        0xc0,0x03,0x5f,0xd6,
        0xa0,0x01,0x5f,0xd6,

        0x42,0x10,0x00,0x91,
        0x43,0x80,0x00,0x91,

        0x42,0x10,0x00,0xd1,
        0x43,0x80,0x00,0xd1,

        0x42,0x10,0x00,0xf1,
        0x43,0x80,0x00,0xf1,

        0x5f,0x10,0x00,0xf1,
        0x5f,0x10,0x00,0xf1,

        0x01,0x00,0x00,0x54, // b.ne #0
        0xe1,0xff,0xff,0x54, // b.ne #-4
        0xcb,0xff,0xff,0x54, // b.lt #-8
        0xae,0xff,0xff,0x54, // b.al #-12
        0x82,0xff,0xff,0xb5, // cbnz x2, #-16
        0x62,0xff,0xff,0xb4, // cbz x2, #-20

        0x43,0x00,0x01,0x8b,
        0x43,0x0c,0x81,0x8b,
    });

    // Can we cbz() to a not-yet-defined label?
    test_asm(r, [&](A& a) {
        A::Label l;
        a.cbz(A::x2, &l);
        a.add(A::x3, A::x2, 32);
        a.label(&l);
        a.ret(A::x30);
    },{
        0x42,0x00,0x00,0xb4, // cbz x2, #8
        0x43,0x80,0x00,0x91, // add x3, x2, #32
        0xc0,0x03,0x5f,0xd6, // ret
    });

    // If we start a label as a backward label,
    // can we redefine it to be a future label?
    // (Not sure this is useful... just want to test it works.)
    test_asm(r, [&](A& a) {
        A::Label l1;
        a.label(&l1);
        a.add(A::x3, A::x2, 32);
        a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.

        A::Label l2; // Start off the same...
        a.label(&l2);
        a.add(A::x3, A::x2, 32);
        a.cbz(A::x2, &l2); // Looks like this will go backward...
        a.add(A::x2, A::x2, 4);
        a.add(A::x3, A::x2, 32);
        a.label(&l2); // But no... actually forward! What a switcheroo!
    },{
        0x43,0x80,0x00,0x91, // add x3, x2, #32
        0xe2,0xff,0xff,0xb4, // cbz x2, #-4

        0x43,0x80,0x00,0x91, // add x3, x2, #32
        0x62,0x00,0x00,0xb4, // cbz x2, #12
        0x42,0x10,0x00,0x91, // add x2, x2, #4
        0x43,0x80,0x00,0x91, // add x3, x2, #32
    });

    // Loading from a label on ARM.
    test_asm(r, [&](A& a) {
        A::Label fore,aft;
        a.label(&fore);
        a.word(0x01234567);
        a.ldrq(A::v1, &fore);
        a.ldrq(A::v2, &aft);
        a.label(&aft);
        a.word(0x76543210);
    },{
        0x67,0x45,0x23,0x01,
        0xe1,0xff,0xff,0x9c, // ldr q1, #-4
        0x22,0x00,0x00,0x9c, // ldr q2, #4
        0x10,0x32,0x54,0x76,
    });

    test_asm(r, [&](A& a) {
        a.ldrq(A::v0, A::x8);
        a.strq(A::v0, A::x8);
    },{
        0x00,0x01,0xc0,0x3d,
        0x00,0x01,0x80,0x3d,
    });

    test_asm(r, [&](A& a) {
        a.dup4s (A::v0, A::x8);
        a.ld1r4s (A::v0, A::x8); // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding
        a.ld1r8h (A::v0, A::x8);
        a.ld1r16b(A::v0, A::x8);
    },{
        0x00,0x0d,0x04,0x4e,
        0x00,0xc9,0x40,0x4d,
        0x00,0xc5,0x40,0x4d,
        0x00,0xc1,0x40,0x4d,
    });

    test_asm(r, [&](A& a) {
        a.ld24s(A::v0, A::x8); // echo 'ld2.4s {v0,v1}, [x8]' | llvm-mc --show-encoding
        a.ld44s(A::v0, A::x8);
        a.st24s(A::v0, A::x8);
        a.st44s(A::v0, A::x8); // echo 'st4.4s {v0,v1,v2,v3}, [x8]' | llvm-mc --show-encoding

        a.ld24s(A::v0, A::x8, 0); //echo 'ld2 {v0.s,v1.s}[0], [x8]' | llvm-mc --show-encoding
        a.ld24s(A::v0, A::x8, 1);
        a.ld24s(A::v0, A::x8, 2);
        a.ld24s(A::v0, A::x8, 3);

        a.ld44s(A::v0, A::x8, 0); // ld4 {v0.s,v1.s,v2.s,v3.s}[0], [x8]
        a.ld44s(A::v0, A::x8, 1);
        a.ld44s(A::v0, A::x8, 2);
        a.ld44s(A::v0, A::x8, 3);
    },{
        0x00,0x89,0x40,0x4c,
        0x00,0x09,0x40,0x4c,
        0x00,0x89,0x00,0x4c,
        0x00,0x09,0x00,0x4c,

        0x00,0x81,0x60,0x0d,
        0x00,0x91,0x60,0x0d,
        0x00,0x81,0x60,0x4d,
        0x00,0x91,0x60,0x4d,

        0x00,0xa1,0x60,0x0d,
        0x00,0xb1,0x60,0x0d,
        0x00,0xa1,0x60,0x4d,
        0x00,0xb1,0x60,0x4d,
    });

    test_asm(r, [&](A& a) {
        a.xtns2h(A::v0, A::v0);
        a.xtnh2b(A::v0, A::v0);
        a.strs (A::v0, A::x0);

        a.ldrs (A::v0, A::x0);
        a.uxtlb2h(A::v0, A::v0);
        a.uxtlh2s(A::v0, A::v0);

        a.uminv4s(A::v3, A::v4);
        a.movs (A::x3, A::v4,0); // mov.s w3,v4[0]
        a.movs (A::x3, A::v4,1); // mov.s w3,v4[1]
        a.inss (A::v4, A::x3,3); // ins.s v4[3],w3
    },{
        0x00,0x28,0x61,0x0e,
        0x00,0x28,0x21,0x0e,
        0x00,0x00,0x00,0xbd,

        0x00,0x00,0x40,0xbd,
        0x00,0xa4,0x08,0x2f,
        0x00,0xa4,0x10,0x2f,

        0x83,0xa8,0xb1,0x6e,
        0x83,0x3c,0x04,0x0e,
        0x83,0x3c,0x0c,0x0e,
        0x64,0x1c,0x1c,0x4e,
    });

    test_asm(r, [&](A& a) {
        a.ldrb(A::v0, A::x8);
        a.strb(A::v0, A::x8);
    },{
        0x00,0x01,0x40,0x3d,
        0x00,0x01,0x00,0x3d,
    });

    test_asm(r, [&](A& a) {
        a.ldrd(A::x0, A::x1, 3); // ldr x0, [x1, #24]
        a.ldrs(A::x0, A::x1, 3); // ldr w0, [x1, #12]
        a.ldrh(A::x0, A::x1, 3); // ldrh w0, [x1, #6]
        a.ldrb(A::x0, A::x1, 3); // ldrb w0, [x1, #3]

        a.strs(A::x0, A::x1, 3); // str w0, [x1, #12]
    },{
        0x20,0x0c,0x40,0xf9,
        0x20,0x0c,0x40,0xb9,
        0x20,0x0c,0x40,0x79,
        0x20,0x0c,0x40,0x39,

        0x20,0x0c,0x00,0xb9,
    });

    test_asm(r, [&](A& a) {
        a.tbl (A::v0, A::v1, A::v2);
        a.uzp14s(A::v0, A::v1, A::v2);
        a.uzp24s(A::v0, A::v1, A::v2);
        a.zip14s(A::v0, A::v1, A::v2);
        a.zip24s(A::v0, A::v1, A::v2);
    },{
        0x20,0x00,0x02,0x4e,
        0x20,0x18,0x82,0x4e,
        0x20,0x58,0x82,0x4e,
        0x20,0x38,0x82,0x4e,
        0x20,0x78,0x82,0x4e,
    });
}
2144
// Exercises skvm's approximate math builders (log2, pow2, powf, trig and inverse trig)
// against exact or libm-computed reference values.
DEF_TEST(SkVM_approx_math, r) {
    // Builds a program that loads a float, applies fn(&builder, value) to it and stores
    // the result back through the same pointer, then runs it in place over values[0..N).
    auto eval = [](int N, float values[], auto fn) {
        skvm::Builder b;
        skvm::Ptr inout = b.varying<float>();

        b.storeF(inout, fn(&b, b.loadF(inout)));

        b.done().eval(N, values);
    };

    // Asserts that values[i] matches expected[i] for i in [0,N).  Exact equality is tried
    // first (so an expected INFINITY can match), then SkScalarNearlyEqual with 0.001.
    auto compare = [r](int N, const float values[], const float expected[]) {
        for (int i = 0; i < N; ++i) {
            REPORTER_ASSERT(r, (values[i] == expected[i]) ||
                               SkScalarNearlyEqual(values[i], expected[i], 0.001f),
                            "evaluated to %g, but expected %g", values[i], expected[i]);
        }
    };

    // log2
    {
        float values[] = {0.25f, 0.5f, 1, 2, 4, 8};
        constexpr int N = std::size(values);
        eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
            return b->approx_log2(v);
        });
        const float expected[] = {-2, -1, 0, 1, 2, 3};
        compare(N, values, expected);
    }

    // pow2
    {
        float values[] = {-80, -5, -2, -1, 0, 1, 2, 3, 5, 160};
        constexpr int N = std::size(values);
        eval(N, values, [](skvm::Builder* b, skvm::F32 v) {
            return b->approx_pow2(v);
        });
        const float expected[] = {0, 0.03125f, 0.25f, 0.5f, 1, 2, 4, 8, 32, INFINITY};
        compare(N, values, expected);
    }
    // powf -- 1^x
    {
        float exps[] = {-2, -1, 0, 1, 2};
        constexpr int N = std::size(exps);
        eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
            return b->approx_powf(b->splat(1.0f), exp);
        });
        const float expected[] = {1, 1, 1, 1, 1};
        compare(N, exps, expected);
    }
    // powf -- 2^x
    {
        float exps[] = {-80, -5, -2, -1, 0, 1, 2, 3, 5, 160};
        constexpr int N = std::size(exps);
        eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
            return b->approx_powf(2.0, exp);
        });
        const float expected[] = {0, 0.03125f, 0.25f, 0.5f, 1, 2, 4, 8, 32, INFINITY};
        compare(N, exps, expected);
    }
    // powf -- 3^x
    {
        float exps[] = {-2, -1, 0, 1, 2};
        constexpr int N = std::size(exps);
        eval(N, exps, [](skvm::Builder* b, skvm::F32 exp) {
            return b->approx_powf(b->splat(3.0f), exp);
        });
        const float expected[] = {1/9.0f, 1/3.0f, 1, 3, 9};
        compare(N, exps, expected);
    }
    // powf -- x^0.5
    {
        float bases[] = {0, 1, 4, 9, 16};
        constexpr int N = std::size(bases);
        eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
            return b->approx_powf(base, b->splat(0.5f));
        });
        const float expected[] = {0, 1, 2, 3, 4};
        compare(N, bases, expected);
    }
    // powf -- x^1
    {
        float bases[] = {0, 1, 2, 3, 4};
        constexpr int N = std::size(bases);
        eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
            return b->approx_powf(base, b->splat(1.0f));
        });
        const float expected[] = {0, 1, 2, 3, 4};
        compare(N, bases, expected);
    }
    // powf -- x^2
    {
        float bases[] = {0, 1, 2, 3, 4};
        constexpr int N = std::size(bases);
        eval(N, bases, [](skvm::Builder* b, skvm::F32 base) {
            return b->approx_powf(base, b->splat(2.0f));
        });
        const float expected[] = {0, 1, 4, 9, 16};
        compare(N, bases, expected);
    }

    // Evaluates the one-argument program prog(x) at x = arg and returns the absolute
    // error |actual - expected|.
    //
    // NOTE(review): the assertion in the over-tolerance branch is REPORTER_ASSERT(r, true),
    // which can never fail, so exceeding `tolerance` is not reported as a test failure.
    // Confirm whether that is intentional before tightening it.
    auto test = [r](float arg, float expected, float tolerance, auto prog) {
        skvm::Builder b;
        skvm::Ptr inout = b.varying<float>();
        b.storeF(inout, prog(b.loadF(inout)));
        float actual = arg;
        b.done().eval(1, &actual);

        float err = std::abs(actual - expected);

        if (err > tolerance) {
    //        SkDebugf("arg %g, expected %g, actual %g\n", arg, expected, actual);
            REPORTER_ASSERT(r, true);
        }
        return err;
    };

    // Two-argument counterpart of test(): evaluates prog(arg0, arg1) and returns the
    // absolute error.  The same NOTE(review) about the always-true assertion applies.
    auto test2 = [r](float arg0, float arg1, float expected, float tolerance, auto prog) {
        skvm::Builder b;
        skvm::Ptr in0 = b.varying<float>();
        skvm::Ptr in1 = b.varying<float>();
        skvm::Ptr out = b.varying<float>();
        b.storeF(out, prog(b.loadF(in0), b.loadF(in1)));
        float actual;  // Written by the program's store to `out`.
        b.done().eval(1, &arg0, &arg1, &actual);

        float err = std::abs(actual - expected);

        if (err > tolerance) {
    //        SkDebugf("[%g, %g]: expected %g, actual %g\n", arg0, arg1, expected, actual);
            REPORTER_ASSERT(r, true);
        }
        return err;
    };

    // sine, cosine, tangent
    {
        constexpr float P = SK_ScalarPI;
        constexpr float tol = 0.00175f;
        for (float rad = -5*P; rad <= 5*P; rad += 0.1f) {
            test(rad, sk_float_sin(rad), tol, [](skvm::F32 x) {
                return approx_sin(x);
            });
            test(rad, sk_float_cos(rad), tol, [](skvm::F32 x) {
                return approx_cos(x);
            });
        }

        // Our tangent diverge more as we get near infinities (x near +- Pi/2),
        // so bring in the domain a little.
        constexpr float eps = 0.16f;
        float err = 0;
        for (float rad = -P/2 + eps; rad <= P/2 - eps; rad += 0.01f) {
            err += test(rad, sk_float_tan(rad), tol, [](skvm::F32 x) {
                return approx_tan(x);
            });
            // try again with some multiples of P, to check our periodicity
            test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
                return approx_tan(x + 3*P);
            });
            test(rad, sk_float_tan(rad), tol, [=](skvm::F32 x) {
                return approx_tan(x - 3*P);
            });
        }
        if ((false)) { SkDebugf("tan error %g\n", err); }
    }

    // asin, acos, atan
    {
        constexpr float tol = 0.00175f;
        float err = 0;
        for (float x = -1; x <= 1; x += 1.0f/64) {
            err += test(x, asin(x), tol, [](skvm::F32 x) {
                return approx_asin(x);
            });
            test(x, acos(x), tol, [](skvm::F32 x) {
                return approx_acos(x);
            });
        }
        if ((false)) { SkDebugf("asin error %g\n", err); }

        err = 0;
        for (float x = -10; x <= 10; x += 1.0f/16) {
            err += test(x, atan(x), tol, [](skvm::F32 x) {
                return approx_atan(x);
            });
        }
        if ((false)) { SkDebugf("atan error %g\n", err); }

        // Start the atan2 total from zero so the diagnostic below reports atan2 error
        // only, rather than atan's accumulated error plus atan2's.
        err = 0;
        for (float y = -3; y <= 3; y += 1) {
            for (float x = -3; x <= 3; x += 1) {
                err += test2(y, x, atan2(y,x), tol, [](skvm::F32 y, skvm::F32 x) {
                    return approx_atan2(y,x);
                });
            }
        }
        if ((false)) { SkDebugf("atan2 error %g\n", err); }
    }
}
2343
DEF_TEST(SkVM_min_max, r) {
    // NaN makes min() and max() sensitive to operand order: when exactly one operand
    // is NaN, swapping the two is not a sound rewrite.
    //
    // Every backend is required to agree with std::min() and std::max(), i.e.
    //
    //    min(x,y) = y<x ? y : x
    //    max(x,y) = x<y ? y : x

    constexpr int kCount = 8;

    // ±NaN, ±0, ±1, ±inf
    const uint32_t kBits[kCount] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
                                    0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};

    // The same bit patterns reinterpreted as floats.
    float f[kCount];
    memcpy(f, kBits, sizeof(kBits));

    // Bitwise float comparison, so NaN payloads and the sign of zero are distinguished.
    auto identical = [](float lhs, float rhs) {
        uint32_t lhsBits,
                 rhsBits;
        memcpy(&lhsBits, &lhs, sizeof(lhsBits));
        memcpy(&rhsBits, &rhs, sizeof(rhsBits));
        return lhsBits == rhsBits;
    };

    // Case 1: neither x nor y is a constant.
    // (It should not matter whether each of them is varying or uniform.)
    {
        skvm::Builder b;
        {
            skvm::Ptr src = b.varying<float>();
            skvm::Ptr mn  = b.varying<float>();
            skvm::Ptr mx  = b.varying<float>();

            skvm::F32 x = b.loadF(src);
            skvm::F32 y = b.uniformF(b.uniform(), 0);

            b.storeF(mn, b.min(x,y));
            b.storeF(mx, b.max(x,y));
        }

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float mn[kCount],
                  mx[kCount];
            for (int i = 0; i < kCount; i++) {
                // Pair every element of f with f[i], passed as the uniform.
                program.eval(kCount, f,mn,mx, &f[i]);

                for (int j = 0; j < kCount; j++) {
                    REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
                    REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
                }
            }
        });
    }

    // Case 2: each test value in turn as a constant right-hand operand.
    for (int i = 0; i < kCount; i++) {
        skvm::Builder b;
        {
            skvm::Ptr src = b.varying<float>();
            skvm::Ptr mn  = b.varying<float>();
            skvm::Ptr mx  = b.varying<float>();

            skvm::F32 x = b.loadF(src);
            skvm::F32 y = b.splat(f[i]);

            b.storeF(mn, b.min(x,y));
            b.storeF(mx, b.max(x,y));
        }

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float mn[kCount],
                  mx[kCount];
            program.eval(kCount, f,mn,mx);
            for (int j = 0; j < kCount; j++) {
                REPORTER_ASSERT(r, identical(mn[j], std::min(f[j], f[i])));
                REPORTER_ASSERT(r, identical(mx[j], std::max(f[j], f[i])));
            }
        });
    }

    // Case 3: each test value in turn as a constant left-hand operand.
    for (int i = 0; i < kCount; i++) {
        skvm::Builder b;
        {
            skvm::Ptr src = b.varying<float>();
            skvm::Ptr mn  = b.varying<float>();
            skvm::Ptr mx  = b.varying<float>();

            skvm::F32 x = b.splat(f[i]);
            skvm::F32 y = b.loadF(src);

            b.storeF(mn, b.min(x,y));
            b.storeF(mx, b.max(x,y));
        }

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float mn[kCount],
                  mx[kCount];
            program.eval(kCount, f,mn,mx);
            for (int j = 0; j < kCount; j++) {
                REPORTER_ASSERT(r, identical(mn[j], std::min(f[i], f[j])));
                REPORTER_ASSERT(r, identical(mx[j], std::max(f[i], f[j])));
            }
        });
    }
}
2447
DEF_TEST(SkVM_halfs, r) {
    // Paired tables: hs[i] is the 16-bit pattern this test expects to convert to and
    // from the float fs[i].
    constexpr int kCount = 8;
    const uint16_t hs[kCount] = {0x0000,0x3800,0x3c00,0x4000,
                                 0xc400,0xb800,0xbc00,0xc000};
    const float    fs[kCount] = {+0.0f,+0.5f,+1.0f,+2.0f,
                                 -4.0f,-0.5f,-1.0f,-2.0f};

    // 16-bit half -> float via from_fp16().
    {
        skvm::Builder b;
        skvm::Ptr src = b.varying<uint16_t>();
        skvm::Ptr dst = b.varying<float>();
        b.storeF(dst, b.from_fp16(b.load16(src)));

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            float widened[kCount];
            program.eval(kCount, hs, widened);
            for (int i = 0; i < kCount; i++) {
                REPORTER_ASSERT(r, widened[i] == fs[i]);
            }
        });
    }

    // float -> 16-bit half via to_fp16().
    {
        skvm::Builder b;
        skvm::Ptr src = b.varying<float>();
        skvm::Ptr dst = b.varying<uint16_t>();
        b.store16(dst, b.to_fp16(b.loadF(src)));

        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            uint16_t narrowed[kCount];
            program.eval(kCount, fs, narrowed);
            for (int i = 0; i < kCount; i++) {
                REPORTER_ASSERT(r, narrowed[i] == hs[i]);
            }
        });
    }
}
2482
DEF_TEST(SkVM_64bit, r) {
    // Reference data: wide[i] holds lo[i] in its low 32 bits and hi[i] in its high 32.
    constexpr int kN = 65;
    uint32_t lo[kN];
    uint32_t hi[kN];
    uint64_t wide[kN];
    for (int i = 0; i < kN; i++) {
        lo[i] = 2*i+0;
        hi[i] = 2*i+1;
        wide[i] = ((uint64_t)hi[i] << 32)
                | ((uint64_t)lo[i] <<  0);
    }

    // load64: split each 64-bit value into its two 32-bit halves.
    {
        skvm::Builder b;
        {
            skvm::Ptr widePtr = b.varying<uint64_t>();
            skvm::Ptr loPtr   = b.varying<int>();
            skvm::Ptr hiPtr   = b.varying<int>();
            b.store32(loPtr, b.load64(widePtr, 0));
            b.store32(hiPtr, b.load64(widePtr, 1));
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            uint32_t gotLo[kN],
                     gotHi[kN];
            program.eval(kN, wide,gotLo,gotHi);
            for (int i = 0; i < kN; i++) {
                REPORTER_ASSERT(r, gotLo[i] == lo[i]);
                REPORTER_ASSERT(r, gotHi[i] == hi[i]);
            }
        });
    }

    // store64: recombine the two 32-bit halves into one 64-bit value.
    {
        skvm::Builder b;
        {
            skvm::Ptr widePtr = b.varying<uint64_t>();
            skvm::Ptr loPtr   = b.varying<int>();
            skvm::Ptr hiPtr   = b.varying<int>();
            b.store64(widePtr, b.load32(loPtr), b.load32(hiPtr));
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            uint64_t gotWide[kN];
            program.eval(kN, gotWide,lo,hi);
            for (int i = 0; i < kN; i++) {
                REPORTER_ASSERT(r, gotWide[i] == wide[i]);
            }
        });
    }
}
2530
DEF_TEST(SkVM_128bit, r) {
    // 63 pixels of four channels each.  floats[i] starts out as i/255, so the 8-bit
    // value expected for channel i is i itself.
    constexpr int kPixels   = 63;
    constexpr int kChannels = 4*kPixels;

    float   floats[kChannels];
    uint8_t packed[kChannels];

    for (int i = 0; i < kChannels; i++) {
        floats[i] = i * (1/255.0f);
    }

    skvm::PixelFormat rgba_ffff = skvm::SkColorType_to_PixelFormat(kRGBA_F32_SkColorType);
    skvm::PixelFormat rgba_8888 = skvm::SkColorType_to_PixelFormat(kRGBA_8888_SkColorType);

    {  // RGBA F32 -> RGBA 8888, which exercises 128-bit loads.
        skvm::Builder b;
        {
            skvm::Ptr dst = b.varying(4);
            skvm::Ptr src = b.varying(16);

            b.store(rgba_8888, dst, b.load(rgba_ffff, src));
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            memset(packed, 0, sizeof(packed));
            program.eval(kPixels, packed, floats);
            for (int i = 0; i < kChannels; i++) {
                REPORTER_ASSERT(r, packed[i] == i);
            }
        });
    }


    {  // RGBA 8888 -> RGBA F32, which exercises 128-bit stores.
       // This reads `packed` as the block above left it.
        skvm::Builder b;
        {
            skvm::Ptr dst = b.varying(16);
            skvm::Ptr src = b.varying(4);

            b.store(rgba_ffff, dst, b.load(rgba_8888, src));
        }
        test_jit_and_interpreter(b, [&](const skvm::Program& program){
            memset(floats, 0, sizeof(floats));
            program.eval(kPixels, floats, packed);
            for (int i = 0; i < kChannels; i++) {
                REPORTER_ASSERT(r, floats[i] == i * (1/255.0f));
            }
        });
    }

}
2580
DEF_TEST(SkVM_is_NaN_is_finite, r) {
    // One program classifies each input twice: is_NaN() into the first output
    // and is_finite() into the second.
    skvm::Builder b;
    {
        skvm::Ptr src = b.varying<float>();
        skvm::Ptr nan = b.varying<int>();
        skvm::Ptr fin = b.varying<int>();
        b.store32(nan, is_NaN   (b.loadF(src)));
        b.store32(fin, is_finite(b.loadF(src)));
    }
    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        constexpr int kCount = 8;

        // ±NaN, ±0, ±1, ±inf
        const uint32_t bits[kCount] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000,
                                       0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000};
        uint32_t nan[kCount],
                 fin[kCount];
        program.eval(kCount, bits, nan,fin);

        for (int i = 0; i < kCount; i++) {
            // Indices 0-1 are the NaNs; indices 2-5 (±0, ±1) are the finite values.
            const bool wantNaN    = (i == 0 || i == 1);
            const bool wantFinite = (i >= 2 && i <= 5);

            // A true predicate is expected as an all-ones lane, a false one as zero.
            REPORTER_ASSERT(r, nan[i] == (wantNaN    ? 0xffffffff : 0));
            REPORTER_ASSERT(r, fin[i] == (wantFinite ? 0xffffffff : 0));
        }
    });
}
2604
DEF_TEST(SkVM_args, r) {
    // A program with six pointer arguments (one destination, five sources) must work.
    constexpr int kN = 17;

    skvm::Builder b;
    {
        skvm::Ptr dst = b.varying<float>();
        skvm::Ptr A   = b.varying<float>();
        skvm::Ptr B   = b.varying<float>();
        skvm::Ptr C   = b.varying<float>();
        skvm::Ptr D   = b.varying<float>();
        skvm::Ptr E   = b.varying<float>();

        skvm::F32 a = b.loadF(A);
        skvm::F32 v = b.loadF(B);
        skvm::F32 c = b.loadF(C);
        skvm::F32 d = b.loadF(D);
        skvm::F32 e = b.loadF(E);

        // dst = A + B + C + D + E
        storeF(dst, a + v + c + d + e);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        float dst[kN],A[kN],B[kN],C[kN],D[kN],E[kN];
        for (int i = 0; i < kN; i++) {
            const float value = (float)i;
            A[i] = value;
            B[i] = value;
            C[i] = value;
            D[i] = value;
            E[i] = value;
        }
        program.eval(kN, dst,A,B,C,D,E);

        // All five inputs equal i, so each sum is 5*i.
        for (int i = 0; i < kN; i++) {
            REPORTER_ASSERT(r, dst[i] == 5.0f*i);
        }
    });
}
2633
DEF_TEST(SkVM_badpack, reporter) {
    // Reduced from a real draw that failed,
    // originally because of a bad arm64 implementation of pack().
    constexpr int kN = 17;

    skvm::Builder p;
    {
        skvm::UPtr uniforms = p.uniform();
        skvm::Ptr  dst      = p.varying<uint16_t>();

        // Scale the uniform at offset 8 to 0..15; alpha is a constant 0xf.
        skvm::I32 red   = round(p.uniformF(uniforms, 8) * 15);
        skvm::I32 alpha = p.splat(0xf);

        // Assemble the 16-bit value: red at bit 12, alpha at bit 0.
        skvm::I32 packed = p.splat(0);
        packed = pack(packed, red,   12);
        packed = pack(packed, alpha,  0);
        store16(dst, packed);
    }

    test_jit_and_interpreter(p, [&](const skvm::Program& program){
        const float uniforms[] = { 0.0f, 0.0f,
                                   1.0f, 0.0f, 0.0f, 1.0f };

        uint16_t dst[kN] = {0};
        program.eval(kN, uniforms,dst);
        for (uint16_t pixel : dst) {
            REPORTER_ASSERT(reporter, pixel == 0xf00f, "got %04x, want %04x\n", pixel, 0xf00f);
        }
    });
}
2662
DEF_TEST(SkVM_features, r) {
    // Emits: load x, compute x*x+x, store the result.
    auto build_program = [](skvm::Builder* b) {
        skvm::F32 x = b->loadF(b->varying<float>());
        b->storeF(b->varying<float>(), x*x+x);
    };

    {   // FMA enabled: expect load, fma, store.
        skvm::Features withFma;
        withFma.fma = true;

        skvm::Builder b(withFma);
        build_program(&b);
        REPORTER_ASSERT(r, b.optimize().size() == 3);
    }

    {   // FMA disabled: expect load, mul, add, store.
        skvm::Features withoutFma;
        withoutFma.fma = false;

        skvm::Builder b(withoutFma);
        build_program(&b);
        REPORTER_ASSERT(r, b.optimize().size() == 4);
    }

    {   // Default-constructed builder: features are auto-detected, so either is fine.
        skvm::Builder b;
        build_program(&b);

        const auto count = b.optimize().size();
        REPORTER_ASSERT(r, count == 3 || count == 4);
    }
}
2692
DEF_TEST(SkVM_gather_can_hoist, r) {
    // Whether a gather is varying depends entirely on its index, so a gather with a
    // uniform index can be hoisted out of the loop.

    // The usual case first: the index is loaded from a varying buffer.
    {
        skvm::Builder b;
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf      = b.varying<int>();

        skvm::I32 index = b.load32(buf);
        b.store32(buf, b.gather32(uniforms,0, index));

        skvm::Program program = b.done();

        // A varying index makes the gather varying, so nothing is hoisted:
        //
        // loop:
        //     v0 = load32 buf
        //     v1 = gather32 uniforms+0 v0
        //     store32 buf v1
        REPORTER_ASSERT(r, program.instructions().size() == 3);
        REPORTER_ASSERT(r, program.loop() == 0);
    }

    // Same program, except that the index is now read from the uniforms.
    {
        skvm::Builder b;
        skvm::UPtr uniforms = b.uniform();
        skvm::Ptr  buf      = b.varying<int>();

        skvm::I32 index = b.uniform32(uniforms,8);
        b.store32(buf, b.gather32(uniforms,0, index));

        skvm::Program program = b.done();

        // A uniform index makes the gather uniform, so both run before the loop:
        //
        //     v0 = uniform32 uniforms+8
        //     v1 = gather32 uniforms+0 v0
        // loop:
        //     store32 buf v1
        REPORTER_ASSERT(r, program.instructions().size() == 3);
        REPORTER_ASSERT(r, program.loop() == 2);
    }
}
2735
DEF_TEST(SkVM_dont_dedup_loads, r) {
    // Ops with identical arguments are assumed to produce identical values and are
    // deduplicated, which acts as a simple common subexpression eliminator.
    //
    // That assumption breaks for two identical loads separated by a store.  Were the
    // loads in this program deduplicated, each element would grow by 1 instead of K.
    constexpr int K = 2;

    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();
        // K rounds of load / add 1 / store on the same buffer.
        for (int round = 0; round < K; round++) {
            skvm::I32 incremented = b.load32(buf) + 1;
            b.store32(buf, incremented);
        }
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        int buf[] = { 0,1,2,3,4 };
        const int count = (int)std::size(buf);

        program.eval(std::size(buf), buf);

        // buf[i] started out as i, so it must now be i+K.
        for (int i = 0; i < count; i++) {
            REPORTER_ASSERT(r, buf[i] == i+K);
        }
    });
}
2759
DEF_TEST(SkVM_dont_dedup_stores, r) {
    // The reasoning of SkVM_dont_dedup_loads applies to stores as well: when a different
    // store lands between two identical ones, it overwrites the first, so the second
    // identical store still has to be issued.
    skvm::Builder b;
    {
        skvm::Ptr buf = b.varying<int>();

        skvm::I32 four = b.splat(4);
        b.store32(buf, four);

        skvm::I32 five = b.splat(5);
        b.store32(buf, five);

        // This store would be dropped if stores were deduplicated.
        skvm::I32 fourAgain = b.splat(4);
        b.store32(buf, fourAgain);
    }

    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        int buf[42];
        program.eval(std::size(buf), buf);

        // The last store wins, so every element must read back as 4.
        for (size_t i = 0; i < std::size(buf); i++) {
            const int x = buf[i];
            REPORTER_ASSERT(r, x == 4);
        }
    });
}
2780
DEF_TEST(SkVM_fast_mul, r) {
    // Compares fast_mul(0, x) with the ordinary product 0 * x over special float values.
    skvm::Builder b;
    {
        skvm::Ptr src  = b.varying<float>();
        skvm::Ptr fast = b.varying<float>();
        skvm::Ptr slow = b.varying<float>();

        skvm::F32 x = b.loadF(src);
        b.storeF(fast, fast_mul(0.0f, x));
        b.storeF(slow, 0.0f * x);
    }
    test_jit_and_interpreter(b, [&](const skvm::Program& program){
        constexpr int kCount = 8;
        // The first four inputs are finite, the last four are not.
        constexpr int kFiniteCount = 4;

        const uint32_t bits[kCount] = {
            0x0000'0000, 0x8000'0000, //±0
            0x3f80'0000, 0xbf80'0000, //±1
            0x7f80'0000, 0xff80'0000, //±inf
            0x7f80'0001, 0xff80'0001, //±NaN
        };
        float fast[kCount];
        float slow[kCount];
        program.eval(kCount,bits,fast,slow);

        for (int i = 0; i < kCount; i++) {
            // fast_mul by zero is expected to yield zero for every input, inf and NaN included.
            REPORTER_ASSERT(r, fast[i] == 0.0f);

            // The ordinary product is zero only for finite inputs; 0*inf and 0*NaN are NaN.
            if (i >= kFiniteCount) {
                REPORTER_ASSERT(r, std::isnan(slow[i]));
            } else {
                REPORTER_ASSERT(r, slow[i] == 0.0f);
            }
        }
    });
}
2813
DEF_TEST(SkVM_duplicates, reporter) {
    // Builder constructed with `true`: finalize() should leave Op::duplicate
    // instructions behind, and eliminate_dead_code() should leave none.
    {
        skvm::Builder p(true);
        skvm::Ptr rptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr);
        skvm::F32 g = p.splat(0.0f);
        skvm::F32 b = p.splat(0.0f);
        skvm::F32 a = p.splat(1.0f);

        p.unpremul(&r, &g, &b, a);
        p.storeF(rptr, r);

        // `b` was created by `p`, so p.program() is the program that b-> reaches.
        std::vector<skvm::Instruction> program = p.program();

        const auto withDuplicates = skvm::finalize(program);
        const auto duplicates = std::count_if(withDuplicates.begin(), withDuplicates.end(),
                                              [](const auto& instr) {
                                                  return instr.op == skvm::Op::duplicate;
                                              });
        REPORTER_ASSERT(reporter, duplicates > 0);

        const auto eliminatedAsDeadCode = skvm::eliminate_dead_code(program);
        for (const auto& instr : eliminatedAsDeadCode) {
            REPORTER_ASSERT(reporter, instr.op != skvm::Op::duplicate);
        }
    }

    // Builder constructed with `false`: the finished program must contain no
    // Op::duplicate instructions at all.
    {
        skvm::Builder p(false);
        skvm::Ptr rptr = p.varying<int>();

        skvm::F32 r = p.loadF(rptr);
        skvm::F32 g = p.splat(0.0f);
        skvm::F32 b = p.splat(0.0f);
        skvm::F32 a = p.splat(1.0f);

        p.unpremul(&r, &g, &b, a);
        p.storeF(rptr, r);

        const auto withoutDuplicates = p.done().instructions();
        for (const auto& instr : withoutDuplicates) {
            REPORTER_ASSERT(reporter, instr.op != skvm::Op::duplicate);
        }
    }
}
2862
// Compiles a small SkSL function to SkVM with a debug trace and a visualizer attached,
// dumps the visualizer's HTML, and checks that every kind of row it can emit is present.
DEF_TEST(SkVM_Visualizer, r) {
    const char* src =
            "int main(int x, int y) {\n"
            " int a = 99;\n"
            " if (x > 0) a += 100;\n"
            " if (y > 0) a += 101;\n"
            " a = 102;\n"
            " return a;\n"
            "}";
    SkSL::Compiler compiler(SkSL::ShaderCapsFactory::Default());
    SkSL::ProgramSettings settings;
    auto program = compiler.convertProgram(SkSL::ProgramKind::kGeneric,
                                           std::string(src), settings);
    // Report a failure and stop instead of dereferencing null if compilation failed.
    REPORTER_ASSERT(r, program != nullptr);
    if (program == nullptr) {
        return;
    }

    const SkSL::FunctionDeclaration* main = program->getFunction("main");
    // Likewise for a missing main() or a main() without a definition.
    REPORTER_ASSERT(r, main != nullptr && main->definition() != nullptr);
    if (main == nullptr || main->definition() == nullptr) {
        return;
    }

    SkSL::SkVMDebugTrace d;
    d.setSource(src);
    auto v = std::make_unique<skvm::viz::Visualizer>(&d);
    skvm::Builder b(skvm::Features{}, /*createDuplicates=*/true);
    SkSL::ProgramToSkVM(*program, *main->definition(), &b, &d, /*uniforms=*/{});

    // Ownership of the visualizer moves into the program, which then dumps it as HTML.
    skvm::Program p = b.done(nullptr, true, std::move(v));
    SkDynamicMemoryWStream vizFile;
    p.visualizer()->dump(&vizFile);
    auto vizData = vizFile.detachAsData();
    std::string html((const char*)vizData->data(), vizData->size());
    //b.dump();
    //std::printf(html.c_str());
    // Check that html contains all types of information:
    REPORTER_ASSERT(r, std::strstr(html.c_str(), "<tr class='normal'>"));       // SkVM byte code
    REPORTER_ASSERT(r, std::strstr(html.c_str(), "<tr class='source'>"));       // SkSL source
    REPORTER_ASSERT(r, std::strstr(html.c_str(), "<tr class='dead'>"));         // dead code
    REPORTER_ASSERT(r, std::strstr(html.c_str(), "<tr class='dead deduped'>")); // deduped removed
    REPORTER_ASSERT(r, std::strstr(html.c_str(),                                // deduped origins
                                   "<tr class='normal origin'>"
                                   "<td>↑↑↑ *13</td>"
                                   "<td>v2 = splat 0 (0)</td></tr>"));
    REPORTER_ASSERT(r, std::strstr(html.c_str(),                                // trace enter
                                   "<tr class='source'><td class='mask'>↪v9</td>"
                                   "<td colspan=2>int main(int x, int y)</td></tr>"));
    REPORTER_ASSERT(r, std::strstr(html.c_str(),                                // trace exit
                                   "<tr class='source'><td class='mask'>↩v9</td>"
                                   "<td colspan=2>int main(int x, int y)</td></tr>"));
}
2906