1 /*
2 * Copyright 2019 Google LLC
3 *
4 * Use of this source code is governed by a BSD-style license that can be
5 * found in the LICENSE file.
6 */
7
8 #include "include/core/SkColorPriv.h"
9 #include "include/private/SkColorData.h"
10 #include "src/core/SkVM.h"
11 #include "tests/Test.h"
12 #include "tools/Resources.h"
13 #include "tools/SkVMBuilders.h"
14
15 using Fmt = SrcoverBuilder_F32::Fmt;
fmt_name(Fmt fmt)16 const char* fmt_name(Fmt fmt) {
17 switch (fmt) {
18 case Fmt::A8: return "A8";
19 case Fmt::G8: return "G8";
20 case Fmt::RGBA_8888: return "RGBA_8888";
21 }
22 return "";
23 }
24
25 namespace {
26 using namespace skvm;
27
28 struct V { Val id; };
29 struct R { Reg id; };
30 struct Shift { int bits; };
31 struct Splat { int bits; };
32 struct Hex { int bits; };
33
write(SkWStream * o,const char * s)34 static void write(SkWStream* o, const char* s) {
35 o->writeText(s);
36 }
37
write(SkWStream * o,Arg a)38 static void write(SkWStream* o, Arg a) {
39 write(o, "arg(");
40 o->writeDecAsText(a.ix);
41 write(o, ")");
42 }
write(SkWStream * o,V v)43 static void write(SkWStream* o, V v) {
44 write(o, "v");
45 o->writeDecAsText(v.id);
46 }
write(SkWStream * o,R r)47 static void write(SkWStream* o, R r) {
48 write(o, "r");
49 o->writeDecAsText(r.id);
50 }
write(SkWStream * o,Shift s)51 static void write(SkWStream* o, Shift s) {
52 o->writeDecAsText(s.bits);
53 }
write(SkWStream * o,Splat s)54 static void write(SkWStream* o, Splat s) {
55 float f;
56 memcpy(&f, &s.bits, 4);
57 o->writeHexAsText(s.bits);
58 write(o, " (");
59 o->writeScalarAsText(f);
60 write(o, ")");
61 }
write(SkWStream * o,Hex h)62 static void write(SkWStream* o, Hex h) {
63 o->writeHexAsText(h.bits);
64 }
65
66 template <typename T, typename... Ts>
write(SkWStream * o,T first,Ts...rest)67 static void write(SkWStream* o, T first, Ts... rest) {
68 write(o, first);
69 write(o, " ");
70 write(o, rest...);
71 }
72
dump_builder(const Builder & builder,SkWStream * o)73 static void dump_builder(const Builder& builder, SkWStream* o) {
74 const std::vector<Builder::Instruction> program = builder.program();
75
76 o->writeDecAsText(program.size());
77 o->writeText(" values:\n");
78 for (Val id = 0; id < (Val)program.size(); id++) {
79 const Builder::Instruction& inst = program[id];
80 Op op = inst.op;
81 Val x = inst.x,
82 y = inst.y,
83 z = inst.z;
84 int imm = inst.imm;
85 write(o, inst.death == 0 ? "☠️ " :
86 inst.hoist ? "↑ " : " ");
87 switch (op) {
88 case Op::store8: write(o, "store8" , Arg{imm}, V{x}); break;
89 case Op::store16: write(o, "store16", Arg{imm}, V{x}); break;
90 case Op::store32: write(o, "store32", Arg{imm}, V{x}); break;
91
92 case Op::load8: write(o, V{id}, "= load8" , Arg{imm}); break;
93 case Op::load16: write(o, V{id}, "= load16", Arg{imm}); break;
94 case Op::load32: write(o, V{id}, "= load32", Arg{imm}); break;
95
96 case Op::gather8: write(o, V{id}, "= gather8" , Arg{imm}, V{x}); break;
97 case Op::gather16: write(o, V{id}, "= gather16", Arg{imm}, V{x}); break;
98 case Op::gather32: write(o, V{id}, "= gather32", Arg{imm}, V{x}); break;
99
100 case Op::uniform8: write(o, V{id}, "= uniform8" , Arg{imm & 0xffff}, Hex{imm>>16}); break;
101 case Op::uniform16: write(o, V{id}, "= uniform16", Arg{imm & 0xffff}, Hex{imm>>16}); break;
102 case Op::uniform32: write(o, V{id}, "= uniform32", Arg{imm & 0xffff}, Hex{imm>>16}); break;
103
104 case Op::splat: write(o, V{id}, "= splat", Splat{imm}); break;
105
106
107 case Op::add_f32: write(o, V{id}, "= add_f32", V{x}, V{y} ); break;
108 case Op::sub_f32: write(o, V{id}, "= sub_f32", V{x}, V{y} ); break;
109 case Op::mul_f32: write(o, V{id}, "= mul_f32", V{x}, V{y} ); break;
110 case Op::div_f32: write(o, V{id}, "= div_f32", V{x}, V{y} ); break;
111 case Op::mad_f32: write(o, V{id}, "= mad_f32", V{x}, V{y}, V{z}); break;
112
113 case Op:: eq_f32: write(o, V{id}, "= eq_f32", V{x}, V{y}); break;
114 case Op::neq_f32: write(o, V{id}, "= neq_f32", V{x}, V{y}); break;
115 case Op:: lt_f32: write(o, V{id}, "= lt_f32", V{x}, V{y}); break;
116 case Op::lte_f32: write(o, V{id}, "= lte_f32", V{x}, V{y}); break;
117 case Op:: gt_f32: write(o, V{id}, "= gt_f32", V{x}, V{y}); break;
118 case Op::gte_f32: write(o, V{id}, "= gte_f32", V{x}, V{y}); break;
119
120
121 case Op::add_i32: write(o, V{id}, "= add_i32", V{x}, V{y}); break;
122 case Op::sub_i32: write(o, V{id}, "= sub_i32", V{x}, V{y}); break;
123 case Op::mul_i32: write(o, V{id}, "= mul_i32", V{x}, V{y}); break;
124
125 case Op::shl_i32: write(o, V{id}, "= shl_i32", V{x}, Shift{imm}); break;
126 case Op::shr_i32: write(o, V{id}, "= shr_i32", V{x}, Shift{imm}); break;
127 case Op::sra_i32: write(o, V{id}, "= sra_i32", V{x}, Shift{imm}); break;
128
129 case Op:: eq_i32: write(o, V{id}, "= eq_i32", V{x}, V{y}); break;
130 case Op::neq_i32: write(o, V{id}, "= neq_i32", V{x}, V{y}); break;
131 case Op:: lt_i32: write(o, V{id}, "= lt_i32", V{x}, V{y}); break;
132 case Op::lte_i32: write(o, V{id}, "= lte_i32", V{x}, V{y}); break;
133 case Op:: gt_i32: write(o, V{id}, "= gt_i32", V{x}, V{y}); break;
134 case Op::gte_i32: write(o, V{id}, "= gte_i32", V{x}, V{y}); break;
135
136 case Op::add_i16x2: write(o, V{id}, "= add_i16x2", V{x}, V{y}); break;
137 case Op::sub_i16x2: write(o, V{id}, "= sub_i16x2", V{x}, V{y}); break;
138 case Op::mul_i16x2: write(o, V{id}, "= mul_i16x2", V{x}, V{y}); break;
139
140 case Op::shl_i16x2: write(o, V{id}, "= shl_i16x2", V{x}, Shift{imm}); break;
141 case Op::shr_i16x2: write(o, V{id}, "= shr_i16x2", V{x}, Shift{imm}); break;
142 case Op::sra_i16x2: write(o, V{id}, "= sra_i16x2", V{x}, Shift{imm}); break;
143
144 case Op:: eq_i16x2: write(o, V{id}, "= eq_i16x2", V{x}, V{y}); break;
145 case Op::neq_i16x2: write(o, V{id}, "= neq_i16x2", V{x}, V{y}); break;
146 case Op:: lt_i16x2: write(o, V{id}, "= lt_i16x2", V{x}, V{y}); break;
147 case Op::lte_i16x2: write(o, V{id}, "= lte_i16x2", V{x}, V{y}); break;
148 case Op:: gt_i16x2: write(o, V{id}, "= gt_i16x2", V{x}, V{y}); break;
149 case Op::gte_i16x2: write(o, V{id}, "= gte_i16x2", V{x}, V{y}); break;
150
151 case Op::bit_and : write(o, V{id}, "= bit_and" , V{x}, V{y} ); break;
152 case Op::bit_or : write(o, V{id}, "= bit_or" , V{x}, V{y} ); break;
153 case Op::bit_xor : write(o, V{id}, "= bit_xor" , V{x}, V{y} ); break;
154 case Op::bit_clear: write(o, V{id}, "= bit_clear", V{x}, V{y} ); break;
155 case Op::select : write(o, V{id}, "= select" , V{x}, V{y}, V{z}); break;
156
157 case Op::bytes: write(o, V{id}, "= bytes", V{x}, Hex{imm}); break;
158 case Op::extract: write(o, V{id}, "= extract", V{x}, Shift{imm}, V{y}); break;
159 case Op::pack: write(o, V{id}, "= pack", V{x}, V{y}, Shift{imm}); break;
160
161 case Op::to_f32: write(o, V{id}, "= to_f32", V{x}); break;
162 case Op::to_i32: write(o, V{id}, "= to_i32", V{x}); break;
163 }
164
165 write(o, "\n");
166 }
167 }
168
dump_program(const Program & program,SkWStream * o)169 static void dump_program(const Program& program, SkWStream* o) {
170 const std::vector<Program::Instruction> instructions = program.instructions();
171 const int nregs = program.nregs();
172 const int loop = program.loop();
173
174 o->writeDecAsText(nregs);
175 o->writeText(" registers, ");
176 o->writeDecAsText(instructions.size());
177 o->writeText(" instructions:\n");
178 for (int i = 0; i < (int)instructions.size(); i++) {
179 if (i == loop) {
180 write(o, "loop:\n");
181 }
182 const Program::Instruction& inst = instructions[i];
183 Op op = inst.op;
184 Reg d = inst.d,
185 x = inst.x,
186 y = inst.y,
187 z = inst.z;
188 int imm = inst.imm;
189 switch (op) {
190 case Op::store8: write(o, "store8" , Arg{imm}, R{x}); break;
191 case Op::store16: write(o, "store16", Arg{imm}, R{x}); break;
192 case Op::store32: write(o, "store32", Arg{imm}, R{x}); break;
193
194 case Op::load8: write(o, R{d}, "= load8" , Arg{imm}); break;
195 case Op::load16: write(o, R{d}, "= load16", Arg{imm}); break;
196 case Op::load32: write(o, R{d}, "= load32", Arg{imm}); break;
197
198 case Op::gather8: write(o, R{d}, "= gather8" , Arg{imm}, R{x}); break;
199 case Op::gather16: write(o, R{d}, "= gather16", Arg{imm}, R{x}); break;
200 case Op::gather32: write(o, R{d}, "= gather32", Arg{imm}, R{x}); break;
201
202 case Op::uniform8: write(o, R{d}, "= uniform8" , Arg{imm & 0xffff}, Hex{imm>>16}); break;
203 case Op::uniform16: write(o, R{d}, "= uniform16", Arg{imm & 0xffff}, Hex{imm>>16}); break;
204 case Op::uniform32: write(o, R{d}, "= uniform32", Arg{imm & 0xffff}, Hex{imm>>16}); break;
205
206 case Op::splat: write(o, R{d}, "= splat", Splat{imm}); break;
207
208
209 case Op::add_f32: write(o, R{d}, "= add_f32", R{x}, R{y} ); break;
210 case Op::sub_f32: write(o, R{d}, "= sub_f32", R{x}, R{y} ); break;
211 case Op::mul_f32: write(o, R{d}, "= mul_f32", R{x}, R{y} ); break;
212 case Op::div_f32: write(o, R{d}, "= div_f32", R{x}, R{y} ); break;
213 case Op::mad_f32: write(o, R{d}, "= mad_f32", R{x}, R{y}, R{z}); break;
214
215 case Op:: eq_f32: write(o, R{d}, "= eq_f32", R{x}, R{y}); break;
216 case Op::neq_f32: write(o, R{d}, "= neq_f32", R{x}, R{y}); break;
217 case Op:: lt_f32: write(o, R{d}, "= lt_f32", R{x}, R{y}); break;
218 case Op::lte_f32: write(o, R{d}, "= lte_f32", R{x}, R{y}); break;
219 case Op:: gt_f32: write(o, R{d}, "= gt_f32", R{x}, R{y}); break;
220 case Op::gte_f32: write(o, R{d}, "= gte_f32", R{x}, R{y}); break;
221
222
223 case Op::add_i32: write(o, R{d}, "= add_i32", R{x}, R{y}); break;
224 case Op::sub_i32: write(o, R{d}, "= sub_i32", R{x}, R{y}); break;
225 case Op::mul_i32: write(o, R{d}, "= mul_i32", R{x}, R{y}); break;
226
227 case Op::shl_i32: write(o, R{d}, "= shl_i32", R{x}, Shift{imm}); break;
228 case Op::shr_i32: write(o, R{d}, "= shr_i32", R{x}, Shift{imm}); break;
229 case Op::sra_i32: write(o, R{d}, "= sra_i32", R{x}, Shift{imm}); break;
230
231 case Op:: eq_i32: write(o, R{d}, "= eq_i32", R{x}, R{y}); break;
232 case Op::neq_i32: write(o, R{d}, "= neq_i32", R{x}, R{y}); break;
233 case Op:: lt_i32: write(o, R{d}, "= lt_i32", R{x}, R{y}); break;
234 case Op::lte_i32: write(o, R{d}, "= lte_i32", R{x}, R{y}); break;
235 case Op:: gt_i32: write(o, R{d}, "= gt_i32", R{x}, R{y}); break;
236 case Op::gte_i32: write(o, R{d}, "= gte_i32", R{x}, R{y}); break;
237
238
239 case Op::add_i16x2: write(o, R{d}, "= add_i16x2", R{x}, R{y}); break;
240 case Op::sub_i16x2: write(o, R{d}, "= sub_i16x2", R{x}, R{y}); break;
241 case Op::mul_i16x2: write(o, R{d}, "= mul_i16x2", R{x}, R{y}); break;
242
243 case Op::shl_i16x2: write(o, R{d}, "= shl_i16x2", R{x}, Shift{imm}); break;
244 case Op::shr_i16x2: write(o, R{d}, "= shr_i16x2", R{x}, Shift{imm}); break;
245 case Op::sra_i16x2: write(o, R{d}, "= sra_i16x2", R{x}, Shift{imm}); break;
246
247 case Op:: eq_i16x2: write(o, R{d}, "= eq_i16x2", R{x}, R{y}); break;
248 case Op::neq_i16x2: write(o, R{d}, "= neq_i16x2", R{x}, R{y}); break;
249 case Op:: lt_i16x2: write(o, R{d}, "= lt_i16x2", R{x}, R{y}); break;
250 case Op::lte_i16x2: write(o, R{d}, "= lte_i16x2", R{x}, R{y}); break;
251 case Op:: gt_i16x2: write(o, R{d}, "= gt_i16x2", R{x}, R{y}); break;
252 case Op::gte_i16x2: write(o, R{d}, "= gte_i16x2", R{x}, R{y}); break;
253
254
255 case Op::bit_and : write(o, R{d}, "= bit_and" , R{x}, R{y} ); break;
256 case Op::bit_or : write(o, R{d}, "= bit_or" , R{x}, R{y} ); break;
257 case Op::bit_xor : write(o, R{d}, "= bit_xor" , R{x}, R{y} ); break;
258 case Op::bit_clear: write(o, R{d}, "= bit_clear", R{x}, R{y} ); break;
259 case Op::select : write(o, R{d}, "= select" , R{x}, R{y}, R{z}); break;
260
261 case Op::bytes: write(o, R{d}, "= bytes", R{x}, Hex{imm}); break;
262 case Op::extract: write(o, R{d}, "= extract", R{x}, Shift{imm}, R{y}); break;
263 case Op::pack: write(o, R{d}, "= pack", R{x}, R{y}, Shift{imm}); break;
264
265 case Op::to_f32: write(o, R{d}, "= to_f32", R{x}); break;
266 case Op::to_i32: write(o, R{d}, "= to_i32", R{x}); break;
267 }
268 write(o, "\n");
269 }
270 }
271
dump(Builder & builder,SkWStream * o)272 static void dump(Builder& builder, SkWStream* o) {
273 skvm::Program program = builder.done();
274 dump_builder(builder, o);
275 o->writeText("\n");
276 dump_program(program, o);
277 o->writeText("\n");
278 }
279
280 } // namespace
281
282 template <typename Fn>
test_jit_and_interpreter(skvm::Program && program,Fn && test)283 static void test_jit_and_interpreter(skvm::Program&& program, Fn&& test) {
284 test((const skvm::Program&) program);
285 program.dropJIT();
286 test((const skvm::Program&) program);
287 }
288
// Main SkVM test.
//
//  1. Dumps the builder/program listings of several srcover builders (plus a
//     small hoisting example) into one buffer and compares that buffer
//     byte-for-byte against the SkVMTest.expected resource.
//  2. Runs the srcover programs on real pixel data and checks the results
//     against SkPMSrcOver within a small per-channel tolerance.
DEF_TEST(SkVM, r) {
    SkDynamicMemoryWStream buf;

    // Write all combinations of SrcoverBuilder_F32
    for (int s = 0; s < 3; s++)
    for (int d = 0; d < 3; d++) {
        auto srcFmt = (Fmt)s,
             dstFmt = (Fmt)d;
        SrcoverBuilder_F32 builder{srcFmt, dstFmt};

        buf.writeText(fmt_name(srcFmt));
        buf.writeText(" over ");
        buf.writeText(fmt_name(dstFmt));
        buf.writeText("\n");
        dump(builder, &buf);
    }

    // Write the I32 Srcovers also.
    {
        SrcoverBuilder_I32_Naive builder;
        buf.writeText("I32 (Naive) 8888 over 8888\n");
        dump(builder, &buf);
    }
    {
        SrcoverBuilder_I32 builder;
        buf.writeText("I32 8888 over 8888\n");
        dump(builder, &buf);
    }
    {
        SrcoverBuilder_I32_SWAR builder;
        buf.writeText("I32 (SWAR) 8888 over 8888\n");
        dump(builder, &buf);
    }

    {
        skvm::Builder b;
        skvm::Arg arg = b.varying<int>();

        // x and y can both be hoisted,
        // and x can die at y, while y lives forever.
        skvm::I32 x = b.splat(1),
                  y = b.add(x, b.splat(2));
        b.store32(arg, b.mul(b.load32(arg), y));

        skvm::Program program = b.done();
        REPORTER_ASSERT(r, program.nregs() == 2);

        std::vector<skvm::Builder::Instruction> insts = b.program();
        REPORTER_ASSERT(r, insts.size() == 6);
        REPORTER_ASSERT(r,  insts[0].hoist && insts[0].death == 2);
        REPORTER_ASSERT(r,  insts[1].hoist && insts[1].death == 2);
        REPORTER_ASSERT(r,  insts[2].hoist && insts[2].death == 6);
        REPORTER_ASSERT(r, !insts[3].hoist);
        REPORTER_ASSERT(r, !insts[4].hoist);
        REPORTER_ASSERT(r, !insts[5].hoist);

        dump(b, &buf);

        test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
            int arg[] = {0,1,2,3,4,5,6,7,8,9};

            program.eval(SK_ARRAY_COUNT(arg), arg);

            // The program multiplies each element by y = 1 + 2.
            for (int i = 0; i < (int)SK_ARRAY_COUNT(arg); i++) {
                REPORTER_ASSERT(r, arg[i] == i*3);
            }
        });
    }

    sk_sp<SkData> blob = buf.detachAsData();
    {

        sk_sp<SkData> expected = GetResourceAsData("SkVMTest.expected");
        REPORTER_ASSERT(r, expected, "Couldn't load SkVMTest.expected.");
        if (expected) {
            if (blob->size() != expected->size()
                    || 0 != memcmp(blob->data(), expected->data(), blob->size())) {

                // "%.*s" takes an int precision and a char pointer, so both
                // arguments are cast explicitly rather than passed as
                // size_t / const void*.
                ERRORF(r, "SkVMTest expected\n%.*s\nbut got\n%.*s\n",
                       (int)expected->size(), (const char*)expected->data(),
                       (int)blob->size(),     (const char*)blob->data());
            }

            // The freshly generated dump is written over the resource file
            // whether or not it matched (presumably so a failing run leaves
            // the new output on disk for review).
            SkFILEWStream out(GetResourcePath("SkVMTest.expected").c_str());
            if (out.isValid()) {
                out.write(blob->data(), blob->size());
            }
        }
    }

    // Runs an 8888-over-8888 program on constant src/dst pixels and checks
    // every channel of every result against SkPMSrcOver, allowing a
    // difference of at most 1.
    auto test_8888 = [&](skvm::Program&& program) {
        uint32_t src[9];
        uint32_t dst[SK_ARRAY_COUNT(src)];

        test_jit_and_interpreter(std::move(program), [&](const skvm::Program& program) {
            for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
                src[i] = 0xbb007733;
                dst[i] = 0xffaaccee;
            }

            SkPMColor expected = SkPMSrcOver(src[0], dst[0]);  // 0xff2dad73

            program.eval((int)SK_ARRAY_COUNT(src), src, dst);

            // dst is probably 0xff2dad72.
            for (auto got : dst) {
                auto want = expected;
                for (int i = 0; i < 4; i++) {
                    uint8_t d = got  & 0xff,
                            w = want & 0xff;
                    if (abs(d-w) >= 2) {
                        SkDebugf("d %02x, w %02x\n", d,w);
                    }
                    REPORTER_ASSERT(r, abs(d-w) < 2);
                    got  >>= 8;
                    want >>= 8;
                }
            }
        });
    };

    test_8888(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::RGBA_8888}.done("srcover_f32"));
    test_8888(SrcoverBuilder_I32_Naive{}.done("srcover_i32_naive"));
    test_8888(SrcoverBuilder_I32{}.done("srcover_i32"));
    test_8888(SrcoverBuilder_I32_SWAR{}.done("srcover_i32_SWAR"));

    // 8888 source over a G8 destination: compare with the luminance of the
    // equivalent 8888 srcover, allowing a difference of at most 2.
    test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::RGBA_8888, Fmt::G8}.done(),
                             [&](const skvm::Program& program) {
        uint32_t src[9];
        uint8_t  dst[SK_ARRAY_COUNT(src)];

        for (int i = 0; i < (int)SK_ARRAY_COUNT(src); i++) {
            src[i] = 0xbb007733;
            dst[i] = 0x42;
        }

        SkPMColor over = SkPMSrcOver(SkPackARGB32(0xbb, 0x33, 0x77, 0x00),
                                     0xff424242);

        uint8_t want = SkComputeLuminance(SkGetPackedR32(over),
                                          SkGetPackedG32(over),
                                          SkGetPackedB32(over));
        program.eval((int)SK_ARRAY_COUNT(src), src, dst);

        for (auto got : dst) {
            REPORTER_ASSERT(r, abs(got-want) < 3);
        }
    });

    // A8 over A8: for every i, src alpha is 255 - i and dst alpha is i.
    test_jit_and_interpreter(SrcoverBuilder_F32{Fmt::A8, Fmt::A8}.done(),
                             [&](const skvm::Program& program) {
        uint8_t src[256],
                dst[256];
        for (int i = 0; i < 256; i++) {
            src[i] = 255 - i;
            dst[i] = i;
        }

        program.eval(256, src, dst);

        for (int i = 0; i < 256; i++) {
            uint8_t want = SkGetPackedA32(SkPMSrcOver(SkPackARGB32(src[i], 0,0,0),
                                                      SkPackARGB32(     i, 0,0,0)));
            REPORTER_ASSERT(r, abs(dst[i]-want) < 2);
        }
    });
}
456
// A program with no memory arguments at all: every instruction should be
// marked as dead code, yet the program must still be runnable.
DEF_TEST(SkVM_Pointless, r) {
    skvm::Builder b;
    {
        b.add(b.splat(5.0f),
              b.splat(4.0f));
    }

    // "Run" it for a range of counts; there is nothing to inspect afterwards.
    auto run = [&](const skvm::Program& program) {
        for (int N = 0; N < 64; N++) {
            program.eval(N);
        }
    };
    test_jit_and_interpreter(b.done(), run);

    for (const skvm::Builder::Instruction& inst : b.program()) {
        REPORTER_ASSERT(r, inst.death == 0 && inst.hoist == true);
    }
}
476
// Checks that eval(N, ...) touches exactly the first N elements, for every N
// from 0 up to the full buffer length.
DEF_TEST(SkVM_LoopCounts, r) {
    // The program computes buf[i] += 1.
    skvm::Builder b;
    skvm::Arg arg = b.varying<int>();
    b.store32(arg,
              b.add(b.splat(1),
                    b.load32(arg)));

    auto check = [&](const skvm::Program& program) {
        int buf[64];
        const int len = (int)SK_ARRAY_COUNT(buf);

        for (int N = 0; N <= len; N++) {
            // Reset the buffer so each N starts from buf[i] == i.
            for (int i = 0; i < len; i++) {
                buf[i] = i;
            }

            program.eval(N, buf);

            // The first N entries were incremented...
            for (int i = 0; i < N; i++) {
                REPORTER_ASSERT(r, buf[i] == i+1);
            }
            // ...and the rest were left alone.
            for (int i = N; i < len; i++) {
                REPORTER_ASSERT(r, buf[i] == i);
            }
        }
    };
    test_jit_and_interpreter(b.done(), check);
}
504
// Exercises gather32/gather16/gather8 from one uniform image, using the loaded
// value masked with 7, 15 and 31 respectively as the gather index.
DEF_TEST(SkVM_gathers, r) {
    skvm::Builder b;
    {
        skvm::Arg img   = b.uniform(),
                  buf32 = b.varying<int>(),
                  buf16 = b.varying<uint16_t>(),
                  buf8  = b.varying<uint8_t>();

        skvm::I32 x = b.load32(buf32);

        b.store32(buf32, b.gather32(img, b.bit_and(x, b.splat( 7))));
        b.store16(buf16, b.gather16(img, b.bit_and(x, b.splat(15))));
        b.store8 (buf8 , b.gather8 (img, b.bit_and(x, b.splat(31))));
    }

    auto check = [&](const skvm::Program& program) {
        const int img[] = {12,34,56,78, 90,98,76,54};

        constexpr int N = 20;
        int      buf32[N];
        uint16_t buf16[N];
        uint8_t  buf8 [N];

        // buf32 doubles as the index source: element i starts out as i.
        for (int i = 0; i < N; i++) {
            buf32[i] = i;
        }

        program.eval(N, img, buf32, buf16, buf8);

        // Expected values are listed one element per line, in index order.
        int i = 0;
        REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 12); i++;
        REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 56 && buf8[i] == 34); i++;
        REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] ==  0 && buf8[i] ==  0); i++;

        REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 90 && buf8[i] == 56); i++;
        REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 90 && buf16[i] == 76 && buf8[i] == 78); i++;
        REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] ==  0 && buf8[i] ==  0); i++;

        REPORTER_ASSERT(r, buf32[i] == 12 && buf16[i] == 12 && buf8[i] == 90); i++;
        REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] ==  0 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] ==  0); i++;
        REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] ==  0 && buf8[i] ==  0); i++;
    };
    test_jit_and_interpreter(b.done(), check);
}
558
// Chains the bitwise ops and the three shift flavors on a single value and
// checks the final result. The trailing comments track the value for the
// input 0x42 used below.
DEF_TEST(SkVM_bitops, r) {
    skvm::Builder b;
    {
        skvm::Arg ptr = b.varying<int>();

        skvm::I32 x = b.load32(ptr);

        x = b.bit_and  (x, b.splat(0xf1));  // 0x40
        x = b.bit_or   (x, b.splat(0x80));  // 0xc0
        x = b.bit_xor  (x, b.splat(0xfe));  // 0x3e
        x = b.bit_clear(x, b.splat(0x30));  // 0x0e

        x = b.shl(x, 28);  // 0xe000'0000
        x = b.sra(x, 28);  // 0xffff'fffe
        x = b.shr(x,  1);  // 0x7fff'ffff

        b.store32(ptr, x);
    }

    auto check = [&](const skvm::Program& program) {
        int x = 0x42;
        program.eval(1, &x);
        REPORTER_ASSERT(r, x == 0x7fff'ffff);
    };
    test_jit_and_interpreter(b.done(), check);
}
584
// Basic float arithmetic: add, sub and div combined so that every (nonzero)
// input maps to exactly 1.0f.
DEF_TEST(SkVM_f32, r) {
    skvm::Builder b;
    {
        skvm::Arg arg = b.varying<float>();

        skvm::F32 x = b.bit_cast(b.load32(arg)),
                  y = b.add(x,x),   // y = 2x
                  z = b.sub(y,x),   // z = 2x-x = x
                  w = b.div(z,x);   // w = x/x = 1
        b.store32(arg, b.bit_cast(w));
    }

    auto check = [&](const skvm::Program& program) {
        float buf[] = { 1,2,3,4,5,6,7,8,9 };
        program.eval(SK_ARRAY_COUNT(buf), buf);
        for (float v : buf) {
            REPORTER_ASSERT(r, v == 1.0f);
        }
    };
    test_jit_and_interpreter(b.done(), check);
}
605
// Runs all six i32 comparisons against the constants 0..5 and packs the six
// results into bits 0..5 of one output word per input.
DEF_TEST(SkVM_cmp_i32, r) {
    skvm::Builder b;
    {
        skvm::I32 x = b.load32(b.varying<int>());

        // Reduces a comparison mask to its lowest bit and moves that bit to
        // position `shift`.
        auto to_bit = [&](int shift, skvm::I32 mask) {
            return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
        };

        skvm::I32 m = b.splat(0);
        m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0))));
        m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1))));
        m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2))));
        m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3))));
        m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4))));
        m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5))));

        b.store32(b.varying<int>(), m);
    }

    auto check = [&](const skvm::Program& program) {
        int in[] = { 0,1,2,3,4,5,6,7,8,9 };
        int out[SK_ARRAY_COUNT(in)];

        program.eval(SK_ARRAY_COUNT(in), in, out);

        REPORTER_ASSERT(r, out[0] == 0b001111);
        REPORTER_ASSERT(r, out[1] == 0b001100);
        REPORTER_ASSERT(r, out[2] == 0b001010);
        REPORTER_ASSERT(r, out[3] == 0b001010);
        REPORTER_ASSERT(r, out[4] == 0b000010);
        // From 5 upward every input produces the same bit pattern.
        for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
            REPORTER_ASSERT(r, out[i] == 0b110010);
        }
    };
    test_jit_and_interpreter(b.done(), check);
}
642
// Float counterpart of the i32 comparison test: all six f32 comparisons
// against 0.0f..5.0f, packed into bits 0..5 of one int output per input.
DEF_TEST(SkVM_cmp_f32, r) {
    skvm::Builder b;
    {
        skvm::F32 x = b.bit_cast(b.load32(b.varying<float>()));

        // Reduces a comparison mask to its lowest bit and moves that bit to
        // position `shift`.
        auto to_bit = [&](int shift, skvm::I32 mask) {
            return b.shl(b.bit_and(mask, b.splat(0x1)), shift);
        };

        skvm::I32 m = b.splat(0);
        m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f))));
        m = b.bit_or(m, to_bit(1, b.neq(x, b.splat(1.0f))));
        m = b.bit_or(m, to_bit(2, b. lt(x, b.splat(2.0f))));
        m = b.bit_or(m, to_bit(3, b.lte(x, b.splat(3.0f))));
        m = b.bit_or(m, to_bit(4, b. gt(x, b.splat(4.0f))));
        m = b.bit_or(m, to_bit(5, b.gte(x, b.splat(5.0f))));

        b.store32(b.varying<int>(), m);
    }

    auto check = [&](const skvm::Program& program) {
        float in[] = { 0,1,2,3,4,5,6,7,8,9 };
        int out[SK_ARRAY_COUNT(in)];

        program.eval(SK_ARRAY_COUNT(in), in, out);

        REPORTER_ASSERT(r, out[0] == 0b001111);
        REPORTER_ASSERT(r, out[1] == 0b001100);
        REPORTER_ASSERT(r, out[2] == 0b001010);
        REPORTER_ASSERT(r, out[3] == 0b001010);
        REPORTER_ASSERT(r, out[4] == 0b000010);
        // From 5 upward every input produces the same bit pattern.
        for (int i = 5; i < (int)SK_ARRAY_COUNT(out); i++) {
            REPORTER_ASSERT(r, out[i] == 0b110010);
        }
    };
    test_jit_and_interpreter(b.done(), check);
}
679
// Exercises the 16x2 arithmetic and shift ops. The buffer holds 16-bit
// values but is processed through 32-bit loads and stores, so eval() is given
// half the element count.
DEF_TEST(SkVM_i16x2, r) {
    skvm::Builder b;
    {
        skvm::Arg buf = b.varying<int>();

        skvm::I32 x = b.load32(buf),
                  y = b.add_16x2(x,x),   // y = 2x
                  z = b.mul_16x2(x,y),   // z = 2x^2
                  w = b.sub_16x2(z,x),   // w = x(2x-1)
                  v = b.shl_16x2(w,7),   // These shifts will be a no-op
                  u = b.sra_16x2(v,7);   // for all but x=12 and x=13.
        b.store32(buf, u);
    }

    auto check = [&](const skvm::Program& program) {
        uint16_t buf[] = { 0,1,2,3,4,5,6,7,8,9,10,11,12,13 };

        program.eval(SK_ARRAY_COUNT(buf)/2, buf);

        for (int i = 0; i < 12; i++) {
            REPORTER_ASSERT(r, buf[i] == i*(2*i-1));
        }
        // For the last two inputs the shl/sra round trip changes the value.
        REPORTER_ASSERT(r, buf[12] == 0xff14);   // 12*23 = 0x114
        REPORTER_ASSERT(r, buf[13] == 0xff45);   // 13*25 = 0x145
    };
    test_jit_and_interpreter(b.done(), check);
}
705
// 16x2 counterpart of the comparison tests: all six 16x2 comparisons against
// the constants 0..5 (replicated into both halves), packed into bits 0..5 of
// each 16-bit lane. Results are written back over the input buffer.
DEF_TEST(SkVM_cmp_i16, r) {
    skvm::Builder b;
    {
        skvm::Arg buf = b.varying<int>();
        skvm::I32 x = b.load32(buf);

        // Reduces a comparison mask to the lowest bit of each 16-bit half and
        // moves those bits to position `shift` within their halves.
        auto to_bit = [&](int shift, skvm::I32 mask) {
            return b.shl_16x2(b.bit_and(mask, b.splat(0x0001'0001)), shift);
        };

        skvm::I32 m = b.splat(0);
        m = b.bit_or(m, to_bit(0, b. eq_16x2(x, b.splat(0x0000'0000))));
        m = b.bit_or(m, to_bit(1, b.neq_16x2(x, b.splat(0x0001'0001))));
        m = b.bit_or(m, to_bit(2, b. lt_16x2(x, b.splat(0x0002'0002))));
        m = b.bit_or(m, to_bit(3, b.lte_16x2(x, b.splat(0x0003'0003))));
        m = b.bit_or(m, to_bit(4, b. gt_16x2(x, b.splat(0x0004'0004))));
        m = b.bit_or(m, to_bit(5, b.gte_16x2(x, b.splat(0x0005'0005))));

        b.store32(buf, m);
    }

    auto check = [&](const skvm::Program& program) {
        int16_t buf[] = { 0,1, 2,3, 4,5, 6,7, 8,9 };

        // Two 16-bit values per 32-bit element, hence half the count.
        program.eval(SK_ARRAY_COUNT(buf)/2, buf);

        REPORTER_ASSERT(r, buf[0] == 0b001111);
        REPORTER_ASSERT(r, buf[1] == 0b001100);
        REPORTER_ASSERT(r, buf[2] == 0b001010);
        REPORTER_ASSERT(r, buf[3] == 0b001010);
        REPORTER_ASSERT(r, buf[4] == 0b000010);
        // From 5 upward every input produces the same bit pattern.
        for (int i = 5; i < (int)SK_ARRAY_COUNT(buf); i++) {
            REPORTER_ASSERT(r, buf[i] == 0b110010);
        }
    };
    test_jit_and_interpreter(b.done(), check);
}
742
743
// This program is designed to exercise the tricky corners of instruction
// and register selection for Op::mad_f32.
DEF_TEST(SkVM_mad, r) {
    skvm::Builder b;
    {
        skvm::Arg arg = b.varying<int>();

        skvm::F32 x = b.to_f32(b.load32(arg)),
                  y = b.mad(x,x,x),   // x is needed in the future, so r[x] != r[y].
                  z = b.mad(y,y,x),   // y is needed in the future, but r[z] = r[x] is ok.
                  w = b.mad(z,z,y),   // w can alias z but not y.
                  v = b.mad(w,y,w);   // Got to stop somewhere.
        b.store32(arg, b.to_i32(v));
    }

    auto check = [&](const skvm::Program& program) {
        // Worked example for the single input below:
        //   x = 2
        //   y = 2*2 + 2 = 6
        //   z = 6*6 + 2 = 38
        //   w = 38*38 + 6 = 1450
        //   v = 1450*6 + 1450 = 10150
        int x = 2;
        program.eval(1, &x);
        REPORTER_ASSERT(r, x == 10150);
    };
    test_jit_and_interpreter(b.done(), check);
}
771
// A second mad_f32 register-selection scenario, this time working directly on
// float data.
DEF_TEST(SkVM_madder, r) {
    skvm::Builder b;
    {
        skvm::Arg arg = b.varying<float>();

        skvm::F32 x = b.bit_cast(b.load32(arg)),
                  y = b.mad(x,x,x),   // x is needed in the future, so r[x] != r[y].
                  z = b.mad(y,x,y),   // r[x] can be reused after this instruction, but not r[y].
                  w = b.mad(y,y,z);
        b.store32(arg, b.bit_cast(w));
    }

    auto check = [&](const skvm::Program& program) {
        // Worked example for the single input below:
        //   y = 2*2 + 2 = 6
        //   z = 6*2 + 6 = 18
        //   w = 6*6 + 18 = 54
        float x = 2.0f;
        program.eval(1, &x);
        REPORTER_ASSERT(r, x == 54.0f);
    };
    test_jit_and_interpreter(b.done(), check);
}
793
// This program uses enough constants that it will fail to JIT if we hoist them.
// The JIT will try again without hoisting, and that'll just need 2 registers.
DEF_TEST(SkVM_hoist, r) {
    skvm::Builder b;
    {
        skvm::Arg arg = b.varying<int>();
        skvm::I32 x = b.load32(arg);
        // Add the 32 distinct constants 0..31 one after another.
        for (int i = 0; i < 32; i++) {
            x = b.add(x, b.splat(i));
        }
        b.store32(arg, x);
    }

    auto check = [&](const skvm::Program& program) {
        int x = 4;
        program.eval(1, &x);
        // x += 0 + 1 + 2 + 3 + ... + 30 + 31
        // x += 496
        REPORTER_ASSERT(r, x == 500);
    };
    test_jit_and_interpreter(b.done(), check);
}
815
// select(): values greater than 4 are kept, everything else becomes 42.
DEF_TEST(SkVM_select, r) {
    skvm::Builder b;
    {
        skvm::Arg buf = b.varying<int>();

        skvm::I32 x = b.load32(buf);

        x = b.select( b.gt(x, b.splat(4)), x, b.splat(42) );

        b.store32(buf, x);
    }

    auto check = [&](const skvm::Program& program) {
        int buf[] = { 0,1,2,3,4,5,6,7,8 };
        const int len = (int)SK_ARRAY_COUNT(buf);

        program.eval(len, buf);

        for (int i = 0; i < len; i++) {
            REPORTER_ASSERT(r, buf[i] == (i > 4 ? i : 42));
        }
    };
    test_jit_and_interpreter(b.done(), check);
}
836
// Exercise a somewhat arbitrary set of new ops: load16/store16, the three
// uniform loads, select-based clamping, and gather8.
DEF_TEST(SkVM_NewOps, r) {
    skvm::Builder b;
    {
        skvm::Arg buf      = b.varying<int16_t>(),
                  img      = b.uniform(),
                  uniforms = b.uniform();

        skvm::I32 x = b.load16(buf);

        // The second argument of each uniformN() call is passed as 0, 4, 6
        // and 8; these line up with the fields of the `uniforms` struct built
        // in the test body below (presumably byte offsets into it).
        x = b.add(x, b.uniform32(uniforms, 0));
        x = b.mul(x, b.uniform8 (uniforms, 4));
        x = b.sub(x, b.uniform16(uniforms, 6));

        // Clamp x to [0, limit] before using it as a gather index.
        skvm::I32 limit = b.uniform32(uniforms, 8);
        x = b.select(b.lt(x, b.splat(0)), b.splat(0), x);
        x = b.select(b.gt(x, limit     ), limit     , x);

        x = b.gather8(img, x);

        b.store16(buf, x);
    }

    // Debugging aid, disabled by default: flip to (true) to print the dump.
    if ((false)) {
        SkDynamicMemoryWStream buf;
        dump(b, &buf);
        sk_sp<SkData> blob = buf.detachAsData();
        // "%.*s" takes an int precision and a char pointer, so both arguments
        // are cast explicitly rather than passed as size_t / const void*.
        SkDebugf("%.*s\n", (int)blob->size(), (const char*)blob->data());
    }

    test_jit_and_interpreter(b.done(), [&](const skvm::Program& program) {
        const int N = 31;
        int16_t buf[N];
        for (int i = 0; i < N; i++) {
            buf[i] = i;
        }

        const int M = 16;
        uint8_t img[M];
        for (int i = 0; i < M; i++) {
            img[i] = i*i;
        }

        // Field order and types matter here: the program reads these through
        // the constants 0, 4, 6 and 8 passed to uniform32/uniform8/uniform16.
        struct {
            int      add   = 5;
            uint8_t  mul   = 3;
            uint16_t sub   = 18;
            int      limit = M-1;
        } uniforms;

        program.eval(N, buf, img, &uniforms);

        for (int i = 0; i < N; i++) {
            // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1).
            int x = 3*(i-1);

            // Then that's pinned to the limits of img.
            if (i < 2) { x =  0; }  // Notice i == 1 hits x == 0 exactly...
            if (i > 5) { x = 15; }  // ...and i == 6 hits x == 15 exactly
            REPORTER_ASSERT(r, buf[i] == img[x]);
        }
    });
}
900
901
902 template <typename Fn>
test_asm(skiatest::Reporter * r,Fn && fn,std::initializer_list<uint8_t> expected)903 static void test_asm(skiatest::Reporter* r, Fn&& fn, std::initializer_list<uint8_t> expected) {
904 uint8_t buf[4096];
905 skvm::Assembler a{buf};
906 fn(a);
907
908 REPORTER_ASSERT(r, a.size() == expected.size());
909
910 auto got = (const uint8_t*)buf,
911 want = expected.begin();
912 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) {
913 REPORTER_ASSERT(r, got[i] == want[i],
914 "byte %d was %02x, want %02x", i, got[i], want[i]);
915 }
916 }
917
// Byte-for-byte checks of skvm::Assembler's output. Every test_asm() call below
// emits a handful of instructions through the lambda and then lists the exact
// bytes expected for them, in the same order the instructions were emitted.
DEF_TEST(SkVM_Assembler,r)918 DEF_TEST(SkVM_Assembler, r) {
919 // Easiest way to generate test cases is
920 //
921 // echo '...some asm...' | llvm-mc -show-encoding -x86-asm-syntax=intel
922 //
923 // The -x86-asm-syntax=intel bit is optional, controlling the
924 // input syntax only; the output will always be AT&T op x,y,dst style.
925 // Our APIs read more like Intel op dst,x,y as op(dst,x,y), so I find
926 // that a bit easier to use here, despite maybe favoring AT&T overall.
927
928 using A = skvm::Assembler;
929 // Our exit strategy from AVX code.
930 test_asm(r, [&](A& a) {
931 a.vzeroupper();
932 a.ret();
933 },{
934 0xc5, 0xf8, 0x77,
935 0xc3,
936 });
937
938 // Align should pad with zero
939 test_asm(r, [&](A& a) {
940 a.ret();
941 a.align(4);
942 },{
943 0xc3,
944 0x00, 0x00, 0x00,
945 });
946
    // add/sub of an immediate to a general-purpose register.  In the expected
    // bytes the small immediates use opcode 0x83 with a single immediate byte,
    // while 128 and 1000000 use opcode 0x81 with a 4-byte little-endian immediate.
947 test_asm(r, [&](A& a) {
948 a.add(A::rax, 8); // Always good to test rax.
949 a.sub(A::rax, 32);
950
951 a.add(A::rdi, 12); // Last 0x48 REX
952 a.sub(A::rdi, 8);
953
954 a.add(A::r8 , 7); // First 0x49 REX
955 a.sub(A::r8 , 4);
956
957 a.add(A::rsi, 128); // Requires 4 byte immediate.
958 a.sub(A::r8 , 1000000);
959 },{
960 0x48, 0x83, 0b11'000'000, 0x08,
961 0x48, 0x83, 0b11'101'000, 0x20,
962
963 0x48, 0x83, 0b11'000'111, 0x0c,
964 0x48, 0x83, 0b11'101'111, 0x08,
965
966 0x49, 0x83, 0b11'000'000, 0x07,
967 0x49, 0x83, 0b11'101'000, 0x04,
968
969 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00,
970 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00,
971 });
972
973
    // Three-register ymm ops.  The per-line notes say which operand placements
    // are expected to get the 2-byte (0xc5) or the 3-byte (0xc4) VEX prefix.
974 test_asm(r, [&](A& a) {
975 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX.
976 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX.
977 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX.
978 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX.
979 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX.
980 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right.
981 },{
982 /* VEX */ /*op*/ /*modRM*/
983 0xc5, 0xf5, 0xfe, 0xc2,
984 0xc5, 0x75, 0xfe, 0xc2,
985 0xc5, 0xbd, 0xfe, 0xc2,
986 0xc4, 0xc1, 0x75, 0xfe, 0xc0,
987 0xc4, 0xe2, 0x75, 0x40, 0xc2,
988 0xc5, 0xf5, 0xfa, 0xc2,
989 });
990
    // vpcmpeqd / vpcmpgtd with the same operands; the expected rows differ
    // only in the opcode byte (0x76 vs 0x66).
991 test_asm(r, [&](A& a) {
992 a.vpcmpeqd(A::ymm0, A::ymm1, A::ymm2);
993 a.vpcmpgtd(A::ymm0, A::ymm1, A::ymm2);
994 },{
995 0xc5,0xf5,0x76,0xc2,
996 0xc5,0xf5,0x66,0xc2,
997 });
998
    // The one four-register instruction tested here.
999 test_asm(r, [&](A& a) {
1000 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3);
1001 },{
1002 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30,
1003 });
1004
    // Shift by an immediate: the shift count (8, 5) is the last byte of each row.
1005 test_asm(r, [&](A& a) {
1006 a.vpsrld(A::ymm15, A::ymm2, 8);
1007 a.vpsrld(A::ymm0 , A::ymm8, 5);
1008 },{
1009 0xc5, 0x85, 0x72,0xd2, 0x08,
1010 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05,
1011 });
1012
    // vpermq's immediate (5) is likewise expected as the trailing byte.
1013 test_asm(r, [&](A& a) {
1014 a.vpermq(A::ymm1, A::ymm2, 5);
1015 },{
1016 0xc4,0xe3,0xfd, 0x00,0xca, 0x05,
1017 });
1018
    // Label operands.  The label is taken at offset 0, followed by four raw
    // data bytes and then 9-byte instructions, so each expected displacement is
    // negative and equals the distance from the end of its own instruction back
    // to offset 0 (-13, -22, ...).
1019 test_asm(r, [&](A& a) {
1020 A::Label l = a.here();
1021 a.byte(1);
1022 a.byte(2);
1023 a.byte(3);
1024 a.byte(4);
1025
1026 a.vbroadcastss(A::ymm0 , &l);
1027 a.vbroadcastss(A::ymm1 , &l);
1028 a.vbroadcastss(A::ymm8 , &l);
1029 a.vbroadcastss(A::ymm15, &l);
1030
1031 a.vpshufb(A::ymm4, A::ymm3, &l);
1032 },{
1033 0x01, 0x02, 0x03, 0x4,
1034
1035 /* VEX */ /*op*/ /* ModRM */ /* offset */
1036 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13
1037 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22
1038 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31
1039 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40
1040
1041 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49
1042 });
1043
    // vbroadcastss with a (register, offset) operand and with an xmm operand.
    // Per the expected bytes, offset 0 emits no offset byte, 7 and -12 emit one
    // byte, and 400 emits four bytes.
1044 test_asm(r, [&](A& a) {
1045 a.vbroadcastss(A::ymm0, A::rdi, 0);
1046 a.vbroadcastss(A::ymm13, A::r14, 7);
1047 a.vbroadcastss(A::ymm8, A::rdx, -12);
1048 a.vbroadcastss(A::ymm8, A::rdx, 400);
1049
1050 a.vbroadcastss(A::ymm8, A::xmm0);
1051 a.vbroadcastss(A::ymm0, A::xmm13);
1052 },{
1053 /* VEX */ /*op*/ /*ModRM*/ /*offset*/
1054 0xc4,0xe2,0x7d, 0x18, 0b00'000'111,
1055 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07,
1056 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4,
1057 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00,
1058
1059 0xc4,0x62,0x7d, 0x18, 0b11'000'000,
1060 0xc4,0xc2,0x7d, 0x18, 0b11'000'101,
1061 });
1062
    // Jumps back to a label taken at offset 0, followed by cmp against
    // immediates; the last cmp (2000000000) is expected with a 4-byte immediate.
1063 test_asm(r, [&](A& a) {
1064 A::Label l = a.here();
1065 a.jne(&l);
1066 a.jne(&l);
1067 a.je (&l);
1068 a.jmp(&l);
1069 a.jl (&l);
1070
1071 a.cmp(A::rdx, 0);
1072 a.cmp(A::rax, 12);
1073 a.cmp(A::r14, 2000000000);
1074 },{
1075 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes
1076 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes
1077 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes
1078 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes
1079 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes
1080
1081 0x48,0x83,0xfa,0x00,
1082 0x48,0x83,0xf8,0x0c,
1083 0x49,0x81,0xfe,0x00,0x94,0x35,0x77,
1084 });
1085
    // vmovups in both operand orders (expected opcode 0x10 vs 0x11), then
    // vpmovzxwd / vpmovzxbd and vmovq, all using rsi or rdx as the GP operand.
1086 test_asm(r, [&](A& a) {
1087 a.vmovups(A::ymm5, A::rsi);
1088 a.vmovups(A::rsi, A::ymm5);
1089
1090 a.vmovups(A::rsi, A::xmm5);
1091
1092 a.vpmovzxwd(A::ymm4, A::rsi);
1093 a.vpmovzxbd(A::ymm4, A::rsi);
1094
1095 a.vmovq(A::rdx, A::xmm15);
1096 },{
1097 /* VEX */ /*Op*/ /* ModRM */
1098 0xc5, 0xfc, 0x10, 0b00'101'110,
1099 0xc5, 0xfc, 0x11, 0b00'101'110,
1100
1101 0xc5, 0xf8, 0x11, 0b00'101'110,
1102
1103 0xc4,0xe2,0x7d, 0x33, 0b00'100'110,
1104 0xc4,0xe2,0x7d, 0x31, 0b00'100'110,
1105
1106 0xc5, 0x79, 0xd6, 0b00'111'010,
1107 });
1108
    // movzbl, vmovd, vmovd_direct and movb, each tried with low registers and
    // with a high one (r8 / xmm8).  The vmovd and vmovd_direct rows are expected
    // to match except for the final byte (0x00 vs 0xc0).
1109 test_asm(r, [&](A& a) {
1110 a.movzbl(A::rax, A::rsi, 0); // Low registers for src and dst.
1111 a.movzbl(A::rax, A::r8, 0); // High src register.
1112 a.movzbl(A::r8 , A::rsi, 0); // High dst register.
1113 a.movzbl(A::r8, A::rsi, 12);
1114 a.movzbl(A::r8, A::rsi, 400);
1115
1116 a.vmovd(A::rax, A::xmm0);
1117 a.vmovd(A::rax, A::xmm8);
1118 a.vmovd(A::r8, A::xmm0);
1119
1120 a.vmovd(A::xmm0, A::rax);
1121 a.vmovd(A::xmm8, A::rax);
1122 a.vmovd(A::xmm0, A::r8);
1123
1124 a.vmovd_direct(A::rax, A::xmm0);
1125 a.vmovd_direct(A::rax, A::xmm8);
1126 a.vmovd_direct(A::r8, A::xmm0);
1127
1128 a.vmovd_direct(A::xmm0, A::rax);
1129 a.vmovd_direct(A::xmm8, A::rax);
1130 a.vmovd_direct(A::xmm0, A::r8);
1131
1132 a.movb(A::rdx, A::rax);
1133 a.movb(A::rdx, A::r8);
1134 a.movb(A::r8 , A::rax);
1135 },{
1136 0x0f,0xb6,0x06,
1137 0x41,0x0f,0xb6,0x00,
1138 0x44,0x0f,0xb6,0x06,
1139 0x44,0x0f,0xb6,0x46, 12,
1140 0x44,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00,
1141
1142 0xc5,0xf9,0x7e,0x00,
1143 0xc5,0x79,0x7e,0x00,
1144 0xc4,0xc1,0x79,0x7e,0x00,
1145
1146 0xc5,0xf9,0x6e,0x00,
1147 0xc5,0x79,0x6e,0x00,
1148 0xc4,0xc1,0x79,0x6e,0x00,
1149
1150 0xc5,0xf9,0x7e,0xc0,
1151 0xc5,0x79,0x7e,0xc0,
1152 0xc4,0xc1,0x79,0x7e,0xc0,
1153
1154 0xc5,0xf9,0x6e,0xc0,
1155 0xc5,0x79,0x6e,0xc0,
1156 0xc4,0xc1,0x79,0x6e,0xc0,
1157
1158 0x88, 0x02,
1159 0x44, 0x88, 0x02,
1160 0x41, 0x88, 0x00,
1161 });
1162
    // Insert / extract instructions; the immediate argument (4, 12, 7, 15) is
    // expected as the last byte of each row.
1163 test_asm(r, [&](A& a) {
1164 a.vpinsrw(A::xmm1, A::xmm8, A::rsi, 4);
1165 a.vpinsrw(A::xmm8, A::xmm1, A::r8, 12);
1166
1167 a.vpinsrb(A::xmm1, A::xmm8, A::rsi, 4);
1168 a.vpinsrb(A::xmm8, A::xmm1, A::r8, 12);
1169
1170 a.vpextrw(A::rsi, A::xmm8, 7);
1171 a.vpextrw(A::r8, A::xmm1, 15);
1172
1173 a.vpextrb(A::rsi, A::xmm8, 7);
1174 a.vpextrb(A::r8, A::xmm1, 15);
1175 },{
1176 0xc5,0xb9, 0xc4, 0x0e, 4,
1177 0xc4,0x41,0x71, 0xc4, 0x00, 12,
1178
1179 0xc4,0xe3,0x39, 0x20, 0x0e, 4,
1180 0xc4,0x43,0x71, 0x20, 0x00, 12,
1181
1182 0xc4,0x63,0x79, 0x15, 0x06, 7,
1183 0xc4,0xc3,0x79, 0x15, 0x08, 15,
1184
1185 0xc4,0x63,0x79, 0x14, 0x06, 7,
1186 0xc4,0xc3,0x79, 0x14, 0x08, 15,
1187 });
1188
1189 test_asm(r, [&](A& a) {
1190 a.vpandn(A::ymm3, A::ymm12, A::ymm2);
1191 },{
1192 0xc5, 0x9d, 0xdf, 0xda,
1193 });
1194
    // Two-register forms: a move and the two conversions.
1195 test_asm(r, [&](A& a) {
1196 a.vmovdqa (A::ymm3, A::ymm2);
1197 a.vcvttps2dq(A::ymm3, A::ymm2);
1198 a.vcvtdq2ps (A::ymm3, A::ymm2);
1199 },{
1200 0xc5,0xfd,0x6f,0xda,
1201 0xc5,0xfe,0x5b,0xda,
1202 0xc5,0xfc,0x5b,0xda,
1203 });
1204
    // The remaining cases use the arm64-style methods and registers (A::v*,
    // A::x*).  Every expected row from here on is 4 bytes.  Expected bytes can
    // be produced with llvm-mc as before, passing -arch arm64, e.g.:
1205 // echo "fmul v4.4s, v3.4s, v1.4s" | llvm-mc -show-encoding -arch arm64
1206
1207 test_asm(r, [&](A& a) {
1208 a.and16b(A::v4, A::v3, A::v1);
1209 a.orr16b(A::v4, A::v3, A::v1);
1210 a.eor16b(A::v4, A::v3, A::v1);
1211 a.bic16b(A::v4, A::v3, A::v1);
1212
1213 a.add4s(A::v4, A::v3, A::v1);
1214 a.sub4s(A::v4, A::v3, A::v1);
1215 a.mul4s(A::v4, A::v3, A::v1);
1216
1217 a.sub8h(A::v4, A::v3, A::v1);
1218 a.mul8h(A::v4, A::v3, A::v1);
1219
1220 a.fadd4s(A::v4, A::v3, A::v1);
1221 a.fsub4s(A::v4, A::v3, A::v1);
1222 a.fmul4s(A::v4, A::v3, A::v1);
1223 a.fdiv4s(A::v4, A::v3, A::v1);
1224
1225 a.fmla4s(A::v4, A::v3, A::v1);
1226 },{
1227 0x64,0x1c,0x21,0x4e,
1228 0x64,0x1c,0xa1,0x4e,
1229 0x64,0x1c,0x21,0x6e,
1230 0x64,0x1c,0x61,0x4e,
1231
1232 0x64,0x84,0xa1,0x4e,
1233 0x64,0x84,0xa1,0x6e,
1234 0x64,0x9c,0xa1,0x4e,
1235
1236 0x64,0x84,0x61,0x6e,
1237 0x64,0x9c,0x61,0x4e,
1238
1239 0x64,0xd4,0x21,0x4e,
1240 0x64,0xd4,0xa1,0x4e,
1241 0x64,0xdc,0x21,0x6e,
1242 0x64,0xfc,0x21,0x6e,
1243
1244 0x64,0xcc,0x21,0x4e,
1245 });
1246
    // Shifts by an immediate.  Only the third and fourth bytes of the expected
    // rows vary with the shift amount and the shift kind.
1247 test_asm(r, [&](A& a) {
1248 a.shl4s(A::v4, A::v3, 0);
1249 a.shl4s(A::v4, A::v3, 1);
1250 a.shl4s(A::v4, A::v3, 8);
1251 a.shl4s(A::v4, A::v3, 16);
1252 a.shl4s(A::v4, A::v3, 31);
1253
1254 a.sshr4s(A::v4, A::v3, 1);
1255 a.sshr4s(A::v4, A::v3, 8);
1256 a.sshr4s(A::v4, A::v3, 31);
1257
1258 a.ushr4s(A::v4, A::v3, 1);
1259 a.ushr4s(A::v4, A::v3, 8);
1260 a.ushr4s(A::v4, A::v3, 31);
1261
1262 a.ushr8h(A::v4, A::v3, 1);
1263 a.ushr8h(A::v4, A::v3, 8);
1264 a.ushr8h(A::v4, A::v3, 15);
1265 },{
1266 0x64,0x54,0x20,0x4f,
1267 0x64,0x54,0x21,0x4f,
1268 0x64,0x54,0x28,0x4f,
1269 0x64,0x54,0x30,0x4f,
1270 0x64,0x54,0x3f,0x4f,
1271
1272 0x64,0x04,0x3f,0x4f,
1273 0x64,0x04,0x38,0x4f,
1274 0x64,0x04,0x21,0x4f,
1275
1276 0x64,0x04,0x3f,0x6f,
1277 0x64,0x04,0x38,0x6f,
1278 0x64,0x04,0x21,0x6f,
1279
1280 0x64,0x04,0x1f,0x6f,
1281 0x64,0x04,0x18,0x6f,
1282 0x64,0x04,0x11,0x6f,
1283 });
1284
    // sli4s with the same shift amounts as the shl4s cases; the expected rows
    // match the shl4s rows except for the last byte (0x6f instead of 0x4f).
1285 test_asm(r, [&](A& a) {
1286 a.sli4s(A::v4, A::v3, 0);
1287 a.sli4s(A::v4, A::v3, 1);
1288 a.sli4s(A::v4, A::v3, 8);
1289 a.sli4s(A::v4, A::v3, 16);
1290 a.sli4s(A::v4, A::v3, 31);
1291 },{
1292 0x64,0x54,0x20,0x6f,
1293 0x64,0x54,0x21,0x6f,
1294 0x64,0x54,0x28,0x6f,
1295 0x64,0x54,0x30,0x6f,
1296 0x64,0x54,0x3f,0x6f,
1297 });
1298
1299 test_asm(r, [&](A& a) {
1300 a.scvtf4s (A::v4, A::v3);
1301 a.fcvtzs4s(A::v4, A::v3);
1302 },{
1303 0x64,0xd8,0x21,0x4e,
1304 0x64,0xb8,0xa1,0x4e,
1305 });
1306
    // ret, add/sub/subs/cmp with immediates, then branches.  The label is taken
    // right before the first bne, so the expected branch offsets start at 0 and
    // step back by 4 for each later instruction.
1307 test_asm(r, [&](A& a) {
1308 a.ret(A::x30); // Conventional ret using link register.
1309 a.ret(A::x13); // Can really return using any register if we like.
1310
1311 a.add(A::x2, A::x2, 4);
1312 a.add(A::x3, A::x2, 32);
1313
1314 a.sub(A::x2, A::x2, 4);
1315 a.sub(A::x3, A::x2, 32);
1316
1317 a.subs(A::x2, A::x2, 4);
1318 a.subs(A::x3, A::x2, 32);
1319
1320 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction!
1321 a.cmp(A::x2, 4);
1322
1323 A::Label l = a.here();
1324 a.bne(&l);
1325 a.bne(&l);
1326 a.blt(&l);
1327 a.b(&l);
1328 a.cbnz(A::x2, &l);
1329 a.cbz(A::x2, &l);
1330 },{
1331 0xc0,0x03,0x5f,0xd6,
1332 0xa0,0x01,0x5f,0xd6,
1333
1334 0x42,0x10,0x00,0x91,
1335 0x43,0x80,0x00,0x91,
1336
1337 0x42,0x10,0x00,0xd1,
1338 0x43,0x80,0x00,0xd1,
1339
1340 0x42,0x10,0x00,0xf1,
1341 0x43,0x80,0x00,0xf1,
1342
1343 0x5f,0x10,0x00,0xf1,
1344 0x5f,0x10,0x00,0xf1,
1345
1346 0x01,0x00,0x00,0x54, // b.ne #0
1347 0xe1,0xff,0xff,0x54, // b.ne #-4
1348 0xcb,0xff,0xff,0x54, // b.lt #-8
1349 0xae,0xff,0xff,0x54, // b.al #-12
1350 0x82,0xff,0xff,0xb5, // cbnz x2, #-16
1351 0x62,0xff,0xff,0xb4, // cbz x2, #-20
1352 });
1353
1354 // Can we cbz() to a not-yet-defined label?
1355 test_asm(r, [&](A& a) {
1356 A::Label l;
1357 a.cbz(A::x2, &l);
1358 a.add(A::x3, A::x2, 32);
1359 a.label(&l);
1360 a.ret(A::x30);
1361 },{
1362 0x42,0x00,0x00,0xb4, // cbz x2, #8
1363 0x43,0x80,0x00,0x91, // add x3, x2, #32
1364 0xc0,0x03,0x5f,0xd6, // ret
1365 });
1366
1367 // If we start a label as a backward label,
1368 // can we redefine it to be a future label?
1369 // (Not sure this is useful... just want to test it works.)
1370 test_asm(r, [&](A& a) {
1371 A::Label l1 = a.here();
1372 a.add(A::x3, A::x2, 32);
1373 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky.
1374
1375 A::Label l2 = a.here(); // Start off the same...
1376 a.add(A::x3, A::x2, 32);
1377 a.cbz(A::x2, &l2); // Looks like this will go backward...
1378 a.add(A::x2, A::x2, 4);
1379 a.add(A::x3, A::x2, 32);
1380 a.label(&l2); // But no... actually forward! What a switcheroo!
1381 },{
1382 0x43,0x80,0x00,0x91, // add x3, x2, #32
1383 0xe2,0xff,0xff,0xb4, // cbz x2, #-4
1384
1385 0x43,0x80,0x00,0x91, // add x3, x2, #32
1386 0x62,0x00,0x00,0xb4, // cbz x2, #12
1387 0x42,0x10,0x00,0x91, // add x2, x2, #4
1388 0x43,0x80,0x00,0x91, // add x3, x2, #32
1389 });
1390
1391 // Loading from a label on ARM.
    // One label is bound before the loads and one after, so both a negative
    // and a positive offset are covered.  The a.word() values are expected in
    // the output as little-endian bytes.
1392 test_asm(r, [&](A& a) {
1393 A::Label fore,aft;
1394 a.label(&fore);
1395 a.word(0x01234567);
1396 a.ldrq(A::v1, &fore);
1397 a.ldrq(A::v2, &aft);
1398 a.label(&aft);
1399 a.word(0x76543210);
1400 },{
1401 0x67,0x45,0x23,0x01,
1402 0xe1,0xff,0xff,0x9c, // ldr q1, #-4
1403 0x22,0x00,0x00,0x9c, // ldr q2, #4
1404 0x10,0x32,0x54,0x76,
1405 });
1406
    // ldrq / strq through a base register; the expected rows differ only in
    // the third byte (0xc0 vs 0x80).
1407 test_asm(r, [&](A& a) {
1408 a.ldrq(A::v0, A::x8);
1409 a.strq(A::v0, A::x8);
1410 },{
1411 0x00,0x01,0xc0,0x3d,
1412 0x00,0x01,0x80,0x3d,
1413 });
1414
1415 test_asm(r, [&](A& a) {
1416 a.xtns2h(A::v0, A::v0);
1417 a.xtnh2b(A::v0, A::v0);
1418 a.strs (A::v0, A::x0);
1419
1420 a.ldrs (A::v0, A::x0);
1421 a.uxtlb2h(A::v0, A::v0);
1422 a.uxtlh2s(A::v0, A::v0);
1423 },{
1424 0x00,0x28,0x61,0x0e,
1425 0x00,0x28,0x21,0x0e,
1426 0x00,0x00,0x00,0xbd,
1427
1428 0x00,0x00,0x40,0xbd,
1429 0x00,0xa4,0x08,0x2f,
1430 0x00,0xa4,0x10,0x2f,
1431 });
1432
    // ldrb / strb through a base register; as with ldrq / strq, only the third
    // expected byte differs (0x40 vs 0x00).
1433 test_asm(r, [&](A& a) {
1434 a.ldrb(A::v0, A::x8);
1435 a.strb(A::v0, A::x8);
1436 },{
1437 0x00,0x01,0x40,0x3d,
1438 0x00,0x01,0x00,0x3d,
1439 });
1440
1441 test_asm(r, [&](A& a) {
1442 a.tbl(A::v0, A::v1, A::v2);
1443 },{
1444 0x20,0x00,0x02,0x4e,
1445 });
1446 }
1447