Lines Matching +full:0 +full:a
4 * Use of this source code is governed by a BSD-style license that can be
54 skvm::I32 a = b.add(l, l); in DEF_TEST() local
55 b.add(a, b.splat(7)); in DEF_TEST()
62 REPORTER_ASSERT(r, program.size() == 0); in DEF_TEST()
66 // Let's build a program with no memory arguments. in DEF_TEST()
75 for (int N = 0; N < 64; N++) { in DEF_TEST()
81 REPORTER_ASSERT(r, inst.death == 0 && inst.can_hoist == true); in DEF_TEST()
94 for (int i = 0; i < 17; i++) { in DEF_TEST()
111 dst[] = {0,0,0,0,0,0,0,0,0}; in DEF_TEST()
114 for (size_t i = 0; i < std::size(src)-1; i++) { in DEF_TEST()
118 REPORTER_ASSERT(r, dst[i] == 0); in DEF_TEST()
147 for (int N = 0; N <= (int)std::size(buf); N++) { in DEF_TEST()
148 for (int i = 0; i < (int)std::size(buf); i++) { in DEF_TEST()
153 for (int i = 0; i < N; i++) { in DEF_TEST()
169 b.store32(buf, b.gather32(uniforms,0, b.bit_and(x, b.splat(7)))); in DEF_TEST()
176 for (int i = 0; i < 20; i++) { in DEF_TEST()
185 int i = 0; in DEF_TEST()
221 b.store32(buf32, b.gather32(uniforms,0, b.bit_and(x, b.splat( 7)))); in DEF_TEST()
222 b.store16(buf16, b.gather16(uniforms,0, b.bit_and(x, b.splat(15)))); in DEF_TEST()
223 b.store8 (buf8 , b.gather8 (uniforms,0, b.bit_and(x, b.splat(31)))); in DEF_TEST()
234 for (int i = 0; i < 20; i++) { in DEF_TEST()
243 int i = 0; in DEF_TEST()
245 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
246 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++; in DEF_TEST()
247 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
249 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
250 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 78 && buf8[i] == 0); i++; in DEF_TEST()
251 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
254 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
255 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 98 && buf8[i] == 0); i++; in DEF_TEST()
256 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
258 REPORTER_ASSERT(r, buf32[i] == 98 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
259 REPORTER_ASSERT(r, buf32[i] == 76 && buf16[i] == 54 && buf8[i] == 0); i++; in DEF_TEST()
260 REPORTER_ASSERT(r, buf32[i] == 54 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
263 REPORTER_ASSERT(r, buf32[i] == 34 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
264 REPORTER_ASSERT(r, buf32[i] == 56 && buf16[i] == 34 && buf8[i] == 0); i++; in DEF_TEST()
265 REPORTER_ASSERT(r, buf32[i] == 78 && buf16[i] == 0 && buf8[i] == 0); i++; in DEF_TEST()
279 b.store32(buf32, b.gather32(uniforms,0, x)); in DEF_TEST()
280 b.store16(buf16, b.gather16(uniforms,0, x)); in DEF_TEST()
281 b.store8 (buf8 , b.gather8 (uniforms,0, x)); in DEF_TEST()
286 for (int i = 0; i < 256; i++) { in DEF_TEST()
294 for (int i = 0; i < 64; i++) { in DEF_TEST()
296 buf16[i] = 0; in DEF_TEST()
297 buf8 [i] = 0; in DEF_TEST()
306 for (int i = 0; i < 64; i++) { in DEF_TEST()
307 REPORTER_ASSERT(r, buf8[i] == ((i*47)&63)); // 0,47,30,13,60,... in DEF_TEST()
310 REPORTER_ASSERT(r, buf16[ 0] == 0x0100); in DEF_TEST()
311 REPORTER_ASSERT(r, buf16[63] == 0x2322); in DEF_TEST()
313 REPORTER_ASSERT(r, buf32[ 0] == 0x03020100); in DEF_TEST()
314 REPORTER_ASSERT(r, buf32[63] == 0x47464544); in DEF_TEST()
325 x = b.bit_and (x, b.splat(0xf1)); // 0x40 in DEF_TEST()
326 x = b.bit_or (x, b.splat(0x80)); // 0xc0 in DEF_TEST()
327 x = b.bit_xor (x, b.splat(0xfe)); // 0x3e in DEF_TEST()
328 x = b.bit_clear(x, b.splat(0x30)); // 0x0e in DEF_TEST()
330 x = b.shl(x, 28); // 0xe000'0000 in DEF_TEST()
331 x = b.sra(x, 28); // 0xffff'fffe in DEF_TEST()
332 x = b.shr(x, 1); // 0x7fff'ffff in DEF_TEST()
338 int x = 0x42; in DEF_TEST()
340 REPORTER_ASSERT(r, x == 0x7fff'ffff); in DEF_TEST()
358 REPORTER_ASSERT(r, program[0].op == skvm::Op::load32); in DEF_TEST()
364 // ±NaN, ±0, ±1, ±inf in DEF_TEST()
365 uint32_t src[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000, in DEF_TEST()
366 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000}; in DEF_TEST()
370 for (int i = 0; i < (int)std::size(src); i++) { in DEF_TEST()
371 REPORTER_ASSERT(r, dst[i] == (i < 2 ? 0 : src[i])); in DEF_TEST()
403 return b.shl(b.bit_and(mask, b.splat(0x1)), shift); in DEF_TEST()
406 skvm::I32 m = b.splat(0); in DEF_TEST()
407 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0)))); in DEF_TEST()
417 int in[] = { 0,1,2,3,4,5,6,7,8,9 }; in DEF_TEST()
422 REPORTER_ASSERT(r, out[0] == 0b001111); in DEF_TEST()
423 REPORTER_ASSERT(r, out[1] == 0b001100); in DEF_TEST()
424 REPORTER_ASSERT(r, out[2] == 0b001010); in DEF_TEST()
425 REPORTER_ASSERT(r, out[3] == 0b001010); in DEF_TEST()
426 REPORTER_ASSERT(r, out[4] == 0b000010); in DEF_TEST()
428 REPORTER_ASSERT(r, out[i] == 0b110010); in DEF_TEST()
439 return b.shl(b.bit_and(mask, b.splat(0x1)), shift); in DEF_TEST()
442 skvm::I32 m = b.splat(0); in DEF_TEST()
443 m = b.bit_or(m, to_bit(0, b. eq(x, b.splat(0.0f)))); in DEF_TEST()
454 float in[] = { 0,1,2,3,4,5,6,7,8,9 }; in DEF_TEST()
459 REPORTER_ASSERT(r, out[0] == 0b001111); in DEF_TEST()
460 REPORTER_ASSERT(r, out[1] == 0b001100); in DEF_TEST()
461 REPORTER_ASSERT(r, out[2] == 0b001010); in DEF_TEST()
462 REPORTER_ASSERT(r, out[3] == 0b001010); in DEF_TEST()
463 REPORTER_ASSERT(r, out[4] == 0b000010); in DEF_TEST()
465 REPORTER_ASSERT(r, out[i] == 0b110010); in DEF_TEST()
477 for (int i = 0; i < (int)std::size(buf); i++) { in DEF_TEST()
512 // Create a pattern that can be peepholed into an Op::fms_f32. in DEF_TEST()
524 int buf[] = {0,1,2,3,4,5,6,7,8,9,10}; in DEF_TEST()
527 for (int i = 0; i < (int)std::size(buf); i++) { in DEF_TEST()
534 // Create a pattern that can be peepholed into an Op::fnma_f32. in DEF_TEST()
546 int buf[] = {0,1,2,3,4,5,6,7,8,9,10}; in DEF_TEST()
549 for (int i = 0; i < (int)std::size(buf); i++) { in DEF_TEST()
588 for (int i = 0; i < (int)std::size(buf); i++) { in DEF_TEST()
606 int want[] = { -2 , 0 , 0 , 0 , 0 , 1 , 1 , 1 , 2 , 2 }; in DEF_TEST()
610 for (int i = 0; i < (int)std::size(dst); i++) { in DEF_TEST()
632 for (int i = 0; i < (int)std::size(d); i++) { in DEF_TEST()
654 for (int i = 0; i < (int)std::size(d); i++) { in DEF_TEST()
667 for (int i = 0; i < 32; i++) { in DEF_TEST()
676 // x += 0 + 1 + 2 + 3 + ... + 30 + 31 in DEF_TEST()
695 int buf[] = { 0,1,2,3,4,5,6,7,8 }; in DEF_TEST()
697 for (int i = 0; i < (int)std::size(buf); i++) { in DEF_TEST()
728 int b1[] = { 0,1,2,3 }; in DEF_TEST()
731 for (int i = 0; i < (int)std::size(b1); i++) { in DEF_TEST()
739 // Exercise a somewhat arbitrary set of new ops. in DEF_TEST()
749 x = b.add(x, b.uniform32(uniforms, kPtr+0)); in DEF_TEST()
754 x = b.select(b.lt(x, b.splat(0)), b.splat(0), x); in DEF_TEST()
757 x = b.gather8(uniforms,0, x); in DEF_TEST()
765 for (int i = 0; i < N; i++) { in DEF_TEST()
771 for (int i = 0; i < M; i++) { in DEF_TEST()
785 for (int i = 0; i < N; i++) { in DEF_TEST()
786 // Our first math calculates x = (i+5)*3 - 18 a.k.a 3*(i-1). in DEF_TEST()
790 if (i < 2) { x = 0; } // Notice i == 1 hits x == 0 exactly... in DEF_TEST()
802 skvm::Uniforms uniforms(b.uniform(), 0); in DEF_TEST()
803 // Take up the first slot, so other uniforms are not at 0 offset. in DEF_TEST()
804 uniforms.push(0); in DEF_TEST()
814 skvm::I32 j = b.array32(array, 0); in DEF_TEST()
819 skvm::F32 x = b.arrayF(arrayF, 0); in DEF_TEST()
830 // reset the i[0] for the two tests. in DEF_TEST()
831 i[0] = 3; in DEF_TEST()
843 i[0] = 4; in DEF_TEST()
866 for (int i = 0; i < K; i++) { in DEF_TEST()
873 for (int i = 0; i < K; i++) { in DEF_TEST()
903 int buf[] = { 0,1,2,3,4,5,6,7,8,9 }; in DEF_TEST()
923 b.trace_line(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 123); in DEF_TEST()
924 b.trace_line(traceHookID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 456); in DEF_TEST()
925 b.trace_line(traceHookID, b.splat(0xFFFFFFFF), b.splat(0x00000000), 567); in DEF_TEST()
926 b.trace_line(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 678); in DEF_TEST()
927 b.trace_line(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 789); in DEF_TEST()
952 b.trace_var(traceHookID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 2, b.splat(333)); in DEF_TEST()
953 b.trace_var(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 4, b.splat(555)); in DEF_TEST()
954 b.trace_var(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 5, b.splat(666)); in DEF_TEST()
955 b.trace_var(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 6, b.splat(777)); in DEF_TEST()
956 b.trace_var(traceHookID, b.splat(0xFFFFFFFF), b.splat(0x00000000), 8, b.splat(999)); in DEF_TEST()
975 fBuffer.push_back(0); in DEF_TEST()
984 b.trace_enter(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 99); in DEF_TEST()
985 b.trace_enter(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 12); in DEF_TEST()
986 b.trace_enter(traceHookID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 34); in DEF_TEST()
987 b.trace_exit(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 56); in DEF_TEST()
988 b.trace_exit(traceHookID, b.splat(0xFFFFFFFF), b.splat(0x00000000), 78); in DEF_TEST()
989 b.trace_exit(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 90); in DEF_TEST()
993 REPORTER_ASSERT(r, (testTrace.fBuffer == std::vector<int>{12, 1, 56, 0})); in DEF_TEST()
1011 b.trace_scope(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 1); in DEF_TEST()
1012 b.trace_scope(traceHookID, b.splat(0xFFFFFFFF), b.splat(0x00000000), -2); in DEF_TEST()
1013 b.trace_scope(traceHookID, b.splat(0x00000000), b.splat(0x00000000), 3); in DEF_TEST()
1014 b.trace_scope(traceHookID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 4); in DEF_TEST()
1015 b.trace_scope(traceHookID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), -5); in DEF_TEST()
1039 b.trace_line(traceHookCID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 111); in DEF_TEST()
1040 b.trace_line(traceHookAID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 222); in DEF_TEST()
1041 b.trace_line(traceHookCID, b.splat(0x00000000), b.splat(0x00000000), 333); in DEF_TEST()
1042 b.trace_line(traceHookBID, b.splat(0xFFFFFFFF), b.splat(0x00000000), 444); in DEF_TEST()
1043 b.trace_line(traceHookAID, b.splat(0x00000000), b.splat(0xFFFFFFFF), 555); in DEF_TEST()
1044 b.trace_line(traceHookBID, b.splat(0xFFFFFFFF), b.splat(0xFFFFFFFF), 666); in DEF_TEST()
1063 a = p.loadF(aptr); in DEF_TEST() local
1065 p.premul(&r, &g, &b, a); in DEF_TEST()
1079 a = p.splat(1.0f); in DEF_TEST() local
1081 p.premul(&r, &g, &b, a); in DEF_TEST()
1097 a = p.loadF(aptr); in DEF_TEST() local
1099 p.unpremul(&r, &g, &b, a); in DEF_TEST()
1102 // load red, load alpha, a bunch of unpremul instructions, store red in DEF_TEST()
1113 a = p.splat(1.0f); in DEF_TEST() local
1115 p.unpremul(&r, &g, &b, a); in DEF_TEST()
1126 skvm::Assembler a{buf}; in test_asm() local
1127 fn(a); in test_asm()
1129 REPORTER_ASSERT(r, a.size() == expected.size()); in test_asm()
1133 for (int i = 0; i < (int)std::min(a.size(), expected.size()); i++) { in test_asm()
1147 // that a bit easier to use here, despite maybe favoring AT&T overall. in DEF_TEST()
1149 using A = skvm::Assembler; in DEF_TEST() typedef
1151 test_asm(r, [&](A& a) { in DEF_TEST() argument
1152 a.int3(); in DEF_TEST()
1153 a.vzeroupper(); in DEF_TEST()
1154 a.ret(); in DEF_TEST()
1156 0xcc, in DEF_TEST()
1157 0xc5, 0xf8, 0x77, in DEF_TEST()
1158 0xc3, in DEF_TEST()
1162 test_asm(r, [&](A& a) { in DEF_TEST() argument
1163 a.ret(); in DEF_TEST()
1164 a.align(4); in DEF_TEST()
1166 0xc3, in DEF_TEST()
1167 0x00, 0x00, 0x00, in DEF_TEST()
1170 test_asm(r, [&](A& a) { in DEF_TEST() argument
1171 a.add(A::rax, 8); // Always good to test rax. in DEF_TEST()
1172 a.sub(A::rax, 32); in DEF_TEST()
1174 a.add(A::rdi, 12); // Last 0x48 REX in DEF_TEST()
1175 a.sub(A::rdi, 8); in DEF_TEST()
1177 a.add(A::r8 , 7); // First 0x49 REX in DEF_TEST()
1178 a.sub(A::r8 , 4); in DEF_TEST()
1180 a.add(A::rsi, 128); // Requires 4 byte immediate. in DEF_TEST()
1181 a.sub(A::r8 , 1000000); in DEF_TEST()
1183 a.add(A::Mem{A::rsi}, 7); // addq $7, (%rsi) in DEF_TEST()
1184 a.add(A::Mem{A::rsi, 12}, 7); // addq $7, 12(%rsi) in DEF_TEST()
1185 a.add(A::Mem{A::rsp, 12}, 7); // addq $7, 12(%rsp) in DEF_TEST()
1186 a.add(A::Mem{A::r12, 12}, 7); // addq $7, 12(%r12) in DEF_TEST()
1187 a.add(A::Mem{A::rsp, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%rsp,%rax,4) in DEF_TEST()
1188 a.add(A::Mem{A::r12, 12, A::rax, A::FOUR}, 7); // addq $7, 12(%r12,%rax,4) in DEF_TEST()
1189 a.add(A::Mem{A::rax, 12, A::r12, A::FOUR}, 7); // addq $7, 12(%rax,%r12,4) in DEF_TEST()
1190 a.add(A::Mem{A::r11, 12, A::r8 , A::TWO }, 7); // addq $7, 12(%r11,%r8,2) in DEF_TEST()
1191 a.add(A::Mem{A::r11, 12, A::rax} , 7); // addq $7, 12(%r11,%rax) in DEF_TEST()
1192 a.add(A::Mem{A::rax, 12, A::r11} , 7); // addq $7, 12(%rax,%r11) in DEF_TEST()
1194 a.sub(A::Mem{A::rax, 12, A::r11} , 7); // subq $7, 12(%rax,%r11) in DEF_TEST()
1196 a.add( A::rax , A::rcx); // addq %rcx, %rax in DEF_TEST()
1197 a.add(A::Mem{A::rax} , A::rcx); // addq %rcx, (%rax) in DEF_TEST()
1198 a.add(A::Mem{A::rax, 12}, A::rcx); // addq %rcx, 12(%rax) in DEF_TEST()
1199 a.add(A::rcx, A::Mem{A::rax, 12}); // addq 12(%rax), %rcx in DEF_TEST()
1201 a.sub(A::rcx, A::Mem{A::rax, 12}); // subq 12(%rax), %rcx in DEF_TEST()
1203 0x48, 0x83, 0b11'000'000, 0x08, in DEF_TEST()
1204 0x48, 0x83, 0b11'101'000, 0x20, in DEF_TEST()
1206 0x48, 0x83, 0b11'000'111, 0x0c, in DEF_TEST()
1207 0x48, 0x83, 0b11'101'111, 0x08, in DEF_TEST()
1209 0x49, 0x83, 0b11'000'000, 0x07, in DEF_TEST()
1210 0x49, 0x83, 0b11'101'000, 0x04, in DEF_TEST()
1212 0x48, 0x81, 0b11'000'110, 0x80, 0x00, 0x00, 0x00, in DEF_TEST()
1213 0x49, 0x81, 0b11'101'000, 0x40, 0x42, 0x0f, 0x00, in DEF_TEST()
1215 0x48,0x83,0x06,0x07, in DEF_TEST()
1216 0x48,0x83,0x46,0x0c,0x07, in DEF_TEST()
1217 0x48,0x83,0x44,0x24,0x0c,0x07, in DEF_TEST()
1218 0x49,0x83,0x44,0x24,0x0c,0x07, in DEF_TEST()
1219 0x48,0x83,0x44,0x84,0x0c,0x07, in DEF_TEST()
1220 0x49,0x83,0x44,0x84,0x0c,0x07, in DEF_TEST()
1221 0x4a,0x83,0x44,0xa0,0x0c,0x07, in DEF_TEST()
1222 0x4b,0x83,0x44,0x43,0x0c,0x07, in DEF_TEST()
1223 0x49,0x83,0x44,0x03,0x0c,0x07, in DEF_TEST()
1224 0x4a,0x83,0x44,0x18,0x0c,0x07, in DEF_TEST()
1226 0x4a,0x83,0x6c,0x18,0x0c,0x07, in DEF_TEST()
1228 0x48,0x01,0xc8, in DEF_TEST()
1229 0x48,0x01,0x08, in DEF_TEST()
1230 0x48,0x01,0x48,0x0c, in DEF_TEST()
1231 0x48,0x03,0x48,0x0c, in DEF_TEST()
1232 0x48,0x2b,0x48,0x0c, in DEF_TEST()
1236 test_asm(r, [&](A& a) { in DEF_TEST() argument
1237 a.vpaddd (A::ymm0, A::ymm1, A::ymm2); // Low registers and 0x0f map -> 2-byte VEX. in DEF_TEST()
1238 a.vpaddd (A::ymm8, A::ymm1, A::ymm2); // A high dst register is ok -> 2-byte VEX. in DEF_TEST()
1239 a.vpaddd (A::ymm0, A::ymm8, A::ymm2); // A high first argument register -> 2-byte VEX. in DEF_TEST()
1240 a.vpaddd (A::ymm0, A::ymm1, A::ymm8); // A high second argument -> 3-byte VEX. in DEF_TEST()
1241 a.vpmulld(A::ymm0, A::ymm1, A::ymm2); // Using non-0x0f map instruction -> 3-byte VEX. in DEF_TEST()
1242 a.vpsubd (A::ymm0, A::ymm1, A::ymm2); // Test vpsubd to ensure argument order is right. in DEF_TEST()
1245 0xc5, 0xf5, 0xfe, 0xc2, in DEF_TEST()
1246 0xc5, 0x75, 0xfe, 0xc2, in DEF_TEST()
1247 0xc5, 0xbd, 0xfe, 0xc2, in DEF_TEST()
1248 0xc4, 0xc1, 0x75, 0xfe, 0xc0, in DEF_TEST()
1249 0xc4, 0xe2, 0x75, 0x40, 0xc2, in DEF_TEST()
1250 0xc5, 0xf5, 0xfa, 0xc2, in DEF_TEST()
1253 test_asm(r, [&](A& a) { in DEF_TEST() argument
1254 a.vpaddw (A::ymm4, A::ymm3, A::ymm2); in DEF_TEST()
1255 a.vpavgw (A::ymm4, A::ymm3, A::ymm2); in DEF_TEST()
1256 a.vpcmpeqw (A::ymm4, A::ymm3, A::ymm2); in DEF_TEST()
1257 a.vpcmpgtw (A::ymm4, A::ymm3, A::ymm2); in DEF_TEST()
1259 a.vpminsw (A::ymm4, A::ymm3, A::ymm2); in DEF_TEST()
1260 a.vpmaxsw (A::ymm4, A::ymm3, A::ymm2); in DEF_TEST()
1261 a.vpminuw (A::ymm4, A::ymm3, A::ymm2); in DEF_TEST()
1262 a.vpmaxuw (A::ymm4, A::ymm3, A::ymm2); in DEF_TEST()
1264 a.vpmulhrsw(A::ymm4, A::ymm3, A::ymm2); in DEF_TEST()
1265 a.vpabsw (A::ymm4, A::ymm3); in DEF_TEST()
1266 a.vpsllw (A::ymm4, A::ymm3, 12); in DEF_TEST()
1267 a.vpsraw (A::ymm4, A::ymm3, 12); in DEF_TEST()
1269 0xc5, 0xe5, 0xfd, 0xe2, in DEF_TEST()
1270 0xc5, 0xe5, 0xe3, 0xe2, in DEF_TEST()
1271 0xc5, 0xe5, 0x75, 0xe2, in DEF_TEST()
1272 0xc5, 0xe5, 0x65, 0xe2, in DEF_TEST()
1274 0xc5, 0xe5, 0xea, 0xe2, in DEF_TEST()
1275 0xc5, 0xe5, 0xee, 0xe2, in DEF_TEST()
1276 0xc4,0xe2,0x65, 0x3a, 0xe2, in DEF_TEST()
1277 0xc4,0xe2,0x65, 0x3e, 0xe2, in DEF_TEST()
1279 0xc4,0xe2,0x65, 0x0b, 0xe2, in DEF_TEST()
1280 0xc4,0xe2,0x7d, 0x1d, 0xe3, in DEF_TEST()
1281 0xc5,0xdd,0x71, 0xf3, 0x0c, in DEF_TEST()
1282 0xc5,0xdd,0x71, 0xe3, 0x0c, in DEF_TEST()
1285 test_asm(r, [&](A& a) { in DEF_TEST() argument
1286 A::Label l; in DEF_TEST()
1287 a.vcmpeqps (A::ymm0, A::ymm1, &l); // vcmpeqps 0x1c(%rip), %ymm1, %ymm0 in DEF_TEST()
1288 a.vpcmpeqd (A::ymm0, A::ymm1, A::ymm2); in DEF_TEST()
1289 a.vpcmpgtd (A::ymm0, A::ymm1, A::ymm2); in DEF_TEST()
1290 a.vcmpeqps (A::ymm0, A::ymm1, A::ymm2); in DEF_TEST()
1291 a.vcmpltps (A::ymm0, A::ymm1, A::ymm2); in DEF_TEST()
1292 a.vcmpleps (A::ymm0, A::ymm1, A::ymm2); in DEF_TEST()
1293 a.vcmpneqps(A::ymm0, A::ymm1, A::ymm2); in DEF_TEST()
1294 a.label(&l); // 28 bytes after the vcmpeqps that uses it. in DEF_TEST()
1296 0xc5,0xf4,0xc2,0x05,0x1c,0x00,0x00,0x00,0x00, in DEF_TEST()
1297 0xc5,0xf5,0x76,0xc2, in DEF_TEST()
1298 0xc5,0xf5,0x66,0xc2, in DEF_TEST()
1299 0xc5,0xf4,0xc2,0xc2,0x00, in DEF_TEST()
1300 0xc5,0xf4,0xc2,0xc2,0x01, in DEF_TEST()
1301 0xc5,0xf4,0xc2,0xc2,0x02, in DEF_TEST()
1302 0xc5,0xf4,0xc2,0xc2,0x04, in DEF_TEST()
1305 test_asm(r, [&](A& a) { in DEF_TEST() argument
1306 a.vminps(A::ymm0, A::ymm1, A::ymm2); in DEF_TEST()
1307 a.vmaxps(A::ymm0, A::ymm1, A::ymm2); in DEF_TEST()
1309 0xc5,0xf4,0x5d,0xc2, in DEF_TEST()
1310 0xc5,0xf4,0x5f,0xc2, in DEF_TEST()
1313 test_asm(r, [&](A& a) { in DEF_TEST() argument
1314 a.vpblendvb(A::ymm0, A::ymm1, A::ymm2, A::ymm3); in DEF_TEST()
1316 0xc4,0xe3,0x75, 0x4c, 0xc2, 0x30, in DEF_TEST()
1319 test_asm(r, [&](A& a) { in DEF_TEST() argument
1320 a.vpsrld(A::ymm15, A::ymm2, 8); in DEF_TEST()
1321 a.vpsrld(A::ymm0 , A::ymm8, 5); in DEF_TEST()
1323 0xc5, 0x85, 0x72,0xd2, 0x08, in DEF_TEST()
1324 0xc4,0xc1,0x7d, 0x72,0xd0, 0x05, in DEF_TEST()
1327 test_asm(r, [&](A& a) { in DEF_TEST() argument
1328 A::Label l; in DEF_TEST()
1329 a.vpermps(A::ymm1, A::ymm2, A::Mem{A::rdi, 32}); in DEF_TEST()
1330 a.vperm2f128(A::ymm1, A::ymm2, &l, 0x20); in DEF_TEST()
1331 a.vpermq(A::ymm1, A::ymm2, 5); in DEF_TEST()
1332 a.label(&l); // 6 bytes after vperm2f128 in DEF_TEST()
1334 0xc4,0xe2,0x6d,0x16,0x4f,0x20, in DEF_TEST()
1335 0xc4,0xe3,0x6d,0x06,0x0d,0x06,0x00,0x00,0x00,0x20, in DEF_TEST()
1336 0xc4,0xe3,0xfd, 0x00,0xca, 0x05, in DEF_TEST()
1339 test_asm(r, [&](A& a) { in DEF_TEST() argument
1340 a.vpunpckldq(A::ymm1, A::ymm2, A::Mem{A::rdi}); in DEF_TEST()
1341 a.vpunpckhdq(A::ymm1, A::ymm2, A::ymm3); in DEF_TEST()
1343 0xc5,0xed,0x62,0x0f, in DEF_TEST()
1344 0xc5,0xed,0x6a,0xcb, in DEF_TEST()
1347 test_asm(r, [&](A& a) { in DEF_TEST() argument
1348 a.vroundps(A::ymm1, A::ymm2, A::NEAREST); in DEF_TEST()
1349 a.vroundps(A::ymm1, A::ymm2, A::FLOOR); in DEF_TEST()
1350 a.vroundps(A::ymm1, A::ymm2, A::CEIL); in DEF_TEST()
1351 a.vroundps(A::ymm1, A::ymm2, A::TRUNC); in DEF_TEST()
1353 0xc4,0xe3,0x7d,0x08,0xca,0x00, in DEF_TEST()
1354 0xc4,0xe3,0x7d,0x08,0xca,0x01, in DEF_TEST()
1355 0xc4,0xe3,0x7d,0x08,0xca,0x02, in DEF_TEST()
1356 0xc4,0xe3,0x7d,0x08,0xca,0x03, in DEF_TEST()
1359 test_asm(r, [&](A& a) { in DEF_TEST() argument
1360 A::Label l; in DEF_TEST()
1361 a.label(&l); in DEF_TEST()
1362 a.byte(1); in DEF_TEST()
1363 a.byte(2); in DEF_TEST()
1364 a.byte(3); in DEF_TEST()
1365 a.byte(4); in DEF_TEST()
1367 a.vbroadcastss(A::ymm0 , &l); in DEF_TEST()
1368 a.vbroadcastss(A::ymm1 , &l); in DEF_TEST()
1369 a.vbroadcastss(A::ymm8 , &l); in DEF_TEST()
1370 a.vbroadcastss(A::ymm15, &l); in DEF_TEST()
1372 a.vpshufb(A::ymm4, A::ymm3, &l); in DEF_TEST()
1373 a.vpaddd (A::ymm4, A::ymm3, &l); in DEF_TEST()
1374 a.vpsubd (A::ymm4, A::ymm3, &l); in DEF_TEST()
1376 a.vptest(A::ymm4, &l); in DEF_TEST()
1378 a.vmulps (A::ymm4, A::ymm3, &l); in DEF_TEST()
1380 0x01, 0x02, 0x03, 0x4, in DEF_TEST()
1383 0xc4, 0xe2, 0x7d, 0x18, 0b00'000'101, 0xf3,0xff,0xff,0xff, // 0xfffffff3 == -13 in DEF_TEST()
1384 0xc4, 0xe2, 0x7d, 0x18, 0b00'001'101, 0xea,0xff,0xff,0xff, // 0xffffffea == -22 in DEF_TEST()
1385 0xc4, 0x62, 0x7d, 0x18, 0b00'000'101, 0xe1,0xff,0xff,0xff, // 0xffffffe1 == -31 in DEF_TEST()
1386 0xc4, 0x62, 0x7d, 0x18, 0b00'111'101, 0xd8,0xff,0xff,0xff, // 0xffffffd8 == -40 in DEF_TEST()
1388 0xc4, 0xe2, 0x65, 0x00, 0b00'100'101, 0xcf,0xff,0xff,0xff, // 0xffffffcf == -49 in DEF_TEST()
1390 0xc5, 0xe5, 0xfe, 0b00'100'101, 0xc7,0xff,0xff,0xff, // 0xffffffc7 == -57 in DEF_TEST()
1391 0xc5, 0xe5, 0xfa, 0b00'100'101, 0xbf,0xff,0xff,0xff, // 0xffffffbf == -65 in DEF_TEST()
1393 0xc4, 0xe2, 0x7d, 0x17, 0b00'100'101, 0xb6,0xff,0xff,0xff, // 0xffffffb6 == -74 in DEF_TEST()
1395 0xc5, 0xe4, 0x59, 0b00'100'101, 0xae,0xff,0xff,0xff, // 0xffffffae == -82 in DEF_TEST()
1398 test_asm(r, [&](A& a) { in DEF_TEST() argument
1399 a.vbroadcastss(A::ymm0, A::Mem{A::rdi, 0}); in DEF_TEST()
1400 a.vbroadcastss(A::ymm13, A::Mem{A::r14, 7}); in DEF_TEST()
1401 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, -12}); in DEF_TEST()
1402 a.vbroadcastss(A::ymm8, A::Mem{A::rdx, 400}); in DEF_TEST()
1404 a.vbroadcastss(A::ymm8, A::xmm0); in DEF_TEST()
1405 a.vbroadcastss(A::ymm0, A::xmm13); in DEF_TEST()
1408 0xc4,0xe2,0x7d, 0x18, 0b00'000'111, in DEF_TEST()
1409 0xc4,0x42,0x7d, 0x18, 0b01'101'110, 0x07, in DEF_TEST()
1410 0xc4,0x62,0x7d, 0x18, 0b01'000'010, 0xf4, in DEF_TEST()
1411 0xc4,0x62,0x7d, 0x18, 0b10'000'010, 0x90,0x01,0x00,0x00, in DEF_TEST()
1413 0xc4,0x62,0x7d, 0x18, 0b11'000'000, in DEF_TEST()
1414 0xc4,0xc2,0x7d, 0x18, 0b11'000'101, in DEF_TEST()
1417 test_asm(r, [&](A& a) { in DEF_TEST() argument
1418 A::Label l; in DEF_TEST()
1419 a.label(&l); in DEF_TEST()
1420 a.jne(&l); in DEF_TEST()
1421 a.jne(&l); in DEF_TEST()
1422 a.je (&l); in DEF_TEST()
1423 a.jmp(&l); in DEF_TEST()
1424 a.jl (&l); in DEF_TEST()
1425 a.jc (&l); in DEF_TEST()
1427 a.cmp(A::rdx, 1); in DEF_TEST()
1428 a.cmp(A::rax, 12); in DEF_TEST()
1429 a.cmp(A::r14, 2000000000); in DEF_TEST()
1431 0x0f,0x85, 0xfa,0xff,0xff,0xff, // near jne -6 bytes in DEF_TEST()
1432 0x0f,0x85, 0xf4,0xff,0xff,0xff, // near jne -12 bytes in DEF_TEST()
1433 0x0f,0x84, 0xee,0xff,0xff,0xff, // near je -18 bytes in DEF_TEST()
1434 0xe9, 0xe9,0xff,0xff,0xff, // near jmp -23 bytes in DEF_TEST()
1435 0x0f,0x8c, 0xe3,0xff,0xff,0xff, // near jl -29 bytes in DEF_TEST()
1436 0x0f,0x82, 0xdd,0xff,0xff,0xff, // near jc -35 bytes in DEF_TEST()
1438 0x48,0x83,0xfa,0x01, in DEF_TEST()
1439 0x48,0x83,0xf8,0x0c, in DEF_TEST()
1440 0x49,0x81,0xfe,0x00,0x94,0x35,0x77, in DEF_TEST()
1443 test_asm(r, [&](A& a) { in DEF_TEST() argument
1444 a.vmovups(A::ymm5, A::Mem{A::rsi}); in DEF_TEST()
1445 a.vmovups(A::Mem{A::rsi}, A::ymm5); in DEF_TEST()
1447 a.vmovups(A::xmm5, A::Mem{A::rsi}); in DEF_TEST()
1448 a.vmovups(A::Mem{A::rsi}, A::xmm5); in DEF_TEST()
1450 a.vpmovzxwd(A::ymm4, A::Mem{A::rsi}); in DEF_TEST()
1451 a.vpmovzxbd(A::ymm4, A::Mem{A::rsi}); in DEF_TEST()
1453 a.vmovq(A::Mem{A::rdx}, A::xmm15); in DEF_TEST()
1456 0xc5, 0xfc, 0x10, 0b00'101'110, in DEF_TEST()
1457 0xc5, 0xfc, 0x11, 0b00'101'110, in DEF_TEST()
1459 0xc5, 0xf8, 0x10, 0b00'101'110, in DEF_TEST()
1460 0xc5, 0xf8, 0x11, 0b00'101'110, in DEF_TEST()
1462 0xc4,0xe2,0x7d, 0x33, 0b00'100'110, in DEF_TEST()
1463 0xc4,0xe2,0x7d, 0x31, 0b00'100'110, in DEF_TEST()
1465 0xc5, 0x79, 0xd6, 0b00'111'010, in DEF_TEST()
1468 test_asm(r, [&](A& a) { in DEF_TEST() argument
1469 a.vmovups(A::ymm5, A::Mem{A::rsp, 0}); in DEF_TEST()
1470 a.vmovups(A::ymm5, A::Mem{A::rsp, 64}); in DEF_TEST()
1471 a.vmovups(A::ymm5, A::Mem{A::rsp,128}); in DEF_TEST()
1473 a.vmovups(A::Mem{A::rsp, 0}, A::ymm5); in DEF_TEST()
1474 a.vmovups(A::Mem{A::rsp, 64}, A::ymm5); in DEF_TEST()
1475 a.vmovups(A::Mem{A::rsp,128}, A::ymm5); in DEF_TEST()
1477 0xc5,0xfc,0x10,0x2c,0x24, in DEF_TEST()
1478 0xc5,0xfc,0x10,0x6c,0x24,0x40, in DEF_TEST()
1479 0xc5,0xfc,0x10,0xac,0x24,0x80,0x00,0x00,0x00, in DEF_TEST()
1481 0xc5,0xfc,0x11,0x2c,0x24, in DEF_TEST()
1482 0xc5,0xfc,0x11,0x6c,0x24,0x40, in DEF_TEST()
1483 0xc5,0xfc,0x11,0xac,0x24,0x80,0x00,0x00,0x00, in DEF_TEST()
1486 test_asm(r, [&](A& a) { in DEF_TEST() argument
1487 a.movzbq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst. in DEF_TEST()
1488 a.movzbq(A::rax, A::Mem{A::r8,}); // High src register. in DEF_TEST()
1489 a.movzbq(A::r8 , A::Mem{A::rsi}); // High dst register. in DEF_TEST()
1490 a.movzbq(A::r8, A::Mem{A::rsi, 12}); in DEF_TEST()
1491 a.movzbq(A::r8, A::Mem{A::rsi, 400}); in DEF_TEST()
1493 a.movzwq(A::rax, A::Mem{A::rsi}); // Low registers for src and dst. in DEF_TEST()
1494 a.movzwq(A::rax, A::Mem{A::r8,}); // High src register. in DEF_TEST()
1495 a.movzwq(A::r8 , A::Mem{A::rsi}); // High dst register. in DEF_TEST()
1496 a.movzwq(A::r8, A::Mem{A::rsi, 12}); in DEF_TEST()
1497 a.movzwq(A::r8, A::Mem{A::rsi, 400}); in DEF_TEST()
1499 a.vmovd(A::Mem{A::rax}, A::xmm0); in DEF_TEST()
1500 a.vmovd(A::Mem{A::rax}, A::xmm8); in DEF_TEST()
1501 a.vmovd(A::Mem{A::r8 }, A::xmm0); in DEF_TEST()
1503 a.vmovd(A::xmm0, A::Mem{A::rax}); in DEF_TEST()
1504 a.vmovd(A::xmm8, A::Mem{A::rax}); in DEF_TEST()
1505 a.vmovd(A::xmm0, A::Mem{A::r8 }); in DEF_TEST()
1507 a.vmovd(A::xmm0 , A::Mem{A::rax, 0, A::rcx, A::FOUR}); in DEF_TEST()
1508 a.vmovd(A::xmm15, A::Mem{A::rax, 0, A::r8, A::TWO }); in DEF_TEST()
1509 a.vmovd(A::xmm0 , A::Mem{A::r8 , 0, A::rcx}); in DEF_TEST()
1511 a.vmovd(A::rax, A::xmm0); in DEF_TEST()
1512 a.vmovd(A::rax, A::xmm8); in DEF_TEST()
1513 a.vmovd(A::r8 , A::xmm0); in DEF_TEST()
1515 a.vmovd(A::xmm0, A::rax); in DEF_TEST()
1516 a.vmovd(A::xmm8, A::rax); in DEF_TEST()
1517 a.vmovd(A::xmm0, A::r8 ); in DEF_TEST()
1519 a.movb(A::Mem{A::rdx}, A::rax); in DEF_TEST()
1520 a.movb(A::Mem{A::rdx}, A::r8 ); in DEF_TEST()
1521 a.movb(A::Mem{A::r8 }, A::rax); in DEF_TEST()
1523 a.movb(A::rdx, A::Mem{A::rax}); in DEF_TEST()
1524 a.movb(A::rdx, A::Mem{A::r8 }); in DEF_TEST()
1525 a.movb(A::r8 , A::Mem{A::rax}); in DEF_TEST()
1527 a.movb(A::rdx, 12); in DEF_TEST()
1528 a.movb(A::rax, 4); in DEF_TEST()
1529 a.movb(A::r8 , -1); in DEF_TEST()
1531 a.movb(A::Mem{A::rdx}, 12); in DEF_TEST()
1532 a.movb(A::Mem{A::rax}, 4); in DEF_TEST()
1533 a.movb(A::Mem{A::r8 }, -1); in DEF_TEST()
1535 0x48,0x0f,0xb6,0x06, // movzbq (%rsi), %rax in DEF_TEST()
1536 0x49,0x0f,0xb6,0x00, in DEF_TEST()
1537 0x4c,0x0f,0xb6,0x06, in DEF_TEST()
1538 0x4c,0x0f,0xb6,0x46, 12, in DEF_TEST()
1539 0x4c,0x0f,0xb6,0x86, 0x90,0x01,0x00,0x00, in DEF_TEST()
1541 0x48,0x0f,0xb7,0x06, // movzwq (%rsi), %rax in DEF_TEST()
1542 0x49,0x0f,0xb7,0x00, in DEF_TEST()
1543 0x4c,0x0f,0xb7,0x06, in DEF_TEST()
1544 0x4c,0x0f,0xb7,0x46, 12, in DEF_TEST()
1545 0x4c,0x0f,0xb7,0x86, 0x90,0x01,0x00,0x00, in DEF_TEST()
1547 0xc5,0xf9,0x7e,0x00, in DEF_TEST()
1548 0xc5,0x79,0x7e,0x00, in DEF_TEST()
1549 0xc4,0xc1,0x79,0x7e,0x00, in DEF_TEST()
1551 0xc5,0xf9,0x6e,0x00, in DEF_TEST()
1552 0xc5,0x79,0x6e,0x00, in DEF_TEST()
1553 0xc4,0xc1,0x79,0x6e,0x00, in DEF_TEST()
1555 0xc5,0xf9,0x6e,0x04,0x88, in DEF_TEST()
1556 0xc4,0x21,0x79,0x6e,0x3c,0x40, in DEF_TEST()
1557 0xc4,0xc1,0x79,0x6e,0x04,0x08, in DEF_TEST()
1559 0xc5,0xf9,0x7e,0xc0, in DEF_TEST()
1560 0xc5,0x79,0x7e,0xc0, in DEF_TEST()
1561 0xc4,0xc1,0x79,0x7e,0xc0, in DEF_TEST()
1563 0xc5,0xf9,0x6e,0xc0, in DEF_TEST()
1564 0xc5,0x79,0x6e,0xc0, in DEF_TEST()
1565 0xc4,0xc1,0x79,0x6e,0xc0, in DEF_TEST()
1567 0x48 ,0x88, 0x02, in DEF_TEST()
1568 0x4c, 0x88, 0x02, in DEF_TEST()
1569 0x49, 0x88, 0x00, in DEF_TEST()
1571 0x48 ,0x8a, 0x10, in DEF_TEST()
1572 0x49, 0x8a, 0x10, in DEF_TEST()
1573 0x4c, 0x8a, 0x00, in DEF_TEST()
1575 0x48, 0xc6, 0xc2, 0x0c, in DEF_TEST()
1576 0x48, 0xc6, 0xc0, 0x04, in DEF_TEST()
1577 0x49, 0xc6, 0xc0, 0xff, in DEF_TEST()
1579 0x48, 0xc6, 0x02, 0x0c, in DEF_TEST()
1580 0x48, 0xc6, 0x00, 0x04, in DEF_TEST()
1581 0x49, 0xc6, 0x00, 0xff, in DEF_TEST()
1584 test_asm(r, [&](A& a) { in DEF_TEST() argument
1585 a.vpinsrd(A::xmm1, A::xmm8, A::Mem{A::rsi}, 1); // vpinsrd $1, (%rsi), %xmm8, %xmm1 in DEF_TEST()
1586 a.vpinsrd(A::xmm8, A::xmm1, A::Mem{A::r8 }, 3); // vpinsrd $3, (%r8), %xmm1, %xmm8 in DEF_TEST()
1588 a.vpinsrw(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrw $4, (%rsi), %xmm8, %xmm1 in DEF_TEST()
1589 a.vpinsrw(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrw $12, (%r8), %xmm1, %xmm8 in DEF_TEST()
1591 a.vpinsrb(A::xmm1, A::xmm8, A::Mem{A::rsi}, 4); // vpinsrb $4, (%rsi), %xmm8, %xmm1 in DEF_TEST()
1592 a.vpinsrb(A::xmm8, A::xmm1, A::Mem{A::r8 }, 12); // vpinsrb $12, (%r8), %xmm1, %xmm8 in DEF_TEST()
1594 a.vextracti128(A::xmm1, A::ymm8, 1); // vextracti128 $1, %ymm8, %xmm1 in DEF_TEST()
1595 a.vextracti128(A::xmm8, A::ymm1, 0); // vextracti128 $0, %ymm1, %xmm8 in DEF_TEST()
1597 a.vpextrd(A::Mem{A::rsi}, A::xmm8, 3); // vpextrd $3, %xmm8, (%rsi) in DEF_TEST()
1598 a.vpextrd(A::Mem{A::r8 }, A::xmm1, 2); // vpextrd $2, %xmm1, (%r8) in DEF_TEST()
1600 a.vpextrw(A::Mem{A::rsi}, A::xmm8, 7); in DEF_TEST()
1601 a.vpextrw(A::Mem{A::r8 }, A::xmm1, 15); in DEF_TEST()
1603 a.vpextrb(A::Mem{A::rsi}, A::xmm8, 7); in DEF_TEST()
1604 a.vpextrb(A::Mem{A::r8 }, A::xmm1, 15); in DEF_TEST()
1606 0xc4,0xe3,0x39, 0x22, 0x0e, 1, in DEF_TEST()
1607 0xc4,0x43,0x71, 0x22, 0x00, 3, in DEF_TEST()
1609 0xc5,0xb9, 0xc4, 0x0e, 4, in DEF_TEST()
1610 0xc4,0x41,0x71, 0xc4, 0x00, 12, in DEF_TEST()
1612 0xc4,0xe3,0x39, 0x20, 0x0e, 4, in DEF_TEST()
1613 0xc4,0x43,0x71, 0x20, 0x00, 12, in DEF_TEST()
1615 0xc4,0x63,0x7d,0x39,0xc1, 1, in DEF_TEST()
1616 0xc4,0xc3,0x7d,0x39,0xc8, 0, in DEF_TEST()
1618 0xc4,0x63,0x79,0x16,0x06, 3, in DEF_TEST()
1619 0xc4,0xc3,0x79,0x16,0x08, 2, in DEF_TEST()
1621 0xc4,0x63,0x79, 0x15, 0x06, 7, in DEF_TEST()
1622 0xc4,0xc3,0x79, 0x15, 0x08, 15, in DEF_TEST()
1624 0xc4,0x63,0x79, 0x14, 0x06, 7, in DEF_TEST()
1625 0xc4,0xc3,0x79, 0x14, 0x08, 15, in DEF_TEST()
1628 test_asm(r, [&](A& a) { in DEF_TEST() argument
1629 a.vpandn(A::ymm3, A::ymm12, A::ymm2); in DEF_TEST()
1631 0xc5, 0x9d, 0xdf, 0xda, in DEF_TEST()
1634 test_asm(r, [&](A& a) { in DEF_TEST() argument
1635 A::Label l; in DEF_TEST()
1636 a.vmovdqa(A::ymm3, A::ymm2); // vmovdqa %ymm2 , %ymm3 in DEF_TEST()
1638 a.vmovdqa(A::ymm3, A::Mem{A::rsi}); // vmovdqa (%rsi) , %ymm3 in DEF_TEST()
1639 a.vmovdqa(A::ymm3, A::Mem{A::rsp}); // vmovdqa (%rsp) , %ymm3 in DEF_TEST()
1640 a.vmovdqa(A::ymm3, A::Mem{A::r11}); // vmovdqa (%r11) , %ymm3 in DEF_TEST()
1642 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4}); // vmovdqa 4(%rsi) , %ymm3 in DEF_TEST()
1643 a.vmovdqa(A::ymm3, A::Mem{A::rsp, 4}); // vmovdqa 4(%rsp) , %ymm3 in DEF_TEST()
1645 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::rax, A::EIGHT}); // vmovdqa 4(%rsi,%rax,8), %ymm3 in DEF_TEST()
1646 a.vmovdqa(A::ymm3, A::Mem{A::r11, 4, A::rax, A::TWO }); // vmovdqa 4(%r11,%rax,2), %ymm3 in DEF_TEST()
1647 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::FOUR }); // vmovdqa 4(%rsi,%r11,4), %ymm3 in DEF_TEST()
1648 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11, A::ONE }); // vmovdqa 4(%rsi,%r11,1), %ymm3 in DEF_TEST()
1649 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 4, A::r11}); // vmovdqa 4(%rsi,%r11) , %ymm3 in DEF_TEST()
1651 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 64, A::r11}); // vmovdqa 64(%rsi,%r11), %ymm3 in DEF_TEST()
1652 a.vmovdqa(A::ymm3, A::Mem{A::rsi, 128, A::r11}); // vmovdqa 128(%rsi,%r11), %ymm3 in DEF_TEST()
1653 a.vmovdqa(A::ymm3, &l); // vmovdqa 16(%rip) , %ymm3 in DEF_TEST()
1655 a.vcvttps2dq(A::ymm3, A::ymm2); in DEF_TEST()
1656 a.vcvtdq2ps (A::ymm3, A::ymm2); in DEF_TEST()
1657 a.vcvtps2dq (A::ymm3, A::ymm2); in DEF_TEST()
1658 a.vsqrtps (A::ymm3, A::ymm2); in DEF_TEST()
1659 a.label(&l); in DEF_TEST()
1661 0xc5,0xfd,0x6f,0xda, in DEF_TEST()
1663 0xc5,0xfd,0x6f,0x1e, in DEF_TEST()
1664 0xc5,0xfd,0x6f,0x1c,0x24, in DEF_TEST()
1665 0xc4,0xc1,0x7d,0x6f,0x1b, in DEF_TEST()
1667 0xc5,0xfd,0x6f,0x5e,0x04, in DEF_TEST()
1668 0xc5,0xfd,0x6f,0x5c,0x24,0x04, in DEF_TEST()
1670 0xc5,0xfd,0x6f,0x5c,0xc6,0x04, in DEF_TEST()
1671 0xc4,0xc1,0x7d,0x6f,0x5c,0x43,0x04, in DEF_TEST()
1672 0xc4,0xa1,0x7d,0x6f,0x5c,0x9e,0x04, in DEF_TEST()
1673 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04, in DEF_TEST()
1674 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x04, in DEF_TEST()
1676 0xc4,0xa1,0x7d,0x6f,0x5c,0x1e,0x40, in DEF_TEST()
1677 0xc4,0xa1,0x7d,0x6f,0x9c,0x1e,0x80,0x00,0x00,0x00, in DEF_TEST()
1679 0xc5,0xfd,0x6f,0x1d,0x10,0x00,0x00,0x00, in DEF_TEST()
1681 0xc5,0xfe,0x5b,0xda, in DEF_TEST()
1682 0xc5,0xfc,0x5b,0xda, in DEF_TEST()
1683 0xc5,0xfd,0x5b,0xda, in DEF_TEST()
1684 0xc5,0xfc,0x51,0xda, in DEF_TEST()
1687 test_asm(r, [&](A& a) { in DEF_TEST() argument
1688 a.vcvtps2ph(A::xmm3, A::ymm2, A::CURRENT); in DEF_TEST()
1689 a.vcvtps2ph(A::Mem{A::rsi, 32, A::rax, A::EIGHT}, A::ymm5, A::CEIL); in DEF_TEST()
1691 a.vcvtph2ps(A::ymm15, A::Mem{A::rdi, 12, A::r9, A::ONE}); in DEF_TEST()
1692 a.vcvtph2ps(A::ymm2, A::xmm3); in DEF_TEST()
1694 0xc4,0xe3,0x7d,0x1d,0xd3,0x04, in DEF_TEST()
1695 0xc4,0xe3,0x7d,0x1d,0x6c,0xc6,0x20,0x02, in DEF_TEST()
1697 0xc4,0x22,0x7d,0x13,0x7c,0x0f,0x0c, in DEF_TEST()
1698 0xc4,0xe2,0x7d,0x13,0xd3, in DEF_TEST()
1701 test_asm(r, [&](A& a) { in DEF_TEST() argument
1702 a.vgatherdps(A::ymm1 , A::FOUR , A::ymm0 , A::rdi, A::ymm2 ); in DEF_TEST()
1703 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm1 ); in DEF_TEST()
1704 a.vgatherdps(A::ymm10, A::ONE , A::ymm2 , A::rax, A::ymm1 ); in DEF_TEST()
1705 a.vgatherdps(A::ymm0 , A::ONE , A::ymm12, A::rax, A::ymm1 ); in DEF_TEST()
1706 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::r9 , A::ymm1 ); in DEF_TEST()
1707 a.vgatherdps(A::ymm0 , A::ONE , A::ymm2 , A::rax, A::ymm12); in DEF_TEST()
1708 a.vgatherdps(A::ymm0 , A::EIGHT, A::ymm2 , A::rax, A::ymm12); in DEF_TEST()
1710 0xc4,0xe2,0x6d,0x92,0x0c,0x87, in DEF_TEST()
1711 0xc4,0xe2,0x75,0x92,0x04,0x10, in DEF_TEST()
1712 0xc4,0x62,0x75,0x92,0x14,0x10, in DEF_TEST()
1713 0xc4,0xa2,0x75,0x92,0x04,0x20, in DEF_TEST()
1714 0xc4,0xc2,0x75,0x92,0x04,0x11, in DEF_TEST()
1715 0xc4,0xe2,0x1d,0x92,0x04,0x10, in DEF_TEST()
1716 0xc4,0xe2,0x1d,0x92,0x04,0xd0, in DEF_TEST()
1719 test_asm(r, [&](A& a) { in DEF_TEST() argument
1720 a.mov(A::rax, A::Mem{A::rdi, 0}); in DEF_TEST()
1721 a.mov(A::rax, A::Mem{A::rdi, 1}); in DEF_TEST()
1722 a.mov(A::rax, A::Mem{A::rdi, 512}); in DEF_TEST()
1723 a.mov(A::r15, A::Mem{A::r13, 42}); in DEF_TEST()
1724 a.mov(A::rax, A::Mem{A::r13, 42}); in DEF_TEST()
1725 a.mov(A::r15, A::Mem{A::rax, 42}); in DEF_TEST()
1726 a.mov(A::rax, 1); in DEF_TEST()
1727 a.mov(A::rax, A::rcx); in DEF_TEST()
1729 0x48, 0x8b, 0x07, in DEF_TEST()
1730 0x48, 0x8b, 0x47, 0x01, in DEF_TEST()
1731 0x48, 0x8b, 0x87, 0x00,0x02,0x00,0x00, in DEF_TEST()
1732 0x4d, 0x8b, 0x7d, 0x2a, in DEF_TEST()
1733 0x49, 0x8b, 0x45, 0x2a, in DEF_TEST()
1734 0x4c, 0x8b, 0x78, 0x2a, in DEF_TEST()
1735 0x48, 0xc7, 0xc0, 0x01,0x00,0x00,0x00, in DEF_TEST()
1736 0x48, 0x89, 0xc8, in DEF_TEST()
1741 test_asm(r, [&](A& a) { in DEF_TEST() argument
1742 a.and16b(A::v4, A::v3, A::v1); in DEF_TEST()
1743 a.orr16b(A::v4, A::v3, A::v1); in DEF_TEST()
1744 a.eor16b(A::v4, A::v3, A::v1); in DEF_TEST()
1745 a.bic16b(A::v4, A::v3, A::v1); in DEF_TEST()
1746 a.bsl16b(A::v4, A::v3, A::v1); in DEF_TEST()
1747 a.not16b(A::v4, A::v3); in DEF_TEST()
1749 a.add4s(A::v4, A::v3, A::v1); in DEF_TEST()
1750 a.sub4s(A::v4, A::v3, A::v1); in DEF_TEST()
1751 a.mul4s(A::v4, A::v3, A::v1); in DEF_TEST()
1753 a.cmeq4s(A::v4, A::v3, A::v1); in DEF_TEST()
1754 a.cmgt4s(A::v4, A::v3, A::v1); in DEF_TEST()
1756 a.sub8h(A::v4, A::v3, A::v1); in DEF_TEST()
1757 a.mul8h(A::v4, A::v3, A::v1); in DEF_TEST()
1759 a.fadd4s(A::v4, A::v3, A::v1); in DEF_TEST()
1760 a.fsub4s(A::v4, A::v3, A::v1); in DEF_TEST()
1761 a.fmul4s(A::v4, A::v3, A::v1); in DEF_TEST()
1762 a.fdiv4s(A::v4, A::v3, A::v1); in DEF_TEST()
1763 a.fmin4s(A::v4, A::v3, A::v1); in DEF_TEST()
1764 a.fmax4s(A::v4, A::v3, A::v1); in DEF_TEST()
1766 a.fneg4s (A::v4, A::v3); in DEF_TEST()
1767 a.fsqrt4s(A::v4, A::v3); in DEF_TEST()
1769 a.fmla4s(A::v4, A::v3, A::v1); in DEF_TEST()
1770 a.fmls4s(A::v4, A::v3, A::v1); in DEF_TEST()
1772 a.fcmeq4s(A::v4, A::v3, A::v1); in DEF_TEST()
1773 a.fcmgt4s(A::v4, A::v3, A::v1); in DEF_TEST()
1774 a.fcmge4s(A::v4, A::v3, A::v1); in DEF_TEST()
1776 0x64,0x1c,0x21,0x4e, in DEF_TEST()
1777 0x64,0x1c,0xa1,0x4e, in DEF_TEST()
1778 0x64,0x1c,0x21,0x6e, in DEF_TEST()
1779 0x64,0x1c,0x61,0x4e, in DEF_TEST()
1780 0x64,0x1c,0x61,0x6e, in DEF_TEST()
1781 0x64,0x58,0x20,0x6e, in DEF_TEST()
1783 0x64,0x84,0xa1,0x4e, in DEF_TEST()
1784 0x64,0x84,0xa1,0x6e, in DEF_TEST()
1785 0x64,0x9c,0xa1,0x4e, in DEF_TEST()
1787 0x64,0x8c,0xa1,0x6e, in DEF_TEST()
1788 0x64,0x34,0xa1,0x4e, in DEF_TEST()
1790 0x64,0x84,0x61,0x6e, in DEF_TEST()
1791 0x64,0x9c,0x61,0x4e, in DEF_TEST()
1793 0x64,0xd4,0x21,0x4e, in DEF_TEST()
1794 0x64,0xd4,0xa1,0x4e, in DEF_TEST()
1795 0x64,0xdc,0x21,0x6e, in DEF_TEST()
1796 0x64,0xfc,0x21,0x6e, in DEF_TEST()
1797 0x64,0xf4,0xa1,0x4e, in DEF_TEST()
1798 0x64,0xf4,0x21,0x4e, in DEF_TEST()
1800 0x64,0xf8,0xa0,0x6e, in DEF_TEST()
1801 0x64,0xf8,0xa1,0x6e, in DEF_TEST()
1803 0x64,0xcc,0x21,0x4e, in DEF_TEST()
1804 0x64,0xcc,0xa1,0x4e, in DEF_TEST()
1806 0x64,0xe4,0x21,0x4e, in DEF_TEST()
1807 0x64,0xe4,0xa1,0x6e, in DEF_TEST()
1808 0x64,0xe4,0x21,0x6e, in DEF_TEST()
1811 test_asm(r, [&](A& a) { in DEF_TEST() argument
1812 a.shl4s(A::v4, A::v3, 0); in DEF_TEST()
1813 a.shl4s(A::v4, A::v3, 1); in DEF_TEST()
1814 a.shl4s(A::v4, A::v3, 8); in DEF_TEST()
1815 a.shl4s(A::v4, A::v3, 16); in DEF_TEST()
1816 a.shl4s(A::v4, A::v3, 31); in DEF_TEST()
1818 a.sshr4s(A::v4, A::v3, 1); in DEF_TEST()
1819 a.sshr4s(A::v4, A::v3, 8); in DEF_TEST()
1820 a.sshr4s(A::v4, A::v3, 31); in DEF_TEST()
1822 a.ushr4s(A::v4, A::v3, 1); in DEF_TEST()
1823 a.ushr4s(A::v4, A::v3, 8); in DEF_TEST()
1824 a.ushr4s(A::v4, A::v3, 31); in DEF_TEST()
1826 a.ushr8h(A::v4, A::v3, 1); in DEF_TEST()
1827 a.ushr8h(A::v4, A::v3, 8); in DEF_TEST()
1828 a.ushr8h(A::v4, A::v3, 15); in DEF_TEST()
1830 0x64,0x54,0x20,0x4f, in DEF_TEST()
1831 0x64,0x54,0x21,0x4f, in DEF_TEST()
1832 0x64,0x54,0x28,0x4f, in DEF_TEST()
1833 0x64,0x54,0x30,0x4f, in DEF_TEST()
1834 0x64,0x54,0x3f,0x4f, in DEF_TEST()
1836 0x64,0x04,0x3f,0x4f, in DEF_TEST()
1837 0x64,0x04,0x38,0x4f, in DEF_TEST()
1838 0x64,0x04,0x21,0x4f, in DEF_TEST()
1840 0x64,0x04,0x3f,0x6f, in DEF_TEST()
1841 0x64,0x04,0x38,0x6f, in DEF_TEST()
1842 0x64,0x04,0x21,0x6f, in DEF_TEST()
1844 0x64,0x04,0x1f,0x6f, in DEF_TEST()
1845 0x64,0x04,0x18,0x6f, in DEF_TEST()
1846 0x64,0x04,0x11,0x6f, in DEF_TEST()
1849 test_asm(r, [&](A& a) { in DEF_TEST() argument
1850 a.sli4s(A::v4, A::v3, 0); in DEF_TEST()
1851 a.sli4s(A::v4, A::v3, 1); in DEF_TEST()
1852 a.sli4s(A::v4, A::v3, 8); in DEF_TEST()
1853 a.sli4s(A::v4, A::v3, 16); in DEF_TEST()
1854 a.sli4s(A::v4, A::v3, 31); in DEF_TEST()
1856 0x64,0x54,0x20,0x6f, in DEF_TEST()
1857 0x64,0x54,0x21,0x6f, in DEF_TEST()
1858 0x64,0x54,0x28,0x6f, in DEF_TEST()
1859 0x64,0x54,0x30,0x6f, in DEF_TEST()
1860 0x64,0x54,0x3f,0x6f, in DEF_TEST()
1863 test_asm(r, [&](A& a) { in DEF_TEST() argument
1864 a.scvtf4s (A::v4, A::v3); in DEF_TEST()
1865 a.fcvtzs4s(A::v4, A::v3); in DEF_TEST()
1866 a.fcvtns4s(A::v4, A::v3); in DEF_TEST()
1867 a.frintp4s(A::v4, A::v3); in DEF_TEST()
1868 a.frintm4s(A::v4, A::v3); in DEF_TEST()
1869 a.fcvtn (A::v4, A::v3); in DEF_TEST()
1870 a.fcvtl (A::v4, A::v3); in DEF_TEST()
1872 0x64,0xd8,0x21,0x4e, in DEF_TEST()
1873 0x64,0xb8,0xa1,0x4e, in DEF_TEST()
1874 0x64,0xa8,0x21,0x4e, in DEF_TEST()
1875 0x64,0x88,0xa1,0x4e, in DEF_TEST()
1876 0x64,0x98,0x21,0x4e, in DEF_TEST()
1877 0x64,0x68,0x21,0x0e, in DEF_TEST()
1878 0x64,0x78,0x21,0x0e, in DEF_TEST()
1881 test_asm(r, [&](A& a) { in DEF_TEST() argument
1882 a.sub (A::sp, A::sp, 32); // sub sp, sp, #32 in DEF_TEST()
1883 a.strq(A::v0, A::sp, 1); // str q0, [sp, #16] in DEF_TEST()
1884 a.strq(A::v1, A::sp); // str q1, [sp] in DEF_TEST()
1885 a.strd(A::v0, A::sp, 6); // str d0, [sp, #48] in DEF_TEST()
1886 a.strs(A::v0, A::sp, 6); // str s0, [sp, #24] in DEF_TEST()
1887 a.strh(A::v0, A::sp, 10); // str h0, [sp, #20] in DEF_TEST()
1888 a.strb(A::v0, A::sp, 47); // str b0, [sp, #47] in DEF_TEST()
1889 a.ldrb(A::v9, A::sp, 42); // ldr b9, [sp, #42] in DEF_TEST()
1890 a.ldrh(A::v9, A::sp, 47); // ldr h9, [sp, #94] in DEF_TEST()
1891 a.ldrs(A::v7, A::sp, 10); // ldr s7, [sp, #40] in DEF_TEST()
1892 a.ldrd(A::v7, A::sp, 1); // ldr d7, [sp, #8] in DEF_TEST()
1893 a.ldrq(A::v5, A::sp, 128); // ldr q5, [sp, #2048] in DEF_TEST()
1894 a.add (A::sp, A::sp, 32); // add sp, sp, #32 in DEF_TEST()
1896 0xff,0x83,0x00,0xd1, in DEF_TEST()
1897 0xe0,0x07,0x80,0x3d, in DEF_TEST()
1898 0xe1,0x03,0x80,0x3d, in DEF_TEST()
1899 0xe0,0x1b,0x00,0xfd, in DEF_TEST()
1900 0xe0,0x1b,0x00,0xbd, in DEF_TEST()
1901 0xe0,0x2b,0x00,0x7d, in DEF_TEST()
1902 0xe0,0xbf,0x00,0x3d, in DEF_TEST()
1903 0xe9,0xab,0x40,0x3d, in DEF_TEST()
1904 0xe9,0xbf,0x40,0x7d, in DEF_TEST()
1905 0xe7,0x2b,0x40,0xbd, in DEF_TEST()
1906 0xe7,0x07,0x40,0xfd, in DEF_TEST()
1907 0xe5,0x03,0xc2,0x3d, in DEF_TEST()
1908 0xff,0x83,0x00,0x91, in DEF_TEST()
1911 test_asm(r, [&](A& a) { in DEF_TEST() argument
1912 a.brk(0); in DEF_TEST()
1913 a.brk(65535); in DEF_TEST()
1915 a.ret(A::x30); // Conventional ret using link register. in DEF_TEST()
1916 a.ret(A::x13); // Can really return using any register if we like. in DEF_TEST()
1918 a.add(A::x2, A::x2, 4); in DEF_TEST()
1919 a.add(A::x3, A::x2, 32); in DEF_TEST()
1921 a.sub(A::x2, A::x2, 4); in DEF_TEST()
1922 a.sub(A::x3, A::x2, 32); in DEF_TEST()
1924 a.subs(A::x2, A::x2, 4); in DEF_TEST()
1925 a.subs(A::x3, A::x2, 32); in DEF_TEST()
1927 a.subs(A::xzr, A::x2, 4); // These are actually the same instruction! in DEF_TEST()
1928 a.cmp(A::x2, 4); in DEF_TEST()
1930 A::Label l; in DEF_TEST()
1931 a.label(&l); in DEF_TEST()
1932 a.bne(&l); in DEF_TEST()
1933 a.bne(&l); in DEF_TEST()
1934 a.blt(&l); in DEF_TEST()
1935 a.b(&l); in DEF_TEST()
1936 a.cbnz(A::x2, &l); in DEF_TEST()
1937 a.cbz(A::x2, &l); in DEF_TEST()
1939 a.add(A::x3, A::x2, A::x1); // add x3,x2,x1 in DEF_TEST()
1940 a.add(A::x3, A::x2, A::x1, A::ASR, 3); // add x3,x2,x1, asr #3 in DEF_TEST()
1942 0x00,0x00,0x20,0xd4, in DEF_TEST()
1943 0xe0,0xff,0x3f,0xd4, in DEF_TEST()
1945 0xc0,0x03,0x5f,0xd6, in DEF_TEST()
1946 0xa0,0x01,0x5f,0xd6, in DEF_TEST()
1948 0x42,0x10,0x00,0x91, in DEF_TEST()
1949 0x43,0x80,0x00,0x91, in DEF_TEST()
1951 0x42,0x10,0x00,0xd1, in DEF_TEST()
1952 0x43,0x80,0x00,0xd1, in DEF_TEST()
1954 0x42,0x10,0x00,0xf1, in DEF_TEST()
1955 0x43,0x80,0x00,0xf1, in DEF_TEST()
1957 0x5f,0x10,0x00,0xf1, in DEF_TEST()
1958 0x5f,0x10,0x00,0xf1, in DEF_TEST()
1960 0x01,0x00,0x00,0x54, // b.ne #0 in DEF_TEST()
1961 0xe1,0xff,0xff,0x54, // b.ne #-4 in DEF_TEST()
1962 0xcb,0xff,0xff,0x54, // b.lt #-8 in DEF_TEST()
1963 0xae,0xff,0xff,0x54, // b.al #-12 in DEF_TEST()
1964 0x82,0xff,0xff,0xb5, // cbnz x2, #-16 in DEF_TEST()
1965 0x62,0xff,0xff,0xb4, // cbz x2, #-20 in DEF_TEST()
1967 0x43,0x00,0x01,0x8b, in DEF_TEST()
1968 0x43,0x0c,0x81,0x8b, in DEF_TEST()
1971 // Can we cbz() to a not-yet-defined label? in DEF_TEST()
1972 test_asm(r, [&](A& a) { in DEF_TEST() argument
1973 A::Label l; in DEF_TEST()
1974 a.cbz(A::x2, &l); in DEF_TEST()
1975 a.add(A::x3, A::x2, 32); in DEF_TEST()
1976 a.label(&l); in DEF_TEST()
1977 a.ret(A::x30); in DEF_TEST()
1979 0x42,0x00,0x00,0xb4, // cbz x2, #8 in DEF_TEST()
1980 0x43,0x80,0x00,0x91, // add x3, x2, #32 in DEF_TEST()
1981 0xc0,0x03,0x5f,0xd6, // ret in DEF_TEST()
1984 // If we start a label as a backward label, in DEF_TEST()
1985 // can we redefine it to be a future label? in DEF_TEST()
1987 test_asm(r, [&](A& a) { in DEF_TEST() argument
1988 A::Label l1; in DEF_TEST()
1989 a.label(&l1); in DEF_TEST()
1990 a.add(A::x3, A::x2, 32); in DEF_TEST()
1991 a.cbz(A::x2, &l1); // This will jump backward... nothing sneaky. in DEF_TEST()
1993 A::Label l2; // Start off the same... in DEF_TEST()
1994 a.label(&l2); in DEF_TEST()
1995 a.add(A::x3, A::x2, 32); in DEF_TEST()
1996 a.cbz(A::x2, &l2); // Looks like this will go backward... in DEF_TEST()
1997 a.add(A::x2, A::x2, 4); in DEF_TEST()
1998 a.add(A::x3, A::x2, 32); in DEF_TEST()
1999 a.label(&l2); // But no... actually forward! What a switcheroo! in DEF_TEST()
2001 0x43,0x80,0x00,0x91, // add x3, x2, #32 in DEF_TEST()
2002 0xe2,0xff,0xff,0xb4, // cbz x2, #-4 in DEF_TEST()
2004 0x43,0x80,0x00,0x91, // add x3, x2, #32 in DEF_TEST()
2005 0x62,0x00,0x00,0xb4, // cbz x2, #12 in DEF_TEST()
2006 0x42,0x10,0x00,0x91, // add x2, x2, #4 in DEF_TEST()
2007 0x43,0x80,0x00,0x91, // add x3, x2, #32 in DEF_TEST()
2010 // Loading from a label on ARM. in DEF_TEST()
2011 test_asm(r, [&](A& a) { in DEF_TEST() argument
2012 A::Label fore,aft; in DEF_TEST()
2013 a.label(&fore); in DEF_TEST()
2014 a.word(0x01234567); in DEF_TEST()
2015 a.ldrq(A::v1, &fore); in DEF_TEST()
2016 a.ldrq(A::v2, &aft); in DEF_TEST()
2017 a.label(&aft); in DEF_TEST()
2018 a.word(0x76543210); in DEF_TEST()
2020 0x67,0x45,0x23,0x01, in DEF_TEST()
2021 0xe1,0xff,0xff,0x9c, // ldr q1, #-4 in DEF_TEST()
2022 0x22,0x00,0x00,0x9c, // ldr q2, #4 in DEF_TEST()
2023 0x10,0x32,0x54,0x76, in DEF_TEST()
2026 test_asm(r, [&](A& a) { in DEF_TEST() argument
2027 a.ldrq(A::v0, A::x8); in DEF_TEST()
2028 a.strq(A::v0, A::x8); in DEF_TEST()
2030 0x00,0x01,0xc0,0x3d, in DEF_TEST()
2031 0x00,0x01,0x80,0x3d, in DEF_TEST()
2034 test_asm(r, [&](A& a) { in DEF_TEST() argument
2035 a.dup4s (A::v0, A::x8); in DEF_TEST()
2036 a.ld1r4s (A::v0, A::x8); // echo 'ld1r.4s {v0}, [x8]' | llvm-mc --show-encoding in DEF_TEST()
2037 a.ld1r8h (A::v0, A::x8); in DEF_TEST()
2038 a.ld1r16b(A::v0, A::x8); in DEF_TEST()
2040 0x00,0x0d,0x04,0x4e, in DEF_TEST()
2041 0x00,0xc9,0x40,0x4d, in DEF_TEST()
2042 0x00,0xc5,0x40,0x4d, in DEF_TEST()
2043 0x00,0xc1,0x40,0x4d, in DEF_TEST()
2046 test_asm(r, [&](A& a) { in DEF_TEST() argument
2047 a.ld24s(A::v0, A::x8); // echo 'ld2.4s {v0,v1}, [x8]' | llvm-mc --show-encoding in DEF_TEST()
2048 a.ld44s(A::v0, A::x8); in DEF_TEST()
2049 a.st24s(A::v0, A::x8); in DEF_TEST()
2050 a.st44s(A::v0, A::x8); // echo 'st4.4s {v0,v1,v2,v3}, [x8]' | llvm-mc --show-encoding in DEF_TEST()
2052 a.ld24s(A::v0, A::x8, 0); //echo 'ld2 {v0.s,v1.s}[0], [x8]' | llvm-mc --show-encoding in DEF_TEST()
2053 a.ld24s(A::v0, A::x8, 1); in DEF_TEST()
2054 a.ld24s(A::v0, A::x8, 2); in DEF_TEST()
2055 a.ld24s(A::v0, A::x8, 3); in DEF_TEST()
2057 a.ld44s(A::v0, A::x8, 0); // ld4 {v0.s,v1.s,v2.s,v3.s}[0], [x8] in DEF_TEST()
2058 a.ld44s(A::v0, A::x8, 1); in DEF_TEST()
2059 a.ld44s(A::v0, A::x8, 2); in DEF_TEST()
2060 a.ld44s(A::v0, A::x8, 3); in DEF_TEST()
2062 0x00,0x89,0x40,0x4c, in DEF_TEST()
2063 0x00,0x09,0x40,0x4c, in DEF_TEST()
2064 0x00,0x89,0x00,0x4c, in DEF_TEST()
2065 0x00,0x09,0x00,0x4c, in DEF_TEST()
2067 0x00,0x81,0x60,0x0d, in DEF_TEST()
2068 0x00,0x91,0x60,0x0d, in DEF_TEST()
2069 0x00,0x81,0x60,0x4d, in DEF_TEST()
2070 0x00,0x91,0x60,0x4d, in DEF_TEST()
2072 0x00,0xa1,0x60,0x0d, in DEF_TEST()
2073 0x00,0xb1,0x60,0x0d, in DEF_TEST()
2074 0x00,0xa1,0x60,0x4d, in DEF_TEST()
2075 0x00,0xb1,0x60,0x4d, in DEF_TEST()
2078 test_asm(r, [&](A& a) { in DEF_TEST() argument
2079 a.xtns2h(A::v0, A::v0); in DEF_TEST()
2080 a.xtnh2b(A::v0, A::v0); in DEF_TEST()
2081 a.strs (A::v0, A::x0); in DEF_TEST()
2083 a.ldrs (A::v0, A::x0); in DEF_TEST()
2084 a.uxtlb2h(A::v0, A::v0); in DEF_TEST()
2085 a.uxtlh2s(A::v0, A::v0); in DEF_TEST()
2087 a.uminv4s(A::v3, A::v4); in DEF_TEST()
2088 a.movs (A::x3, A::v4,0); // mov.s w3,v4[0] in DEF_TEST()
2089 a.movs (A::x3, A::v4,1); // mov.s w3,v4[1] in DEF_TEST()
2090 a.inss (A::v4, A::x3,3); // ins.s v4[3],w3 in DEF_TEST()
2092 0x00,0x28,0x61,0x0e, in DEF_TEST()
2093 0x00,0x28,0x21,0x0e, in DEF_TEST()
2094 0x00,0x00,0x00,0xbd, in DEF_TEST()
2096 0x00,0x00,0x40,0xbd, in DEF_TEST()
2097 0x00,0xa4,0x08,0x2f, in DEF_TEST()
2098 0x00,0xa4,0x10,0x2f, in DEF_TEST()
2100 0x83,0xa8,0xb1,0x6e, in DEF_TEST()
2101 0x83,0x3c,0x04,0x0e, in DEF_TEST()
2102 0x83,0x3c,0x0c,0x0e, in DEF_TEST()
2103 0x64,0x1c,0x1c,0x4e, in DEF_TEST()
2106 test_asm(r, [&](A& a) { in DEF_TEST() argument
2107 a.ldrb(A::v0, A::x8); in DEF_TEST()
2108 a.strb(A::v0, A::x8); in DEF_TEST()
2110 0x00,0x01,0x40,0x3d, in DEF_TEST()
2111 0x00,0x01,0x00,0x3d, in DEF_TEST()
2114 test_asm(r, [&](A& a) { in DEF_TEST() argument
2115 a.ldrd(A::x0, A::x1, 3); // ldr x0, [x1, #24] in DEF_TEST()
2116 a.ldrs(A::x0, A::x1, 3); // ldr w0, [x1, #12] in DEF_TEST()
2117 a.ldrh(A::x0, A::x1, 3); // ldrh w0, [x1, #6] in DEF_TEST()
2118 a.ldrb(A::x0, A::x1, 3); // ldrb w0, [x1, #3] in DEF_TEST()
2120 a.strs(A::x0, A::x1, 3); // str w0, [x1, #12] in DEF_TEST()
2122 0x20,0x0c,0x40,0xf9, in DEF_TEST()
2123 0x20,0x0c,0x40,0xb9, in DEF_TEST()
2124 0x20,0x0c,0x40,0x79, in DEF_TEST()
2125 0x20,0x0c,0x40,0x39, in DEF_TEST()
2127 0x20,0x0c,0x00,0xb9, in DEF_TEST()
2130 test_asm(r, [&](A& a) { in DEF_TEST() argument
2131 a.tbl (A::v0, A::v1, A::v2); in DEF_TEST()
2132 a.uzp14s(A::v0, A::v1, A::v2); in DEF_TEST()
2133 a.uzp24s(A::v0, A::v1, A::v2); in DEF_TEST()
2134 a.zip14s(A::v0, A::v1, A::v2); in DEF_TEST()
2135 a.zip24s(A::v0, A::v1, A::v2); in DEF_TEST()
2137 0x20,0x00,0x02,0x4e, in DEF_TEST()
2138 0x20,0x18,0x82,0x4e, in DEF_TEST()
2139 0x20,0x58,0x82,0x4e, in DEF_TEST()
2140 0x20,0x38,0x82,0x4e, in DEF_TEST()
2141 0x20,0x78,0x82,0x4e, in DEF_TEST()
2156 for (int i = 0; i < N; ++i) { in DEF_TEST()
2170 const float expected[] = {-2, -1, 0, 1, 2, 3}; in DEF_TEST()
2176 float values[] = {-80, -5, -2, -1, 0, 1, 2, 3, 5, 160}; in DEF_TEST()
2181 const float expected[] = {0, 0.03125f, 0.25f, 0.5f, 1, 2, 4, 8, 32, INFINITY}; in DEF_TEST()
2186 float exps[] = {-2, -1, 0, 1, 2}; in DEF_TEST()
2196 float exps[] = {-80, -5, -2, -1, 0, 1, 2, 3, 5, 160}; in DEF_TEST()
2201 const float expected[] = {0, 0.03125f, 0.25f, 0.5f, 1, 2, 4, 8, 32, INFINITY}; in DEF_TEST()
2206 float exps[] = {-2, -1, 0, 1, 2}; in DEF_TEST()
2216 float bases[] = {0, 1, 4, 9, 16}; in DEF_TEST()
2221 const float expected[] = {0, 1, 2, 3, 4}; in DEF_TEST()
2226 float bases[] = {0, 1, 2, 3, 4}; in DEF_TEST()
2231 const float expected[] = {0, 1, 2, 3, 4}; in DEF_TEST()
2236 float bases[] = {0, 1, 2, 3, 4}; in DEF_TEST()
2241 const float expected[] = {0, 1, 4, 9, 16}; in DEF_TEST()
2293 // so bring in the domain a little. in DEF_TEST()
2295 float err = 0; in DEF_TEST()
2314 float err = 0; in DEF_TEST()
2325 err = 0; in DEF_TEST()
2353 // ±NaN, ±0, ±1, ±inf in DEF_TEST()
2354 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000, in DEF_TEST()
2355 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000}; in DEF_TEST()
2377 y = b.uniformF(b.uniform(), 0); in DEF_TEST()
2385 for (int i = 0; i < 8; i++) { in DEF_TEST()
2389 for (int j = 0; j < 8; j++) { in DEF_TEST()
2398 for (int i = 0; i < 8; i++) { in DEF_TEST()
2415 for (int j = 0; j < 8; j++) { in DEF_TEST()
2423 for (int i = 0; i < 8; i++) { in DEF_TEST()
2440 for (int j = 0; j < 8; j++) { in DEF_TEST()
2449 const uint16_t hs[] = {0x0000,0x3800,0x3c00,0x4000, in DEF_TEST()
2450 0xc400,0xb800,0xbc00,0xc000}; in DEF_TEST()
2462 for (int i = 0; i < 8; i++) { in DEF_TEST()
2476 for (int i = 0; i < 8; i++) { in DEF_TEST()
2487 for (int i = 0; i < 65; i++) { in DEF_TEST()
2488 lo[i] = 2*i+0; in DEF_TEST()
2490 wide[i] = ((uint64_t)lo[i] << 0) in DEF_TEST()
2500 b.store32(loPtr, b.load64(widePtr, 0)); in DEF_TEST()
2506 for (int i = 0; i < 65; i++) { in DEF_TEST()
2524 for (int i = 0; i < 65; i++) { in DEF_TEST()
2535 for (int i = 0; i < 4*63; i++) { in DEF_TEST()
2552 memset(packed, 0, sizeof(packed)); in DEF_TEST()
2554 for (int i = 0; i < 4*63; i++) { in DEF_TEST()
2571 memset(floats, 0, sizeof(floats)); in DEF_TEST()
2573 for (int i = 0; i < 4*63; i++) { in DEF_TEST()
2591 // ±NaN, ±0, ±1, ±inf in DEF_TEST()
2592 const uint32_t bits[] = {0x7f80'0001, 0xff80'0001, 0x0000'0000, 0x8000'0000, in DEF_TEST()
2593 0x3f80'0000, 0xbf80'0000, 0x7f80'0000, 0xff80'0000}; in DEF_TEST()
2597 for (int i = 0; i < 8; i++) { in DEF_TEST()
2598 REPORTER_ASSERT(r, nan[i] == ((i == 0 || i == 1) ? 0xffffffff : 0)); in DEF_TEST()
2600 i == 4 || i == 5) ? 0xffffffff : 0)); in DEF_TEST()
2610 A = b.varying<float>(), in DEF_TEST()
2615 storeF(dst, b.loadF(A) in DEF_TEST()
2623 float dst[17],A[17],B[17],C[17],D[17],E[17]; in DEF_TEST() local
2624 for (int i = 0; i < 17; i++) { in DEF_TEST()
2625 A[i] = B[i] = C[i] = D[i] = E[i] = (float)i; in DEF_TEST()
2627 program.eval(17, dst,A,B,C,D,E); in DEF_TEST()
2628 for (int i = 0; i < 17; i++) { in DEF_TEST()
2636 // originally with a bad arm64 implementation of pack(). in DEF_TEST()
2643 a = p.splat(0xf); in DEF_TEST() local
2645 skvm::I32 _4444 = p.splat(0); in DEF_TEST()
2647 _4444 = pack(_4444, a, 0); in DEF_TEST()
2655 uint16_t dst[17] = {0}; in DEF_TEST()
2657 for (int i = 0; i < 17; i++) { in DEF_TEST()
2658 REPORTER_ASSERT(reporter, dst[i] == 0xf00f, "got %04x, want %04x\n", dst[i], 0xf00f); in DEF_TEST()
2694 // A gather instruction isn't necessarily varying... it's whatever its index is. in DEF_TEST()
2695 // First a typical gather scenario with varying index. in DEF_TEST()
2701 b.store32(buf, b.gather32(uniforms,0, ix)); in DEF_TEST()
2709 // v1 = gather32 uniforms+0 v0 in DEF_TEST()
2712 REPORTER_ASSERT(r, p.loop() == 0); in DEF_TEST()
2715 // Now the same but with a uniform index instead. in DEF_TEST()
2721 b.store32(buf, b.gather32(uniforms,0, ix)); in DEF_TEST()
2728 // v1 = gather32 uniforms+0 v0 in DEF_TEST()
2738 // and deduplicating them, which results in a simple common subexpression eliminator. in DEF_TEST()
2740 // But we can't soundly dedup two identical loads with a store between. in DEF_TEST()
2746 for (int i = 0; i < K; i++) { in DEF_TEST()
2752 int buf[] = { 0,1,2,3,4 }; in DEF_TEST()
2754 for (int i = 0; i < (int)std::size(buf); i++) { in DEF_TEST()
2761 // Following a similar line of reasoning to SkVM_dont_dedup_loads, in DEF_TEST()
2762 // we cannot dedup stores either. A different store between two identical stores in DEF_TEST()
2793 0x0000'0000, 0x8000'0000, //±0 in DEF_TEST()
2794 0x3f80'0000, 0xbf80'0000, //±1 in DEF_TEST()
2795 0x7f80'0000, 0xff80'0000, //±inf in DEF_TEST()
2796 0x7f80'0001, 0xff80'0001, //±NaN in DEF_TEST()
2802 for (int i = 0; i < 8; i++) { in DEF_TEST()
2822 a = p.splat(1.0f); in DEF_TEST() local
2824 p.unpremul(&r, &g, &b, a); in DEF_TEST()
2830 int duplicates = 0; in DEF_TEST()
2836 REPORTER_ASSERT(reporter, duplicates > 0); in DEF_TEST()
2851 a = p.splat(1.0f); in DEF_TEST() local
2853 p.unpremul(&r, &g, &b, a); in DEF_TEST()
2866 " int a = 99;\n" in DEF_TEST()
2867 " if (x > 0) a += 100;\n" in DEF_TEST()
2868 " if (y > 0) a += 101;\n" in DEF_TEST()
2869 " a = 102;\n" in DEF_TEST()
2870 " return a;\n" in DEF_TEST()
2898 "<td>v2 = splat 0 (0)</td></tr>")); in DEF_TEST()