1 /*
2 * Copyright © 2016 Broadcom
3 * Copyright © 2020 Google LLC
4 * SPDX-License-Identifier: MIT
5 */
6
7 /* Unit test for disassembly of instructions.
8 *
9 * The goal is to take instructions we've seen the blob produce, and test that
10 * we can disassemble them correctly. For the next person investigating the
11 * behavior of this instruction, please include the testcase it was generated
12 * from, and the qcom disassembly as a comment if it differs from what we
13 * produce.
14 */
15
16 #include <stdio.h>
17 #include <stdlib.h>
18 #include <string.h>
19 #include "util/macros.h"
20 #include "util/u_vector.h"
21
22 #include "ir3.h"
23 #include "ir3_assembler.h"
24 #include "ir3_shader.h"
25
26 #include "freedreno/isa/ir3-isa.h"
27
28 /* clang-format off */
29 /* Note: @anholt's 4xx disasm was done on an a418 Nexus 5x */
/*
 * Table-row helpers: each one expands to a brace-enclosed designated
 * initializer for a single `struct test`.
 *
 *  - `i` is the instruction encoding. The plain variants stringify it (`#i`)
 *    into .instr and set .instr_raw to 0; the _RAW variants store `i` as a
 *    value in .instr_raw and set .instr to NULL.
 *  - `d` is stored in .expected.
 *  - Any further arguments are appended verbatim as extra initializers
 *    (e.g. `.parse_fail = true`).
 *
 * The 4XX/5XX/6XX variants select the target through .gpu_id (420/540/630);
 * the 7XX variants set .chip_id (0x07030001) instead and leave .gpu_id
 * unset.
 */
30 #define INSTR_4XX(i, d, ...) { .gpu_id = 420, .instr = #i, .instr_raw = 0, .expected = d, __VA_ARGS__ }
31 #define INSTR_5XX(i, d, ...) { .gpu_id = 540, .instr = #i, .instr_raw = 0, .expected = d, __VA_ARGS__ }
32 #define INSTR_6XX(i, d, ...) { .gpu_id = 630, .instr = #i, .instr_raw = 0, .expected = d, __VA_ARGS__ }
33 #define INSTR_6XX_RAW(i, d, ...) { .gpu_id = 630, .instr = NULL, .instr_raw = i, .expected = d, __VA_ARGS__ }
34 #define INSTR_7XX(i, d, ...) { .chip_id = 0x07030001, .instr = #i, .instr_raw = 0, .expected = d, __VA_ARGS__ }
35 #define INSTR_7XX_RAW(i, d, ...) { .chip_id = 0x07030001, .instr = NULL, .instr_raw = i, .expected = d, __VA_ARGS__ }
36 /* clang-format on */
37
/*
 * One disassembly test case, followed by the static table of hand-written
 * cases (each row is built with one of the INSTR_* macros).
 *
 *   gpu_id     - target GPU id; the INSTR_4XX/5XX/6XX macros set it to
 *                420/540/630.
 *   chip_id    - target chip id; set (to 0x07030001) by the INSTR_7XX macros
 *                in place of gpu_id.
 *   instr      - the encoding as text: the stringified macro argument, written
 *                in the table as two 8-hex-digit halves joined by '_'. NULL
 *                for rows built with the _RAW macros.
 *   instr_raw  - the encoding as a 64-bit value for _RAW rows, 0 otherwise.
 *   expected   - the disassembly text the encoding is expected to produce.
 *   parse_fail - see the comment on the field.
 */
38 static const struct test {
39 int gpu_id;
40 int chip_id;
41 const char *instr;
42 uint64_t instr_raw;
43 const char *expected;
44 /**
45 * True when assembling the expected text is expected to fail to parse
46 * (i.e. for things not (yet) supported by ir3_parser.y).
47 */
48 bool parse_fail;
49 } tests[] = {
50 /* clang-format off */
51 /* cat0 */
52 INSTR_6XX(00000000_00000000, "nop"),
53 INSTR_6XX(00000200_00000000, "(rpt2)nop"),
54 INSTR_6XX(00010000_00000000, "(eq)nop"),
55 INSTR_6XX(03000000_00000000, "end"),
56 INSTR_6XX(00800000_00000004, "br p0.x, #4"),
57 INSTR_6XX(00800000_fffffffc, "br p0.x, #-4"),
58 INSTR_6XX(00900000_00000003, "br !p0.x, #3"),
59 INSTR_6XX(03820000_00000015, "shps #21"), /* emit */
60 INSTR_6XX(04021000_00000000, "(ss)shpe"), /* cut */
61 INSTR_6XX(02220000_00000004, "getlast.w8 #4"),
62 INSTR_6XX(02820000_00000014, "getone #20"), /* kill p0.x */
63 INSTR_6XX(00906020_00000007, "brao !p0.x, !p0.y, #7"),
64 INSTR_6XX(00804040_00000003, "braa p0.x, p0.y, #3"),
65 INSTR_6XX(07820000_00000000, "prede"),
66 INSTR_6XX(00800063_0000001e, "brac.3 #30"),
67 INSTR_6XX(06820000_00000000, "predt"),
68 INSTR_6XX(07020000_00000000, "predf"),
69 INSTR_6XX(07820000_00000000, "prede"),
70
71 /* cat1 */
72 INSTR_6XX(20244000_00000020, "mov.f32f32 r0.x, c8.x"),
73 INSTR_6XX(20200000_00000020, "mov.f16f16 hr0.x, hc8.x"),
74 INSTR_6XX(20150000_00000000, "cov.s32s16 hr0.x, r0.x"),
75 INSTR_6XX(20156004_00000c11, "(ul)mov.s32s32 r1.x, c<a0.x + 17>"),
76 INSTR_6XX(201100f4_00000000, "mova a0.x, hr0.x"),
77 INSTR_6XX(20244905_00000410, "(rpt1)mov.f32f32 r1.y, (r)c260.x"),
78 INSTR_6XX(20174004_00000008, "mov.s32s32 r<a0.x + 4>, r2.x"),
79 INSTR_6XX(20130000_00000005, "mov.s16s16 hr<a0.x>, hr1.y"),
80 INSTR_6XX(20110004_00000800, "mov.s16s16 hr1.x, hr<a0.x>"),
81 /* dEQP-VK.subgroups.ballot.compute.compute */
82 INSTR_6XX(260cc3c0_00000000, "movmsk.w128 r48.x"), /* movmsk.w128 sr48.x */
83
84 INSTR_6XX(240cc004_00030201, "swz.u32u32 r1.x, r0.w, r0.y, r0.z"),
85 INSTR_6XX(2400c105_04030201, "gat.f16u32 r1.y, hr0.y, hr0.z, hr0.w, hr1.x"),
86 INSTR_6XX(240c0205_04030201, "sct.u32f16 hr1.y, hr0.z, hr0.w, hr1.x, r0.y"),
87 INSTR_6XX(2400c205_04030201, "sct.f16u32 r1.y, r0.z, r0.w, r1.x, hr0.y"),
88
89 INSTR_6XX(20510005_0000ffff, "mov.s16s16 hr1.y, -1"),
90 INSTR_6XX(20400005_00003900, "mov.f16f16 hr1.y, h(0.625000)"),
91 INSTR_6XX(20400006_00003800, "mov.f16f16 hr1.z, h(0.500000)"),
92 INSTR_6XX(204880f5_00000000, "mova1 a1.x, 0"),
93
94 INSTR_7XX(2004c005_00000405, "cov.f32u32 r1.y, (last)r1.y"),
95
96 /* cat2 */
97 INSTR_6XX(40104002_0c210001, "add.f hr0.z, r0.y, c<a0.x + 33>"),
98 INSTR_6XX(40b80804_10408004, "(nop3) cmps.f.lt r1.x, (abs)r1.x, c16.x"),
99 INSTR_6XX(47308a02_00002000, "(rpt2)bary.f (ei)r0.z, (r)0, r0.x"),
100 INSTR_6XX(47348000_00002000, "flat.b (ei)r0.x, 0, r0.x"),
101 INSTR_6XX(43480801_00008001, "(nop3) absneg.s hr0.y, (abs)hr0.y"),
102 INSTR_6XX(42280807_27ff0000, "(nop3) add.s hr1.w, hr0.x, h(-1)"),
103 INSTR_6XX(40a500f8_2c000004, "cmps.f.ne p0.x, hr1.x, h(0.0)"),
104 INSTR_6XX(438000f8_20010009, "and.b p0.x, hr2.y, h(1)"),
105 INSTR_6XX(438000f9_00020001, "and.b p0.y, hr0.y, hr0.z"),
106 INSTR_6XX(40080902_50200006, "(rpt1)add.f hr0.z, (r)hr1.z, (neg)(r)hc8.x"),
107 INSTR_6XX(42380c01_00040001, "(sat)(nop3) add.s r0.y, r0.y, r1.x"),
108 INSTR_6XX(42480000_48801086, "(nop2) sub.u hr0.x, hc33.z, (neg)hr<a0.x + 128>"),
109 INSTR_6XX(46b00001_00001020, "clz.b r0.y, c8.x"),
110 INSTR_6XX(46700009_00000009, "bfrev.b r2.y, r2.y"),
111
112 INSTR_7XX(42380800_04010400, "(nop3) add.s r0.x, (last)r0.x, (last)r0.y"),
113 INSTR_7XX(42930000_04000406, "cmps.u.ge r0.x, (last)r1.z, (last)r0.x"),
114
115 /* cat3 */
116 INSTR_6XX(66000000_10421041, "sel.f16 hr0.x, hc16.y, hr0.x, hc16.z"),
117 INSTR_6XX(64848109_109a9099, "(rpt1)sel.b32 r2.y, c38.y, (r)r2.y, c38.z"),
118 INSTR_6XX(64810904_30521036, "(rpt1)sel.b32 r1.x, (r)c13.z, r0.z, (r)c20.z"),
119 INSTR_6XX(64818902_20041032, "(rpt1)sel.b32 r0.z, (r)c12.z, r0.w, (r)r1.x"),
120 INSTR_6XX(63820005_10315030, "mad.f32 r1.y, (neg)c12.x, r1.x, c12.y"),
121 INSTR_6XX(62050009_00091000, "mad.u24 r2.y, c0.x, r2.z, r2.y"),
122 INSTR_6XX(61828008_00081033, "madsh.m16 r2.x, c12.w, r1.y, r2.x"),
123 INSTR_6XX(65900820_100cb008, "(nop3) shlg hr8.x, 8, hr8.x, 12"), /* (nop3) shlg.b16 hr8.x, (r)8, (r)hr8.x, 12; */
124 INSTR_6XX(65ae085c_0002a001, "(nop3) shlg hr23.x, hr0.y, hr23.x, hr0.z"), /* not seen in blob */
125 INSTR_6XX(65900820_0c0aac05, "(nop3) shlg hr8.x, hc<a0.x + 5>, hr8.x, hc<a0.x + 10>"), /* not seen in blob */
126 INSTR_6XX(65ae0c5c_0002a001, "(nop3) shlg r23.x, r0.y, r23.x, r0.z"), /* (nop3) shlg.b32 r23.x, (r)r0.y, (r)r23.x, r0.z */
127 INSTR_6XX(64018802_0002e003, "(nop3) shrm hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
128 INSTR_6XX(646084c3_1fff300a, "shrm r48.w, 10, r48.y, 4095"),
129 INSTR_6XX(64818802_0002e003, "(nop3) shlm hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
130 INSTR_6XX(65018802_0002e003, "(nop3) shrg hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
131 INSTR_6XX(66018802_0002e003, "(nop3) andg hr0.z, (neg)hr0.w, hr0.w, hr0.z"),
132 INSTR_6XX(67018802_1002e003, "(nop3) wmm hr0.z, (neg)hr0.w, hr0.w, 2"), /* (nop3) wmm.f16f16 hr0.z, (abs)(r)hr0.w, (r)hr0.w, 2 */
133 INSTR_6XX(67018c02_1002e003, "(nop3) wmm.accu hr0.z, (neg)hr0.w, hr0.w, 2"),
134 INSTR_6XX(6701c802_9002a003, "(nop3) wmm r0.z, r0.w, r0.w, 2"), /* (nop3) wmm.f32f32 r0.z, (r)r0.w, (r)r0.w, 2 */
135 /* custom test with qcom_dot8 function from cl_qcom_dot_product8 */
136 INSTR_6XX(66818c02_0002e003, "(sat)(nop3) dp2acc.mixed.low r0.z, r0.w, r0.w, r0.z"), /* (nop3) dp2acc (sat)r0.z, (signed)(low)(r)r0.w, (low)(r)r0.w, r0.z */
137 INSTR_6XX(6681c802_8002a003, "(nop3) dp4acc.unsigned.low r0.z, r0.w, r0.w, (neg)r0.z"), /* (nop3) dp4acc r0.z, (unsigned)(r)r0.w, (r)r0.w, (neg)r0.z */
138
139 INSTR_7XX(61808000_04020400, "madsh.m16 r0.x, (last)r0.x, r0.y, (last)r0.z"),
140 INSTR_7XX(64838806_04088406, "(nop3) sel.b32 r1.z, (last)r1.z, r1.w, (last)r2.x"),
141
142 /* cat4 */
143 INSTR_6XX(8010000a_00000003, "rcp r2.z, r0.w"),
144
145 /* cat5 */
146 /* dEQP-VK.glsl.derivate.dfdx.uniform_if.float_mediump */
147 INSTR_6XX(a3801102_00000001, "dsx (f32)(x)r0.z, r0.x"), /* dsx (f32)(xOOO)r0.z, r0.x */
148 /* dEQP-VK.glsl.derivate.dfdy.uniform_if.float_mediump */
149 INSTR_6XX(a3c01102_00000001, "dsy (f32)(x)r0.z, r0.x"), /* dsy (f32)(xOOO)r0.z, r0.x */
150 /* dEQP-VK.glsl.derivate.dfdxfine.uniform_loop.float_highp */
151 INSTR_6XX(a6001105_00000001, "dsxpp.1 (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */
152 INSTR_6XX(a6201105_00000001, "dsxpp.1.p (x)r1.y, r0.x"), /* dsxpp.1 (xOOO)r1.y, r0.x */
153
154 INSTR_6XX(a2802f00_00000001, "getsize (u16)(xyzw)hr0.x, r0.x, t#0"),
155 INSTR_6XX(a0c89f04_c4600005, "sam.base1 (f32)(xyzw)r1.x, r0.z, s#3, t#2"), /* sam.s2en.mode6.base1 (f32)(xyzw)r1.x, r0.z, 35 */
156 INSTR_6XX(a1c85f00_c0200005, "getlod.base0 (s32)(xyzw)r0.x, r0.z, s#1, t#0"), /* getlod.s2en.mode6.base0 (s32)(xyzw)r0.x, r0.z, 1 */
157 INSTR_6XX(a1000f00_00000004, "samb (f16)(xyzw)hr0.x, hr0.z, hr0.x, s#0, t#0"),
158 INSTR_6XX(a1000f00_00000003, "samb (f16)(xyzw)hr0.x, r0.y, r0.x, s#0, t#0"),
159 INSTR_6XX(a0c00f00_04400002, "sam (f16)(xyzw)hr0.x, hr0.y, s#2, t#2"),
160 INSTR_6XX(a6c02f00_00000000, "rgetinfo (u16)(xyzw)hr0.x"),
161 INSTR_6XX(a3482f08_c0000000, "getinfo.base0 (u16)(xyzw)hr2.x, t#0"),
162 /* dEQP-GLES31.functional.texture.texture_buffer.render.as_fragment_texture.buffer_size_65536 */
163 INSTR_5XX(a2c03102_00000000, "getbuf (u32)(x)r0.z, t#0"),
164 INSTR_6XX(a0c81f00_e0200005, "sam.base0 (f32)(xyzw)r0.x, r0.z, s#1, a1.x"),
165 INSTR_6XX(a0c81108_e2000001, "sam.base0 (f32)(x)r2.x, r0.x, s#16, a1.x"),
166 INSTR_6XX(a048d107_cc080a07, "isaml.base3 (s32)(x)r1.w, r0.w, r1.y, s#0, t#6"),
167 INSTR_6XX(a048d107_e0080a07, "isaml.base3 (s32)(x)r1.w, r0.w, r1.y, s#0, a1.x"),
168 INSTR_6XX(a1481606_e4803035, "saml.base0 (f32)(yz)r1.z, r6.z, r6.x, s#36, a1.x"),
169 INSTR_6XX(a0c89707_20a00005, "sam.s2en.uniform.base1 (f32)(xyz)r1.w, r0.z, r1.y, a1.x"),
170 INSTR_6XX(a1489f34_25e06e07, "saml.s2en.uniform.base1 (f32)(xyzw)r13.x, r0.w, r13.w, r11.w, a1.x"),
171
172 INSTR_7XX(a0081f02_e2040001, "isam.base0 (f32)(xyzw)r0.z, r0.x, t#16, a1.x"),
173 INSTR_7XX(a0081f02_e2000001, "isam.base0.1d (f32)(xyzw)r0.z, r0.x, t#16, a1.x"),
174 INSTR_7XX(a148310d_e028302c, "saml.base2 (u32)(x)r3.y, hr5.z, hr6.x, t#1, a1.x"),
175
176 INSTR_7XX(a00c3101_c2040001, "isam.v.base0 (u32)(x)r0.y, r0.x, s#0, t#1"),
177 INSTR_7XX(a00c3101_c2000001, "isam.v.base0.1d (u32)(x)r0.y, r0.x, s#0, t#1"),
178 INSTR_7XX(a02c3f06_c2041003, "isam.v.base0 (u32)(xyzw)r1.z, r0.y+8, s#0, t#1"),
179 INSTR_7XX(a02c3f05_a1240601, "isam.v.s2en.uniform.base0 (u32)(xyzw)r1.y, r0.x+3, r2.y"),
180
181 /* dEQP-VK.subgroups.arithmetic.compute.subgroupadd_float */
182 INSTR_6XX(a7c03102_00100003, "brcst.active.w8 (u32)(x)r0.z, r0.y"), /* brcst.active.w8 (u32)(xOOO)r0.z, r0.y */
183 /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
184 INSTR_6XX(b7e03107_00000401, "(sy)quad_shuffle.brcst (u32)(x)r1.w, r0.x, r0.z"), /* (sy)quad_shuffle.brcst (u32)(xOOO)r1.w, r0.x, r0.z */
185 /* dEQP-VK.subgroups.quad.graphics.subgroupquadswapdiagonal_int */
186 INSTR_6XX(b7e03104_00180001, "(sy)quad_shuffle.diag (u32)(x)r1.x, r0.x"), /* (sy)quad_shuffle.diag (u32)(xOOO)r1.x, r0.x */
187
188 INSTR_6XX(a7000000_00000000, "tcinv"),
189
190 /* cat6 */
191
192 INSTR_5XX(c6e60000_00010600, "ldgb.untyped.4d.u32.1 r0.x, g[0], r1.x, r0.x"), /* ldgb.a.untyped.1dtype.u32.1 r0.x, g[r1.x], r0.x, 0 */
193 INSTR_5XX(d7660204_02000a01, "(sy)stib.typed.2d.u32.1 g[1], r0.x, r0.z, r1.x"), /* (sy)stib.a.u32.2d.1 g[r1.x], r0.x, r0.z, 1. r1.x is offset in ibo, r0.x is value*/
194 /* dEQP-VK.image.load_store.1d_array.r8g8b8a8_unorm */
195 INSTR_5XX(c1a20006_0600ba01, "ldib.typed.2d.f32.4 r1.z, g[0], r0.z, r1.z"), /* ldib.a.f32.2d.4 r1.z, g[r0.z], r1.z, 0. r0.z is offset in ibo as src. r1.z */
196 /* dEQP-VK.image.load_store.3d.r32g32b32a32_sint */
197 INSTR_5XX(c1aa0003_0500fc01, "ldib.typed.3d.s32.4 r0.w, g[0], r0.w, r1.y"), /* ldib.a.s32.3d.4 r0.w, g[r0.w], r1.y, 0. r0.w is offset in ibo as src, and dst */
198 /* dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_image.vertex.descriptor_array.3d */
199 INSTR_5XX(c1a20204_0401fc01, "ldib.typed.3d.f32.4 r1.x, g[1], r1.w, r1.x"), /* ldib.a.f32.3d.4 r1.x, g[r1.w], r1.x, 1 */
200 /* dEQP-VK.binding_model.shader_access.secondary_cmd_buf.with_push.storage_texel_buffer.vertex_fragment.single_descriptor.offset_zero */
201 INSTR_5XX(c1a20005_0501be01, "ldib.typed.4d.f32.4 r1.y, g[0], r1.z, r1.y"), /* ldib.a.f32.1dtype.4 r1.y, g[r1.z], r1.y, 0 */
202 /* dEQP-VK.texture.filtering.cube.formats.r8g8b8a8_snorm_nearest */
203 INSTR_5XX(c1a60200_0000ba01, "ldib.typed.2d.u32.4 r0.x, g[1], r0.z, r0.x"), /* ldib.a.u32.2d.4 r0.x, g[r0.z], r0.x, 1 */
204
205 // TODO is this a real instruction? Or float -6.0 ?
206 // INSTR_6XX(c0c00000_00000000, "stg.f16 g[hr0.x], hr0.x, hr0.x", .parse_fail=true),
207 /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
208 INSTR_6XX(c0d20906_02800004, "stg.a.f32 g[r1.x+(r1.z)<<2], r0.z, 2"), /* stg.a.f32 g[r1.x+(r1.z<<2)], r0.z, 2 */
209 INSTR_6XX(c0da052e_01800042, "stg.a.s32 g[r0.z+(r11.z)<<2], r8.y, 1"), /* stg.a.s32 g[r0.z+(r11.z<<2)], r8.y, 1 */
210 INSTR_6XX(c0dc052e_01800042, "stg.a.u8 g[r0.z+(r11.z)<<2], hr8.y, 1"),
211 INSTR_6XX(c0ca0505_03800042, "stg.s32 g[r0.z+5], r8.y, 3"),
212 INSTR_6XX(c0ca0500_03800042, "stg.s32 g[r0.z], r8.y, 3"),
213 INSTR_6XX(c0ca0531_03800242, "stg.s32 g[r0.z+305], r8.y, 3"),
214 INSTR_5XX(c0ce0100_02800000, "stg.u8_32 g[r0.x], r0.x, 2"),
215 INSTR_5XX(c0c00100_02800000, "stg.f16 g[r0.x], hr0.x, 2"),
216
217 /* dEQP-VK.ray_query.builtin.objectraydirection.geom.aabbs */
218 INSTR_7XX(c380941e_0703c079, "ray_intersection r7.z, [r3.w], r15.x, r1.w, r18.z"),
219 /* dEQP-VK.ray_query.builtin.rayqueryterminate.geom.triangles */
220 INSTR_7XX(c0260207_00630100, "resbase.untyped.1d.u32.1.imm.base0 r1.w, 1"), /* resbase.u32.1d.mode4.base0 r1.w, 1 */
221
222 /* Custom crafted */
223 INSTR_6XX(c0d61104_01800228, "stg.a.u32 g[r2.x+(r1.x+1)<<2], r5.x, 1"),
224 INSTR_6XX(c0d61104_01802628, "stg.a.u32 g[r2.x+r1.x<<4+3<<2], r5.x, 1"),
225
226 INSTR_7XX(c0d20505_07bfc006, "stg.a.f32 g[r0.z+r1.y+255], r0.w, 7"),
227 INSTR_7XX(c0d20507_04812006, "stg.a.f32 g[c0.z+r1.w+4], r0.w, 4"),
228
229 INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
230 INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
231 INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
232 INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
233 INSTR_6XX(c0060003_0180c269, "ldg.u32 r0.w, g[r0.w+308], 1"),
234 INSTR_6XX(c0040003_0180c269, "ldg.u16 hr0.w, g[r0.w+308], 1"),
235
236 /* Found in TCS/TES shaders of GTA V */
237 INSTR_6XX(c0020007_03c1420f, "ldg.a.f32 r1.w, g[r1.y+(r1.w+1)<<2], 3"), /* ldg.a.f32 r1.w, g[r1.y+((r1.w+1)<<2)], 3 */
238
239 /* Custom crafted */
240 INSTR_6XX(c0020007_03c1740f, "ldg.a.f32 r1.w, g[r1.y+r1.w<<5+2<<2], 3"),
241
242 INSTR_6XX(c0020011_04c08023, "ldg.a.f32 r4.y, g[r0.z+(r4.y)<<2], 4"), /* ldg.a.f32 r4.y, g[r0.z+(r4.y<<2)], 4 */
243 INSTR_6XX(c0060006_01c18017, "ldg.a.u32 r1.z, g[r1.z+(r2.w)<<2], 1"), /* ldg.a.u32 r1.z, g[r1.z+(r2.w<<2)], 1 */
244 INSTR_6XX(c0000006_01c18017, "ldg.a.f16 hr1.z, g[r1.z+(r2.w)<<2], 1"),
245 INSTR_6XX(c0060006_0181800f, "ldg.u32 r1.z, g[r1.z+7], 1"),
246 INSTR_6XX(c0060006_01818001, "ldg.u32 r1.z, g[r1.z], 1"),
247
248 INSTR_7XX(c0020411_04c08023, "ldg.a.f32 r4.y, g[r0.z+r4.y+2], 4"),
249 INSTR_7XX(c0004006_01c1a017, "ldg.a.f16 hr1.z, g[c1.z+r2.w+32], 1"),
250
251 /* dEQP-GLES3.functional.ubo.random.basic_arrays.0 */
252 INSTR_6XX(c7020020_01800000, "stc.f32 c[32], r0.x, 1"), /* stc c[32], r0.x, 1 */
253 /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */
254 INSTR_6XX(c7060020_03800000, "stc.u32 c[32], r0.x, 3"), /* stc c[32], r0.x, 3 */
255 /* A660 dEQP-VK.robustness.robustness2.push.notemplate.r32i.unroll.nonvolatile.sampled_image.no_fmt_qual.img.samples_1.1d.frag */
256 /* TODO: stc has a DST range similar to stsc's */
257 /* INSTR_6XX(c702026e_0480025c, "stc.u32 c[366], r11.z, 4"), */ /* stc c[366], r11.z, 4 */
258
259 /* dEQP-VK.pipeline.monolithic.extended_dynamic_state.two_draws_static.stencil_state_face_both_single_gt_replace_clear_102_ref_103_depthfail */
260 INSTR_7XX(c7420000_0cc00000, "stsc.f32 c[0], 0, 12"),
261 /* dEQP-VK.pipeline.monolithic.push_constant.graphics_pipeline.overlap_4_shaders_vert_tess_frag */
262 INSTR_7XX(c7420000_08c00020, "stsc.f32 c[0], 16, 8"),
263 INSTR_7XX(c742006e_08c00220, "stsc.f32 c[366], 16, 8"),
264
265 /* custom */
266 INSTR_6XX(c7060100_03800000, "stc.u32 c[a1.x], r0.x, 3"), /* stc c[a1.x], r0.x, 3 */
267 INSTR_6XX(c7060120_03800000, "stc.u32 c[a1.x+32], r0.x, 3"), /* stc c[a1.x+32], r0.x, 3 */
268
269 /* dEQP-VK.image.image_size.cube_array.readonly_writeonly_1x1x12 */
270 INSTR_6XX(c0260200_03676100, "stib.b.untyped.1d.u32.3.imm.base0 r0.x, r0.w, 1"), /* stib.untyped.u32.1d.3.mode4.base0 r0.x, r0.w, 1 */
271
272 INSTR_6XX(c0240402_00674100, "stib.b.untyped.1d.u16.1.imm.base0 hr0.z, r0.x, 2"),
273 #if 0
274 /* TODO blob sometimes/frequently sets b0, although there does not seem
275 * to be an obvious pattern and our encoding never sets it. AFAICT it
276 * is a dontcare bit
277 */
278 /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */
279 INSTR_6XX(c0220200_0361b801, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */
280 #else
281 /* dEQP-VK.texture.filtering.cube.formats.a8b8g8r8_srgb_nearest_mipmap_nearest.txt */
282 INSTR_6XX(c0220200_0361b800, "ldib.b.typed.1d.f32.4.imm r0.x, r0.w, 1"), /* ldib.f32.1d.4.mode0.base0 r0.x, r0.w, 1 */
283 #endif
284
285 INSTR_7XX(d1260406_00e77100, "(sy)stib.b.untyped.1d.u32.4.imm.base0 r1.z, r0.x+4, 2"),
286 INSTR_7XX(c3260002_01e1b100, "ldib.b.untyped.1d.u32.4.imm.base0 r0.z, r0.y+12, 0"),
287 INSTR_7XX(c7661840_4de74144, "stib.b.untyped.1d.u32.1.uniform.base2 r16.x, r19.y+29, r3.x"),
288
289 /* dEQP-GLES31.functional.tessellation.invariance.outer_edge_symmetry.isolines_equal_spacing_ccw */
290 INSTR_6XX(c2c21100_04800006, "stlw.f32 l[r2.x], r0.w, 4"),
291 INSTR_6XX(c2c20f00_01800004, "stlw.f32 l[r1.w], r0.z, 1"),
292 INSTR_6XX(c2860003_02808011, "ldlw.u32 r0.w, l[r0.z+8], 2"),
293
294 /* dEQP-VK.compute.basic.shared_var_single_group */
295 INSTR_6XX(c1060500_01800008, "stl.u32 l[r0.z], r1.x, 1"),
296 INSTR_6XX(c0460001_01804001, "ldl.u32 r0.y, l[r0.y], 1"),
297
298 INSTR_6XX(c0860018_03820001, "ldp.u32 r6.x, p[r2.x], 3"),
299 INSTR_6XX(c0420002_01808019, "ldl.f32 r0.z, l[r0.z+12], 1"),
300 INSTR_6XX(c1021710_04800000, "stl.f32 l[r2.w+16], r0.x, 4"),
301 INSTR_6XX(d7c60011_03c00000, "(sy)ldlv.u32 r4.y, l[0], 3"),
302
303 /* resinfo */
304 INSTR_6XX(c0260000_0063c200, "resinfo.b.untyped.2d.u32.1.imm r0.x, 0"), /* resinfo.u32.2d.mode0.base0 r0.x, 0 */
305 /* dEQP-GLES31.functional.image_load_store.buffer.image_size.writeonly_7.txt */
306 INSTR_6XX(c0260000_0063c000, "resinfo.b.untyped.1d.u32.1.imm r0.x, 0"), /* resinfo.u32.1d.mode0.base0 r0.x, 0 */
307 /* dEQP-VK.image.image_size.2d.readonly_12x34.txt */
308 INSTR_6XX(c0260000_0063c300, "resinfo.b.untyped.2d.u32.1.imm.base0 r0.x, 0"), /* resinfo.u32.2d.mode4.base0 r0.x, 0 */
309 /* Custom test */
310 INSTR_6XX(c0260000_0063c382, "resinfo.b.untyped.2d.u32.1.nonuniform.base1 r0.x, r0.x"), /* resinfo.u32.2d.mode6.base1 r0.x, r0.x */
311
312 /* dEQP-GLES31.functional.image_load_store.2d.image_size.readonly_writeonly_32x32.txt */
313 INSTR_5XX(c3e60000_00000200, "resinfo.u32.2d r0.x, g[0]"), /* resinfo.u32.2d r0.x, 0 */
314 #if 0
315 /* TODO our encoding differs in b11 ('typed'), which seems to be a dontcare bit */
316 /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */
317 INSTR_5XX(c3e60000_00000e00, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */
318 /* dEQP-GLES31.functional.image_load_store.3d.image_size.readonly_writeonly_12x34x56 */
319 INSTR_5XX(c3e60000_00000c00, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */
320 #else
321 /* dEQP-GLES31.functional.image_load_store.buffer.image_size.readonly_writeonly_7 */
322 INSTR_5XX(c3e60000_00000600, "resinfo.u32.4d r0.x, g[0]"), /* resinfo.u32.1dtype r0.x, 0 */
323 /* dEQP-GLES31.functional.image_load_store.3d.image_size.readonly_writeonly_12x34x56 */
324 INSTR_5XX(c3e60000_00000400, "resinfo.u32.3d r0.x, g[0]"), /* resinfo.u32.3d r0.x, 0 */
325 #endif
326
327 /* ldgb */
328 /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_vec4 */
329 INSTR_5XX(c6e20000_06003600, "ldgb.untyped.4d.f32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.f32.4 r0.x, g[r0.x], r1.z, 0 */
330 /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_ivec4 */
331 INSTR_5XX(c6ea0000_06003600, "ldgb.untyped.4d.s32.4 r0.x, g[0], r0.x, r1.z"), /* ldgb.a.untyped.1dtype.s32.4 r0.x, g[r0.x], r1.z, 0 */
332 /* dEQP-GLES31.functional.ssbo.layout.single_basic_type.packed.mediump_float */
333 INSTR_5XX(c6e20000_02000600, "ldgb.untyped.4d.f32.1 r0.x, g[0], r0.x, r0.z"), /* ldgb.a.untyped.1dtype.f32.1 r0.x, g[r0.x], r0.z, 0 */
334 /* dEQP-GLES31.functional.ssbo.layout.random.vector_types.0 */
335 INSTR_5XX(c6ea0008_14002600, "ldgb.untyped.4d.s32.3 r2.x, g[0], r0.x, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r2.x, g[r0.x], r5.x, 0 */
336 INSTR_5XX(c6ea0204_1401a600, "ldgb.untyped.4d.s32.3 r1.x, g[1], r1.z, r5.x"), /* ldgb.a.untyped.1dtype.s32.3 r1.x, g[r1.z], r5.x, 1 */
337
338 /* stgb */
339 INSTR_5XX(c7220028_0480000d, "stgb.untyped.1d.f32.1 g[0], r1.z, 4, r10.x"), /* stgb.untyped.1d.1 g[r10.x], r1.z, 4, r0.x */
340 INSTR_5XX(c7260023_02800009, "stgb.untyped.1d.u32.1 g[0], r1.x, 2, r8.w"), /* stgb.untyped.1d.1 g[r8.w], r1.x, 2, r0.x */
341
342 /* discard stuff */
343 INSTR_6XX(42b400f8_20010004, "cmps.s.eq p0.x, r1.x, 1"),
344 INSTR_6XX(02800000_00000000, "kill p0.x"),
345
346 /* Immediates */
347 INSTR_6XX(40100007_68000008, "add.f r1.w, r2.x, (neg)(0.0)"),
348 INSTR_6XX(40100007_68010008, "add.f r1.w, r2.x, (neg)(0.5)"),
349 INSTR_6XX(40100007_68020008, "add.f r1.w, r2.x, (neg)(1.0)"),
350 INSTR_6XX(40100007_68030008, "add.f r1.w, r2.x, (neg)(2.0)"),
351 INSTR_6XX(40100007_68040008, "add.f r1.w, r2.x, (neg)(e)"),
352 INSTR_6XX(40100007_68050008, "add.f r1.w, r2.x, (neg)(pi)"),
353 INSTR_6XX(40100007_68060008, "add.f r1.w, r2.x, (neg)(1/pi)"),
354 INSTR_6XX(40100007_68070008, "add.f r1.w, r2.x, (neg)(1/log2(e))"),
355 INSTR_6XX(40100007_68080008, "add.f r1.w, r2.x, (neg)(log2(e))"),
356 INSTR_6XX(40100007_68090008, "add.f r1.w, r2.x, (neg)(1/log2(10))"),
357 INSTR_6XX(40100007_680a0008, "add.f r1.w, r2.x, (neg)(log2(10))"),
358 INSTR_6XX(40100007_680b0008, "add.f r1.w, r2.x, (neg)(4.0)"),
359 INSTR_6XX(50600004_2c000004, "(sy)mul.f hr1.x, hr1.x, h(0.0)"),
360 INSTR_6XX(50600004_2c010004, "(sy)mul.f hr1.x, hr1.x, h(0.5)"),
361 INSTR_6XX(50600004_2c020004, "(sy)mul.f hr1.x, hr1.x, h(1.0)"),
362 INSTR_6XX(50600004_2c030004, "(sy)mul.f hr1.x, hr1.x, h(2.0)"),
363 INSTR_6XX(50600004_2c040004, "(sy)mul.f hr1.x, hr1.x, h(e)"),
364 INSTR_6XX(50600004_2c050004, "(sy)mul.f hr1.x, hr1.x, h(pi)"),
365 INSTR_6XX(50600004_2c060004, "(sy)mul.f hr1.x, hr1.x, h(1/pi)"),
366 INSTR_6XX(50600004_2c070004, "(sy)mul.f hr1.x, hr1.x, h(1/log2(e))"),
367 INSTR_6XX(50600004_2c080004, "(sy)mul.f hr1.x, hr1.x, h(log2(e))"),
368 INSTR_6XX(50600004_2c090004, "(sy)mul.f hr1.x, hr1.x, h(1/log2(10))"),
369 INSTR_6XX(50600004_2c0a0004, "(sy)mul.f hr1.x, hr1.x, h(log2(10))"),
370 INSTR_6XX(50600004_2c0b0004, "(sy)mul.f hr1.x, hr1.x, h(4.0)"),
371 INSTR_6XX(20444000_00000000, "mov.f32f32 r0.x, (0.000000)"),
372 INSTR_6XX(20444000_3f000000, "mov.f32f32 r0.x, (0.500000)"),
373 INSTR_6XX(20444000_3f800000, "mov.f32f32 r0.x, (1.000000)"),
374 INSTR_6XX(20444000_40000000, "mov.f32f32 r0.x, (2.000000)"),
375 INSTR_6XX(20444000_40400000, "mov.f32f32 r0.x, (3.000000)"),
376 INSTR_6XX(20444000_40800000, "mov.f32f32 r0.x, (4.000000)"),
377
378 /* LDC. Our disasm differs greatly from qcom here, and we've got some
379 * important info they lack(?!), but same goes the other way.
380 */
381 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.uniform_fragment */
382 INSTR_6XX(c0260000_00c78040, "ldc.offset0.1.uniform r0.x, 0, r0.x"), /* ldc.1.mode1.base0 r0.x, 0, r0.x */
383 INSTR_6XX(c0260201_00c78040, "ldc.offset0.1.uniform r0.y, 0, r0.y"), /* ldc.1.mode1.base0 r0.y, 0, r0.y */
384 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.dynamically_uniform_fragment */
385 INSTR_6XX(c0260000_00c78080, "ldc.offset0.1.nonuniform r0.x, 0, r0.x"), /* ldc.1.mode2.base0 r0.x, 0, r0.x */
386 INSTR_6XX(c0260201_00c78080, "ldc.offset0.1.nonuniform r0.y, 0, r0.y"), /* ldc.1.mode2.base0 r0.y, 0, r0.y */
387
388 /* a4xx-a5xx has the exact same instrs in
389 * dEQP-GLES31.functional.shaders.opaque_type_indexing.ubo.(dynamically_)uniform_fragment
390 * with no change based on the mode. Note that we can't decode this yet.
391 */
392 /* INSTR_4XX(c7860000_00810001), */ /* ldc.1 r0.x, g[r1.x], 0, r0.x */
393 /* INSTR_5XX(c7860000_00800000), */ /* ldc.a.1 r0.x, g[r0.x], 0, r0.x */
394
395 /* custom */
396 INSTR_6XX(c0260201_ffc78080, "ldc.offset0.1.nonuniform r0.y, 255, r0.y"), /* ldc.1.mode2.base0 r0.y, 255, r0.y */
397
398 /* custom shaders, loading .x, .y, .z, .w from an array of vec4 in block 0 */
399 INSTR_6XX(c0260000_00478000, "ldc.offset0.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
400 INSTR_6XX(c0260000_00478200, "ldc.offset1.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
401 INSTR_6XX(c0260000_00478400, "ldc.offset2.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
402 INSTR_6XX(c0260000_00478600, "ldc.offset3.1.imm r0.x, r0.x, 0"), /* ldc.1.mode0.base0 r0.x, r0.x, 0 */
403
404 /* dEQP-VK.glsl.arrays.length.float_fragment */
405 INSTR_6XX(c02600c1_00c7a900, "ldc.u.offset0.3.imm.base0 r48.y, 0, 0"), /* ldc.u.3.mode4.base0 sr48.y, 0, 0 */
406
407 /* dEQP-VK.glsl.conditionals.if.if_else_vertex */
408 INSTR_6XX(c0360000_00c78100, "ldc.1.k.imm.base0 c[a1.x], 0, 0"), /* ldc.1.k.mode4.base0 c[a1.x], 0, 0 */
409 /* custom */
410 INSTR_6XX(c0360003_00c78100, "ldc.4.k.imm.base0 c[a1.x], 0, 0"), /* ldc.4.k.mode4.base0 c[a1.x], 0, 0 */
411
412 /* dEQP-VK.glsl.struct.local.nested_struct_array_dynamic_index_fragment */
413 INSTR_6XX(c1425b50_01803e02, "stp.f32 p[r11.y-176], r0.y, 1"),
414 INSTR_6XX(c1425b98_02803e14, "stp.f32 p[r11.y-104], r2.z, 2"),
415 INSTR_6XX(c1465ba0_01803e2a, "stp.u32 p[r11.y-96], r5.y, 1"),
416 INSTR_6XX(c0860008_01860001, "ldp.u32 r2.x, p[r6.x], 1"),
417 /* Custom stp based on above to catch a disasm bug. */
418 INSTR_6XX(c1465b00_0180022a, "stp.u32 p[r11.y+256], r5.y, 1"),
419
420 INSTR_6XX(c0160010_00b001a1, "ldg.k.u32 c[16], g[r48.x+208], 1"),
421 INSTR_6XX(c0160188_00b01261, "ldg.k.u32 c[a1.x+136], g[r48.x+2352], 1"),
422
423 /* Atomic: */
424 #if 0
425 /* TODO our encoding differs in b53 for these two */
426 INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
427 INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
428 #else
429 INSTR_5XX(c4f60002_00008001, "atomic.s.inc.untyped.1d.u32.1.g r0.z, g[0], r0.z, r0.x, r0.x"),
430 INSTR_5XX(c4360205_03000001, "atomic.s.add.untyped.1d.u32.1.g r1.y, g[1], r0.x, r0.w, r0.x"),
431 #endif
432 INSTR_6XX(d5c60003_03008001, "(sy)atomic.max.untyped.1d.u32.1.l r0.w, l[r0.z], r0.w"),
433
434 /* dEQP-VK.glsl.atomic_operations.add_unsigned_compute_reference */
435 INSTR_6XX(c4160002_02000001, "atomic.g.add.untyped.1d.u32.1.g r0.z, r0.x, r0.z"),
436
437 /* Bindless atomic: */
438 INSTR_6XX(c03a0003_01640000, "atomic.b.add.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.add.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
439 INSTR_6XX(c03a0003_01660000, "atomic.b.and.untyped.1d.s32.1.imm r0.w, r0.y, 0"), /* atomic.b.and.g.s32.1d.mode0.base0 r0.w,r0.y,0 */
440 INSTR_6XX(c0360000_0365c800, "atomic.b.max.typed.1d.u32.1.imm r0.x, r0.w, 0"), /* atomic.b.max.g.u32.1d.mode0.base0 r0.x,r0.w,0 */
441
442 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.const_literal.fragment.sampler2d */
443 INSTR_6XX(a0c01f04_0cc00005, "sam (f32)(xyzw)r1.x, r0.z, s#6, t#6"),
444
445 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.uniform.fragment.sampler2d */
446 INSTR_4XX(a0c81f02_00800001, "sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.mode0 (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */
447 INSTR_6XX(a0c81f07_0100000b, "sam.s2en.uniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode0 (f32)(xyzw)r1.w, r1.y, hr2.x */
448
449 /* dEQP-GLES31.functional.shaders.opaque_type_indexing.sampler.dynamically_uniform.fragment.sampler2d */
450 INSTR_4XX(a0c81f02_80800001, "sam.s2en.nonuniform (f32)(xyzw)r0.z, r0.x, hr1.x"), /* sam.s2en.uniform (f32)(xyzw)r0.z, r0.x, hr1.x */ /* same for 5xx */
451 INSTR_6XX(a0c81f07_8100000b, "sam.s2en.nonuniform (f32)(xyzw)r1.w, r1.y, hr2.x"), /* sam.s2en.mode4 (f32)(xyzw)r1.w, r1.y, hr2.x */
452
453 /* NonUniform: */
454 /* dEQP-VK.descriptor_indexing.storage_buffer */
455 INSTR_6XX(c0260c0a_0a61b180, "ldib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.z, r1.z"),
456 INSTR_6XX(d0260e0a_09677180, "(sy)stib.b.untyped.1d.u32.4.nonuniform.base0 r2.z, r2.y, r1.w"),
457 /* dEQP-VK.descriptor_indexing.uniform_texel_buffer */
458 INSTR_6XX(a0481f00_40000405, "isaml.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.z, r0.x"),
459 /* dEQP-VK.descriptor_indexing.storage_image */
460 INSTR_6XX(d0360c04_02640b80, "(sy)atomic.b.add.typed.2d.u32.1.nonuniform.base0 r1.x, r0.z, r1.z"),
461 /* dEQP-VK.descriptor_indexing.sampler */
462 INSTR_6XX(a0c81f00_40000005, "sam.s2en.nonuniform.base0 (f32)(xyzw)r0.x, r0.z, r0.x"),
463
464 /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_int */
465 INSTR_6XX(c0260001_00c98000, "getfiberid.u32 r0.y"),
466
467 /* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_bvec4_constant */
468 INSTR_6XX(c6e4400d_05800002, "shfl.up.u16 hr3.y, hr0.y, 5"),
469 /* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_f16vec3 */
470 INSTR_6XX(c6e44017_c0000018, "shfl.up.u16 hr5.w, hr3.x, r48.x"),
471 /* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_uvec3_constant */
472 INSTR_6XX(c6e64006_05800000, "shfl.up.u32 r1.z, r0.x, 5"),
473 /* dEQP-VK.subgroups.shuffle.compute.subgroupshuffleup_ivec3_dynamically_uniform */
474 INSTR_6XX(c6e64007_05000004, "shfl.up.u32 r1.w, r0.z, r1.y"),
475 /* dEQP-VK.subgroups.shuffle.graphics.subgroupshuffledown_i8vec3 */
476 INSTR_6XX(c6e46011_c1000014, "shfl.down.u16 hr4.y, hr2.z, r48.y"),
477 /* dEQP-VK.memory_model.write_after_read.ext.u32.coherent.fence_atomic.atomicwrite.subgroup.payload_local.image.guard_local.image.frag */
478 INSTR_6XX(c6e62005_3f800008, "shfl.xor.u32 r1.y, r1.x, 63"),
479 /* dEQP-VK.subgroups.shuffle.graphics.subgroupshuffle_bvec4 */
480 INSTR_6XX(c6e4c012_c0000020, "shfl.rup.u16 hr4.z, hr4.x, r48.x"),
481 /* dEQP-VK.glsl.atomic_operations.exchange_unsigned64bit_compute */
482 INSTR_7XX(c03c0009_05648142, "atomic.b.xchg.untyped.1d.u64.1.uniform.base1 r2.y, r1.y, r0.x"),
483
484 /* Custom test since we've never seen the blob emit these. */
485 INSTR_6XX(c0260004_00490000, "getspid.u32 r1.x"),
486 INSTR_6XX(c0260005_00494000, "getwid.u32 r1.y"),
487
488 /* cat7 */
489
490 /* dEQP-VK.compute.basic.ssbo_local_barrier_single_invocation */
491 INSTR_6XX(e0fa0000_00000000, "fence.g.l.r.w"),
492 INSTR_6XX(e09a0000_00000000, "fence.r.w"),
493 INSTR_6XX(f0420000_00000000, "(sy)bar.g"),
494 INSTR_6XX(e1080000_00000000, "sleep.l"),
495 INSTR_6XX(e2080000_00000000, "dccln.all"),
496 /* dEQP-VK.memory_model.message_passing.core11.u32.coherent.fence_fence.atomicwrite.device.payload_local.buffer.guard_local.buffer.comp */
497 INSTR_7XX(e2d20000_00000000, "ccinv"),
498
499 INSTR_7XX(e3c20000_00000000, "lock"),
500 INSTR_7XX(fbc21000_00000000, "(sy)(ss)(jp)lock"),
501
502 /* dEQP-VK.pipeline.monolithic.sampler.border_swizzle.r4g4b4a4_unorm_pack16.rg1a.opaque_white.gather_1.no_swizzle_hint */
503 INSTR_7XX(e45401a0_bfba7736, "alias.tex.f32.1 r40.x, (-1.456763)"),
504 /* dEQP-VK.synchronization.op.single_queue.event.write_draw_indexed_read_image_geometry.image_128x128_r32g32b32a32_sfloat */
505 INSTR_7XX(e44c0009_00000007, "alias.tex.f32.0 r2.y, c1.w"),
506 /* dEQP-VK.binding_model.shader_access.primary_cmd_buf.storage_image.geometry.single_descriptor.2d_base_mip */
507 INSTR_7XX(ec5501a0_00000006, "(jp)alias.tex.b32.1 r40.x, (0x6)"),
508 /* dEQP-VK.subgroups.quad.graphics.subgroupquadbroadcast_i16vec2 */
509 INSTR_7XX(e45100a0_00000002, "alias.tex.b16.0 hr40.x, h(0x2)"),
510
511 /* dEQP-VK.glsl.derivate.dfdx.constant.float */
512 INSTR_7XX(e4508003_00003c00, "alias.rt.f16.0 rt0.w, h(1.000000)"),
513 INSTR_7XX(f4488000_00000000, "(sy)alias.rt.f16.0 rt0.x, hc0.x"),
514
515 /* dEQP-VK.glsl.opaque_type_indexing.ubo.const_literal_fragment */
516 INSTR_7XX(e44c8008_00000010, "alias.rt.f32.0 rt2.x, c4.x"),
517
518 /* dEQP-VK.dynamic_rendering.primary_cmd_buff.suballocation.multisample_resolve.layers_3.r16g16_unorm.samples_4_resolve_level_4 */
519 INSTR_7XX(e4548008_3f800000, "alias.rt.f32.0 rt2.x, (1.000000)"),
520
521 /* dEQP-VK.renderpass.suballocation.multisample_resolve.layers_3.r8g8b8a8_uint.samples_2_baseLayer1 */
522 INSTR_7XX(e4558007_000000ff, "alias.rt.b32.0 rt1.w, (0xff)"),
523
524 INSTR_6XX(ffffffff_ffffffff, "raw 0xFFFFFFFFFFFFFFFF"),
525 /* clang-format on */
526 };
527
528 static void
add_generated_tests(struct u_vector * all_tests,void * ctx)529 add_generated_tests(struct u_vector *all_tests, void *ctx) {
530 /* stib.b/ldib.b OFFSET_LO aliases what other instructions use for opcode */
531 for (int offset = 1; offset < 0x1f; offset++) {
532 char *stib = ralloc_asprintf(
533 ctx, "stib.b.untyped.1d.u32.4.imm.base0 r2.y, r5.z+%u, 4", offset);
534 *(struct test *)u_vector_add(all_tests) = (struct test)INSTR_6XX_RAW(
535 0xc026080916e77100ull + ((uint64_t)offset << 54), stib);
536
537 char *ldib = ralloc_asprintf(
538 ctx, "ldib.b.untyped.1d.u32.4.imm.base0 r0.z, r0.y+%u, 0", offset);
539 *(struct test *)u_vector_add(all_tests) = (struct test)INSTR_6XX_RAW(
540 0xc026000201e1b100ull + ((uint64_t)offset << 54), ldib);
541 }
542 }
543
/* Strips every trailing '\n' from string, in place, by overwriting each one
 * with a NUL byte.  Newlines that are not at the end of the string are left
 * untouched.  string must be a valid, writable, NUL-terminated buffer.
 */
static void
trim(char *string)
{
   /* size_t rather than int: strlen() returns size_t, and narrowing it
    * would misbehave for lengths that do not fit in an int.
    */
   size_t len = strlen(string);

   while (len > 0 && string[len - 1] == '\n')
      string[--len] = '\0';
}
550
551 int
main(int argc,char ** argv)552 main(int argc, char **argv)
553 {
554 int retval = 0;
555 int decode_fails = 0, asm_fails = 0, encode_fails = 0;
556 const int output_size = 4096;
557 char *disasm_output = malloc(output_size);
558 FILE *fdisasm = fmemopen(disasm_output, output_size, "w+");
559 if (!fdisasm) {
560 fprintf(stderr, "failed to fmemopen\n");
561 return 1;
562 }
563
564 void *ctx = ralloc_context(NULL);
565
566 struct u_vector all_tests = { 0 };
567 u_vector_init(&all_tests, ARRAY_SIZE(tests), sizeof(struct test));
568 for (uint32_t i = 0; i < ARRAY_SIZE(tests); i++) {
569 *(struct test *) u_vector_add(&all_tests) = tests[i];
570 }
571
572 add_generated_tests(&all_tests, ctx);
573
574 struct ir3_compiler *compilers[10] = {};
575 struct fd_dev_id dev_ids[ARRAY_SIZE(compilers)];
576
577 struct test *test;
578 u_vector_foreach (test, &all_tests) {
579 uint32_t code[2];
580 if (test->instr) {
581 code[0] = strtoll(&test->instr[9], NULL, 16);
582 code[1] = strtoll(&test->instr[0], NULL, 16);
583 } else {
584 code[0] = test->instr_raw;
585 code[1] = test->instr_raw >> 32;
586 }
587
588 struct fd_dev_id dev_id = {
589 .gpu_id = test->gpu_id,
590 .chip_id = test->chip_id,
591 };
592
593 const struct fd_dev_info *dev_info = fd_dev_info_raw(&dev_id);
594 const char *name = fd_dev_name(&dev_id);
595
596 printf("Testing %s %08x_%08x: \"%s\"...\n", name, code[1], code[0],
597 test->expected);
598
599 rewind(fdisasm);
600 memset(disasm_output, 0, output_size);
601
602 /*
603 * Test disassembly:
604 */
605
606 ir3_isa_disasm(code, 8, fdisasm,
607 &(struct isa_decode_options){
608 .gpu_id = dev_info->chip * 100,
609 .show_errors = true,
610 .no_match_cb = print_raw,
611 });
612 fflush(fdisasm);
613
614 trim(disasm_output);
615
616 if (strcmp(disasm_output, test->expected) != 0) {
617 printf("FAIL: disasm\n");
618 printf(" Expected: \"%s\"\n", test->expected);
619 printf(" Got: \"%s\"\n", disasm_output);
620 retval = 1;
621 decode_fails++;
622 }
623
624 /*
625 * Test assembly, which should result in the identical binary:
626 */
627
628 if (!compilers[dev_info->chip]) {
629 dev_ids[dev_info->chip].gpu_id = test->gpu_id;
630 dev_ids[dev_info->chip].chip_id = test->chip_id;
631 compilers[dev_info->chip] =
632 ir3_compiler_create(NULL, &dev_ids[dev_info->chip],
633 fd_dev_info_raw(&dev_ids[dev_info->chip]),
634 &(struct ir3_compiler_options){});
635 }
636
637 FILE *fasm =
638 fmemopen((void *)test->expected, strlen(test->expected), "r");
639
640 struct ir3_kernel_info info = {};
641 struct ir3_shader *shader = ir3_parse_asm(compilers[dev_info->chip], &info, fasm);
642 fclose(fasm);
643 if (!shader) {
644 printf("FAIL: %sexpected assembler fail\n",
645 test->parse_fail ? "" : "un");
646 asm_fails++;
647 /* If this is an instruction that the asm parser is not expected
648 * to handle, don't count it as a fail.
649 */
650 if (!test->parse_fail)
651 retval = 1;
652 continue;
653 } else if (test->parse_fail) {
654 /* If asm parse starts passing, and we don't expect that, flag
655 * it as a fail so we don't forget to update the test vector:
656 */
657 printf(
658 "FAIL: unexpected parse success, please remove '.parse_fail=true'\n");
659 retval = 1;
660 }
661
662 struct ir3_shader_variant *v = shader->variants;
663 if (memcmp(v->bin, code, sizeof(code))) {
664 printf("FAIL: assembler\n");
665 printf(" Expected: %08x_%08x\n", code[1], code[0]);
666 printf(" Got: %08x_%08x\n", v->bin[1], v->bin[0]);
667 retval = 1;
668 encode_fails++;
669 }
670
671 ir3_shader_destroy(shader);
672 }
673
674 if (decode_fails)
675 printf("%d/%d decode fails\n", decode_fails, (int)ARRAY_SIZE(tests));
676 if (asm_fails)
677 printf("%d/%d assembler fails\n", asm_fails, (int)ARRAY_SIZE(tests));
678 if (encode_fails)
679 printf("%d/%d encode fails\n", encode_fails, (int)ARRAY_SIZE(tests));
680
681 if (retval) {
682 printf("FAILED!\n");
683 } else {
684 printf("PASSED!\n");
685 }
686
687 for (unsigned i = 0; i < ARRAY_SIZE(compilers); i++) {
688 if (!compilers[i])
689 continue;
690 ir3_compiler_destroy(compilers[i]);
691 }
692
693 u_vector_finish(&all_tests);
694 ralloc_free(ctx);
695 fclose(fdisasm);
696 free(disasm_output);
697
698 return retval;
699 }
700