/*
 * Copyright 2022 Alyssa Rosenzweig
 * Copyright 2021 Collabora, Ltd.
 * SPDX-License-Identifier: MIT
 */

#include "agx_compiler.h"
#include "agx_debug.h"
#include "agx_opcodes.h"

/* Validation doesn't make sense in release builds */
#ifndef NDEBUG

#define agx_validate_assert(stmt)                                             \
   if (!(stmt)) {                                                             \
      return false;                                                           \
   }

/*
 * If a block contains phi nodes, they must come at the start of the block. If
 * a block contains control flow, it must come at the beginning/end as
 * applicable. Therefore the form of a valid block is:
 *
 *       Control flow instructions (else)
 *       Phi nodes
 *       General instructions
 *       Control flow instructions (except else)
 *
 * Validate that this form is satisfied.
 */
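/*
 * Illustrative only (not every combination necessarily occurs in practice):
 * a block ordered as
 *
 *       else_icmp ...
 *       phi ...
 *       fadd ...
 *       if_icmp ...
 *
 * satisfies this form, whereas moving the phi below the fadd would not.
 */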
enum agx_block_state {
   AGX_BLOCK_STATE_CF_ELSE = 0,
   AGX_BLOCK_STATE_PHI = 1,
   AGX_BLOCK_STATE_BODY = 2,
   AGX_BLOCK_STATE_CF = 3
};

static bool
agx_validate_block_form(agx_block *block)
{
   enum agx_block_state state = AGX_BLOCK_STATE_CF_ELSE;

   agx_foreach_instr_in_block(block, I) {
      switch (I->op) {
      case AGX_OPCODE_ELSE_ICMP:
      case AGX_OPCODE_ELSE_FCMP:
         agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE);
         break;

      case AGX_OPCODE_PHI:
         agx_validate_assert(state == AGX_BLOCK_STATE_CF_ELSE ||
                             state == AGX_BLOCK_STATE_PHI);

         state = AGX_BLOCK_STATE_PHI;
         break;

      default:
         if (instr_after_logical_end(I)) {
            state = AGX_BLOCK_STATE_CF;
         } else {
            agx_validate_assert(state != AGX_BLOCK_STATE_CF);
            state = AGX_BLOCK_STATE_BODY;
         }
         break;
      }
   }

   return true;
}

/*
 * Only moves and phis use stack. Phis cannot use moves due to their
 * parallel nature, so we allow phis to take memory, later lowered to moves.
 */
static bool
is_stack_valid(agx_instr *I)
{
   return (I->op == AGX_OPCODE_MOV) || (I->op == AGX_OPCODE_PHI);
}

static bool
agx_validate_sources(agx_instr *I)
{
   agx_foreach_src(I, s) {
      agx_index src = I->src[s];

      if (src.type == AGX_INDEX_IMMEDIATE) {
         agx_validate_assert(!src.kill);
         agx_validate_assert(!src.cache);
         agx_validate_assert(!src.discard);

         bool ldst = agx_allows_16bit_immediate(I);

         /* Immediates are encoded as 8-bit (16-bit for memory load/store). For
          * integers, they extend to 16-bit. For floating point, they are 8-bit
          * minifloats. The 8-bit minifloats are a strict subset of 16-bit
          * standard floats, so we treat them as such in the IR, with an
          * implicit f16->f32 for 32-bit floating point operations.
          */
         agx_validate_assert(src.size == AGX_SIZE_16);
         agx_validate_assert(src.value < (1 << (ldst ? 16 : 8)));
      } else if (I->op == AGX_OPCODE_COLLECT && !agx_is_null(src)) {
         agx_validate_assert(src.size == I->src[0].size);
      }

      agx_validate_assert(!src.memory || is_stack_valid(I));
   }

   return true;
}

static bool
agx_validate_defs(agx_instr *I, BITSET_WORD *defs)
{
   agx_foreach_ssa_src(I, s) {
      /* Skip phis: in loop headers their sources arrive via back-edges, so
       * they may legitimately be defined later.
       */
      if (I->op == AGX_OPCODE_PHI)
         break;

      /* Sources must be defined before their use */
      if (!BITSET_TEST(defs, I->src[s].value))
         return false;
   }

   agx_foreach_ssa_dest(I, d) {
      /* Static single assignment */
      if (BITSET_TEST(defs, I->dest[d].value))
         return false;

      BITSET_SET(defs, I->dest[d].value);

      if (I->dest[d].memory && !is_stack_valid(I))
         return false;
   }

   return true;
}

/** Returns the number of 16-bit registers written to destination d */
static unsigned
agx_write_registers(const agx_instr *I, unsigned d)
{
   unsigned size = agx_size_align_16(I->dest[d].size);

   switch (I->op) {
   case AGX_OPCODE_MOV:
      /* Tautological */
      return agx_index_size_16(I->dest[d]);

   case AGX_OPCODE_ITER:
   case AGX_OPCODE_ITERPROJ:
      assert(1 <= I->channels && I->channels <= 4);
      return I->channels * size;

   case AGX_OPCODE_IMAGE_LOAD:
   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
      /* Even when masked out, these clobber 4 registers */
      return 4 * size;

   case AGX_OPCODE_DEVICE_LOAD:
   case AGX_OPCODE_LOCAL_LOAD:
   case AGX_OPCODE_STACK_LOAD:
   case AGX_OPCODE_LD_TILE:
      /* Can write 16-bit or 32-bit. Anything logically 64-bit is already
       * expanded to 32-bit in the mask.
       */
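      /* For example (illustrative), a 32-bit load with mask 0b0011 counts
       * 2 components * 2 half-registers = 4 registers here.
       */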
      return util_bitcount(I->mask) * MIN2(size, 2);

   case AGX_OPCODE_LDCF:
      return 6;
   case AGX_OPCODE_COLLECT:
      return I->nr_srcs * agx_size_align_16(I->src[0].size);
   default:
      return size;
   }
}


/*
 * Return the number of registers required for coordinates for a
 * texture/image instruction. We handle layer + sample index as 32-bit even
 * when only the lower 16 bits are present.
 */
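/* For example, a 2D array coordinate is (x, y, layer): 3 components handled
 * as 32-bit, i.e. 2 * 3 = 6 half-registers below.
 */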
static unsigned
agx_coordinate_registers(const agx_instr *I)
{
   switch (I->dim) {
   case AGX_DIM_1D:
      return 2 * 1;
   case AGX_DIM_1D_ARRAY:
      return 2 * 2;
   case AGX_DIM_2D:
      return 2 * 2;
   case AGX_DIM_2D_ARRAY:
      return 2 * 3;
   case AGX_DIM_2D_MS:
      return 2 * 3;
   case AGX_DIM_3D:
      return 2 * 3;
   case AGX_DIM_CUBE:
      return 2 * 3;
   case AGX_DIM_CUBE_ARRAY:
      return 2 * 4;
   case AGX_DIM_2D_MS_ARRAY:
      return 2 * 3;
   }

   unreachable("Invalid texture dimension");
}

static unsigned
agx_read_registers(const agx_instr *I, unsigned s)
{
   unsigned size = agx_size_align_16(I->src[s].size);

   switch (I->op) {
   case AGX_OPCODE_MOV:
      /* Tautological */
      return agx_index_size_16(I->src[0]);

   case AGX_OPCODE_SPLIT:
      return I->nr_dests * agx_size_align_16(agx_split_width(I));

   case AGX_OPCODE_DEVICE_STORE:
   case AGX_OPCODE_LOCAL_STORE:
   case AGX_OPCODE_STACK_STORE:
   case AGX_OPCODE_ST_TILE:
      /* See agx_write_registers */
      if (s == 0)
         return util_bitcount(I->mask) * MIN2(size, 2);
      else
         return size;

   case AGX_OPCODE_ZS_EMIT:
      if (s == 1) {
         /* Depth (bit 0) is fp32, stencil (bit 1) is u16 in the hw but we pad
          * up to u32 for simplicity
          */
         bool z = !!(I->zs & 1);
         bool s = !!(I->zs & 2);
         assert(z || s);

         return (z && s) ? 4 : z ? 2 : 1;
      } else {
         return 1;
      }

   case AGX_OPCODE_IMAGE_WRITE:
      if (s == 0)
         return 4 * size /* data */;
      else if (s == 1)
         return agx_coordinate_registers(I);
      else
         return size;

   case AGX_OPCODE_IMAGE_LOAD:
   case AGX_OPCODE_TEXTURE_LOAD:
   case AGX_OPCODE_TEXTURE_SAMPLE:
      if (s == 0) {
         return agx_coordinate_registers(I);
      } else if (s == 1) {
         /* LOD */
         if (I->lod_mode == AGX_LOD_MODE_LOD_GRAD) {
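            /* Gradients supply one ddx and one ddy value per coordinate
             * component, each 32-bit, hence the 2 * 2 * components below.
             */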
            switch (I->dim) {
            case AGX_DIM_1D:
            case AGX_DIM_1D_ARRAY:
               return 2 * 2 * 1;
            case AGX_DIM_2D:
            case AGX_DIM_2D_ARRAY:
            case AGX_DIM_2D_MS_ARRAY:
            case AGX_DIM_2D_MS:
               return 2 * 2 * 2;
            case AGX_DIM_CUBE:
            case AGX_DIM_CUBE_ARRAY:
            case AGX_DIM_3D:
               return 2 * 2 * 3;
            }

            unreachable("Invalid texture dimension");
         } else {
            return 1;
         }
      } else if (s == 5) {
         /* Compare/offset */
         return 2 * ((!!I->shadow) + (!!I->offset));
      } else {
         return size;
      }

   case AGX_OPCODE_ATOMIC:
   case AGX_OPCODE_LOCAL_ATOMIC:
      if (s == 0 && I->atomic_opc == AGX_ATOMIC_OPC_CMPXCHG)
         return size * 2;
      else
         return size;

   default:
      return size;
   }
}

/* Type check the dimensionality of sources and destinations. */
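/* For example, a 3-channel 32-bit destination occupies 3 * 2 = 6
 * half-registers, which must match what agx_write_registers() returns for
 * the opcode.
 */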
static bool
agx_validate_width(agx_context *ctx)
{
   bool succ = true;

   agx_foreach_instr_global(ctx, I) {
      agx_foreach_dest(I, d) {
         unsigned exp = agx_write_registers(I, d);
         unsigned act =
            agx_channels(I->dest[d]) * agx_size_align_16(I->dest[d].size);

         if (exp != act) {
            succ = false;
            fprintf(stderr, "destination %u, expected width %u, got width %u\n",
                    d, exp, act);
            agx_print_instr(I, stderr);
            fprintf(stderr, "\n");
         }
      }

      agx_foreach_src(I, s) {
         if (I->src[s].type == AGX_INDEX_NULL)
            continue;

         unsigned exp = agx_read_registers(I, s);
         unsigned act =
            agx_channels(I->src[s]) * agx_size_align_16(I->src[s].size);

         if (exp != act) {
            succ = false;
            fprintf(stderr, "source %u, expected width %u, got width %u\n", s,
                    exp, act);
            agx_print_instr(I, stderr);
            fprintf(stderr, "\n");
         }
      }
   }

   return succ;
}

static bool
agx_validate_predecessors(agx_block *block)
{
   /* Only loop headers may have predecessors that come later in program order */
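   /* (A loop's back-edge, from the latch block up to the header, is the
    * expected source of such a predecessor.)
    */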
   bool has_later_preds = false;

   agx_foreach_predecessor(block, pred) {
      if ((*pred)->index >= block->index)
         has_later_preds = true;
   }

   if (has_later_preds && !block->loop_header)
      return false;

   /* Successors and predecessors are found together */
   agx_foreach_predecessor(block, pred) {
      bool found = false;

      agx_foreach_successor((*pred), succ) {
         if (succ == block)
            found = true;
      }

      if (!found)
         return false;
   }

   return true;
}

static bool
agx_validate_sr(const agx_instr *I)
{
   bool none = (I->op == AGX_OPCODE_GET_SR);
   bool coverage = (I->op == AGX_OPCODE_GET_SR_COVERAGE);
   bool barrier = (I->op == AGX_OPCODE_GET_SR_BARRIER);

   /* Filter get_sr instructions */
   if (!(none || coverage || barrier))
      return true;

   switch (I->sr) {
   case AGX_SR_ACTIVE_THREAD_INDEX_IN_QUAD:
   case AGX_SR_ACTIVE_THREAD_INDEX_IN_SUBGROUP:
   case AGX_SR_COVERAGE_MASK:
   case AGX_SR_IS_ACTIVE_THREAD:
      return coverage;

   case AGX_SR_HELPER_OP:
   case AGX_SR_HELPER_ARG_L:
   case AGX_SR_HELPER_ARG_H:
      return barrier;

   default:
      return none;
   }
}

void
agx_validate(agx_context *ctx, const char *after)
{
   bool fail = false;

   if (agx_compiler_debug & AGX_DBG_NOVALIDATE)
      return;

   int last_index = -1;

   agx_foreach_block(ctx, block) {
      if ((int)block->index < last_index) {
         fprintf(stderr, "Out-of-order block index %d vs %d after %s\n",
                 block->index, last_index, after);
         agx_print_block(block, stderr);
         fail = true;
      }

      last_index = block->index;

      if (!agx_validate_block_form(block)) {
         fprintf(stderr, "Invalid block form after %s\n", after);
         agx_print_block(block, stderr);
         fail = true;
      }

      if (!agx_validate_predecessors(block)) {
         fprintf(stderr, "Invalid loop header flag after %s\n", after);
         agx_print_block(block, stderr);
         fail = true;
      }
   }

   {
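      /* One bit per SSA value index; ctx->alloc is assumed to be the count of
       * SSA values allocated, so the bitset covers every index a source or
       * destination can hold.
       */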
      BITSET_WORD *defs = calloc(BITSET_WORDS(ctx->alloc), sizeof(BITSET_WORD));

      agx_foreach_instr_global(ctx, I) {
         if (!agx_validate_defs(I, defs)) {
            fprintf(stderr, "Invalid defs after %s\n", after);
            agx_print_instr(I, stderr);
            fail = true;
         }
      }

      free(defs);
   }

   agx_foreach_instr_global(ctx, I) {
      if (!agx_validate_sources(I)) {
         fprintf(stderr, "Invalid sources form after %s\n", after);
         agx_print_instr(I, stderr);
         fail = true;
      }

      if (!agx_validate_sr(I)) {
         fprintf(stderr, "Invalid SR after %s\n", after);
         agx_print_instr(I, stderr);
         fail = true;
      }
   }

   if (!agx_validate_width(ctx)) {
      fprintf(stderr, "Invalid vectors after %s\n", after);
      fail = true;
   }

   if (fail) {
      agx_print_shader(ctx, stderr);
      exit(1);
   }
}

#endif /* NDEBUG */
