/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/ralloc.h"
#include "ir3_ra.h"
#include "ir3_shader.h"

/* This file implements a validation pass for register allocation. We check
 * that the assignment of SSA values to registers is "valid", in the sense
 * that each original definition reaches all of its uses without being
 * clobbered by something else.
 *
 * The validation is a forward dataflow analysis. The state at each point
 * consists of, for each physical register, the SSA value occupying it, or a
 * few special values:
 *
 * - "unknown" is set initially, before the dataflow analysis assigns it a
 *   value. This is the lattice bottom.
 * - Registers at the start of the program get "undef", which acts like a
 *   special SSA value indicating that the register is never written.
 * - "overdefined" registers are set to more than one value, depending on
 *   which path you take to get to the spot. This is the lattice top.
 *
 * Overdefined must be distinguished because in some programs, like this
 * simple example, it is perfectly normal and allowed:
 *
 *    if (...) {
 *       mov.u32u32 ssa_1(r1.x), ...
 *       ...
 *    } else {
 *       mov.u32u32 ssa_2(r1.x), ...
 *       ...
 *    }
 *    // r1.x is overdefined here!
 *
 * However, if an SSA value used after the if is accidentally assigned to
 * r1.x, we need to remember that reading r1.x there is invalid in order to
 * catch the mistake. Overdef also has to be distinguished from undef so that
 * the state forms a valid lattice, which guarantees that the analysis always
 * terminates. We could avoid relying on overdef by using liveness analysis,
 * but not relying on liveness has the benefit that we can catch bugs in the
 * liveness analysis too.
 *
 * One tricky thing we have to handle is the coalescing of splits/collects,
 * which means that multiple SSA values can occupy a register at the same
 * time. While we could use the same merge set indices that RA uses, that
 * would again rely on the merge set calculation being correct, which we
 * don't want to do. Instead we treat splits/collects as transfer
 * instructions, similar to the parallelcopy instructions inserted by RA, and
 * have them copy their sources to their destinations. This means that each
 * physreg must carry the SSA def assigned to it plus an offset into that
 * definition, and when validating sources we must look through
 * splits/collects to find the "original" source for each subregister.
 */

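/* Note: the def pointer in reg_state below does double duty as the lattice
 * value. Real ir3_register pointers are concrete definitions, while the
 * lattice bottom, the "never written" value, and the lattice top are encoded
 * as small non-pointer values that can never alias a real def.
 */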
#define UNKNOWN ((struct ir3_register *)NULL)
#define UNDEF   ((struct ir3_register *)(uintptr_t)1)
#define OVERDEF ((struct ir3_register *)(uintptr_t)2)

struct reg_state {
   struct ir3_register *def;
   unsigned offset;
};

struct file_state {
   struct reg_state regs[RA_MAX_FILE_SIZE];
};

struct reaching_state {
   struct file_state half, full, shared;
};

struct ra_val_ctx {
   struct ir3_instruction *current_instr;

   /* The current state of the dataflow analysis for the instruction we're
    * processing.
    */
   struct reaching_state reaching;

   /* The state at the end of each basic block. */
   struct reaching_state *block_reaching;
   unsigned block_count;

   /* When validating shared RA, we have to take spill/reload instructions into
    * account. This saves an array of reg_state for the source of each spill
    * instruction, to be restored at the corresponding reload(s).
    */
   struct hash_table *spill_reaching;

   unsigned full_size, half_size;

   bool merged_regs;
   bool shared_ra;

   bool failed;
};

static void
validate_error(struct ra_val_ctx *ctx, const char *condstr)
{
   fprintf(stderr, "ra validation fail: %s\n", condstr);
   fprintf(stderr, "  -> for instruction: ");
   ir3_print_instr(ctx->current_instr);
   abort();
}

#define validate_assert(ctx, cond)                                            \
   do {                                                                       \
      if (!(cond)) {                                                          \
         validate_error(ctx, #cond);                                          \
      }                                                                       \
   } while (0)

static unsigned
get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_SHARED)
      return RA_SHARED_SIZE;
   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
      return ctx->full_size;
   else
      return ctx->half_size;
}

static struct reg_state *
get_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->spill_reaching, dst);
   if (entry)
      return entry->data;
   else
      return NULL;
}

static struct reg_state *
get_or_create_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
{
   struct reg_state *state = get_spill_state(ctx, dst);
   if (state)
      return state;

   state = rzalloc_array(ctx, struct reg_state, reg_size(dst));
   _mesa_hash_table_insert(ctx->spill_reaching, dst, state);
   return state;
}

/* Validate simple things, like the registers being in-bounds. This way we
 * don't have to worry about out-of-bounds accesses later.
 */

static void
validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ctx->current_instr = instr;
   ra_foreach_dst (dst, instr) {
      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED))
         continue;
      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
      if (dst->tied)
         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
   }

   ra_foreach_src (src, instr) {
      if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
         continue;
      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
      validate_assert(ctx, src_max <= get_file_size(ctx, src));
   }
}

/* This is the lattice operator. */
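/* A quick reference for the merge, where "a" and "b" stand for two distinct
 * concrete (def, offset) pairs:
 *
 *    merge(UNKNOWN, x)  = x
 *    merge(x, UNKNOWN)  = x
 *    merge(OVERDEF, x)  = OVERDEF
 *    merge(x, OVERDEF)  = OVERDEF
 *    merge(a, a)        = a
 *    merge(a, b)        = OVERDEF
 *
 * The return value reports whether dst changed, which is what drives the
 * fixed-point loop in check_reaching_defs() below.
 */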
static bool
merge_reg(struct reg_state *dst, const struct reg_state *src)
{
   if (dst->def == UNKNOWN) {
      *dst = *src;
      return src->def != UNKNOWN;
   } else if (dst->def == OVERDEF) {
      return false;
   } else {
      if (src->def == UNKNOWN)
         return false;
      else if (src->def == OVERDEF) {
         *dst = *src;
         return true;
      } else {
         if (dst->def != src->def || dst->offset != src->offset) {
            dst->def = OVERDEF;
            dst->offset = 0;
            return true;
         } else {
            return false;
         }
      }
   }
}

static bool
merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
{
   bool progress = false;
   for (unsigned i = 0; i < size; i++)
      progress |= merge_reg(&dst->regs[i], &src->regs[i]);
   return progress;
}

static bool
merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
            const struct reaching_state *src)
{
   bool progress = false;
   progress |= merge_file(&dst->full, &src->full, ctx->full_size);
   progress |= merge_file(&dst->half, &src->half, ctx->half_size);
   return progress;
}

static bool
merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
                     const struct reaching_state *src)
{
   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
}

static struct file_state *
ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_SHARED)
      return &ctx->reaching.shared;
   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
      return &ctx->reaching.full;
   else
      return &ctx->reaching.half;
}

static void
propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ra_foreach_dst (dst, instr) {
      /* Process destinations from scalar ALU instructions that were demoted to
       * normal ALU instructions. For these we must treat the instruction as a
       * spill of itself and set the propagate state to itself. See
       * try_demote_instructions().
       */
      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
         if (instr->flags & IR3_INSTR_SHARED_SPILL) {
            struct reg_state *state = get_or_create_spill_state(ctx, dst);
            for (unsigned i = 0; i < reg_size(dst); i++) {
               state[i] = (struct reg_state){
                  .def = dst,
                  .offset = i,
               };
            }
         }
         continue;
      }

      struct file_state *file = ra_val_get_file(ctx, dst);
      physreg_t physreg = ra_reg_get_physreg(dst);
      for (unsigned i = 0; i < reg_size(dst); i++) {
         file->regs[physreg + i] = (struct reg_state){
            .def = dst,
            .offset = i,
         };
      }
   }
}

static void
propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
{
   struct ir3_register *dst = split->dsts[0];
   struct ir3_register *src = split->srcs[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   physreg_t src_physreg = ra_reg_get_physreg(src);
   struct file_state *file = ra_val_get_file(ctx, dst);

   if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
      struct reg_state *src_state = get_spill_state(ctx, src->def);
      if (src_state) {
         struct reg_state *dst_state = get_or_create_spill_state(ctx, dst);
         memcpy(dst_state, &src_state[split->split.off * reg_elem_size(src)],
                reg_size(dst) * sizeof(struct reg_state));
      }
      return;
   }

   unsigned offset = split->split.off * reg_elem_size(src);
   for (unsigned i = 0; i < reg_elem_size(src); i++) {
      file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
   }
}

static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
   struct ir3_register *dst = collect->dsts[0];
   unsigned size = reg_size(dst);

   if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
      struct reg_state *dst_state = NULL;

      for (unsigned i = 0; i < collect->srcs_count; i++) {
         struct ir3_register *src = collect->srcs[i];
         unsigned dst_offset = i * reg_elem_size(dst);

         if (ra_reg_is_src(src)) {
            struct reg_state *src_state = get_spill_state(ctx, src->def);
            if (src_state) {
               if (!dst_state)
                  dst_state = get_or_create_spill_state(ctx, dst);
               memcpy(&dst_state[dst_offset], src_state,
                      reg_size(src) * sizeof(struct reg_state));
            }
         }
      }
   } else {
      struct file_state *file = ra_val_get_file(ctx, dst);
      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct reg_state srcs[size];

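      /* Gather all the sources into a temporary before writing anything:
       * with coalescing, a source can share registers with the destination,
       * so updating file->regs in a single pass could read state the loop
       * itself just overwrote.
       */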
      for (unsigned i = 0; i < collect->srcs_count; i++) {
         struct ir3_register *src = collect->srcs[i];
         unsigned dst_offset = i * reg_elem_size(dst);

         for (unsigned j = 0; j < reg_elem_size(dst); j++) {
            if (!ra_reg_is_src(src)) {
               srcs[dst_offset + j] = (struct reg_state){
                  .def = dst,
                  .offset = dst_offset + j,
               };
            } else {
               physreg_t src_physreg = ra_reg_get_physreg(src);
               srcs[dst_offset + j] = file->regs[src_physreg + j];
            }
         }
      }

      for (unsigned i = 0; i < size; i++)
         file->regs[dst_physreg + i] = srcs[i];
   }
}

static void
propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
{
   unsigned size = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      size += reg_size(pcopy->srcs[i]);
   }

   struct reg_state srcs[size];

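   /* As with collects, read the reaching state for every source before
    * writing any destination: a parallelcopy's copies happen "at once", so
    * the writes must not be observable by the reads.
    */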
   unsigned offset = 0;
   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      struct ir3_register *src = pcopy->srcs[i];
      struct file_state *file = ra_val_get_file(ctx, dst);

      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
         if (ra_reg_is_src(src)) {
            struct reg_state *src_state = get_spill_state(ctx, src->def);
            if (src_state) {
               struct reg_state *dst_state = get_or_create_spill_state(ctx, dst);
               memcpy(dst_state, src_state,
                      reg_size(dst) * sizeof(struct reg_state));
            }
         }
      } else {
         for (unsigned j = 0; j < reg_size(dst); j++) {
            if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
               srcs[offset + j] = (struct reg_state){
                  .def = dst,
                  .offset = j,
               };
            } else {
               physreg_t src_physreg = ra_reg_get_physreg(src);
               srcs[offset + j] = file->regs[src_physreg + j];
            }
         }
      }

      offset += reg_size(dst);
   }
   assert(offset == size);

   offset = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];

      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
         offset += reg_size(dst);
         continue;
      }

      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++)
         file->regs[dst_physreg + j] = srcs[offset + j];

      offset += reg_size(dst);
   }
   assert(offset == size);
}

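/* Spills of shared registers are lowered to moves into normal registers and
 * reloads are moves back. We snapshot the reaching state of the spilled
 * value at the spill, keyed on its def, and restore it into the shared file
 * at each corresponding reload.
 */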
static void
propagate_spill(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->srcs[0]->flags & IR3_REG_SHARED) { /* spill */
      struct reg_state *state = get_or_create_spill_state(ctx, instr->dsts[0]);
      physreg_t src_physreg = ra_reg_get_physreg(instr->srcs[0]);
      memcpy(state, &ctx->reaching.shared.regs[src_physreg],
             reg_size(instr->srcs[0]) * sizeof(struct reg_state));
   } else { /* reload */
      struct reg_state *state = get_spill_state(ctx, instr->srcs[0]->def);
      assert(state);
      physreg_t dst_physreg = ra_reg_get_physreg(instr->dsts[0]);
      memcpy(&ctx->reaching.shared.regs[dst_physreg], state,
             reg_size(instr->dsts[0]) * sizeof(struct reg_state));
   }
}

static void
propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_SPLIT)
      propagate_split(ctx, instr);
   else if (instr->opc == OPC_META_COLLECT)
      propagate_collect(ctx, instr);
   else if (instr->opc == OPC_META_PARALLEL_COPY)
      propagate_parallelcopy(ctx, instr);
   else if (ctx->shared_ra && instr->opc == OPC_MOV &&
            /* Moves from immed/const with IR3_INSTR_SHARED_SPILL were demoted
             * from scalar ALU, see try_demote_instruction().
             */
            !(instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST)) &&
            (instr->flags & IR3_INSTR_SHARED_SPILL))
      propagate_spill(ctx, instr);
   else
      propagate_normal_instr(ctx, instr);
}

static bool
propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      propagate_instr(ctx, instr);
   }

   bool progress = false;
   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;
      progress |=
         merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
   }
   for (unsigned i = 0; i < block->physical_successors_count; i++) {
      struct ir3_block *succ = block->physical_successors[i];
      progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
                                       &ctx->reaching);
   }
   return progress;
}

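/* Walk backwards through the "transfer" instructions (split/collect/
 * parallelcopy) to find the original def and offset that should occupy a
 * given subregister. This mirrors, in reverse, the copy semantics that
 * propagate_instr() applies in the forward direction.
 */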
static void
chase_definition(struct reg_state *state)
{
   while (true) {
      struct ir3_instruction *instr = state->def->instr;
      switch (instr->opc) {
      case OPC_META_SPLIT: {
         struct ir3_register *new_def = instr->srcs[0]->def;
         unsigned offset = instr->split.off * reg_elem_size(new_def);
         *state = (struct reg_state){
            .def = new_def,
            .offset = state->offset + offset,
         };
         break;
      }
      case OPC_META_COLLECT: {
         unsigned src_idx = state->offset / reg_elem_size(state->def);
         unsigned src_offset = state->offset % reg_elem_size(state->def);
         struct ir3_register *new_def = instr->srcs[src_idx]->def;
         if (new_def) {
            *state = (struct reg_state){
               .def = new_def,
               .offset = src_offset,
            };
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      case OPC_META_PARALLEL_COPY: {
         unsigned dst_idx = ~0;
         for (unsigned i = 0; i < instr->dsts_count; i++) {
            if (instr->dsts[i] == state->def) {
               dst_idx = i;
               break;
            }
         }
         assert(dst_idx != ~0);

         struct ir3_register *new_def = instr->srcs[dst_idx]->def;
         if (new_def) {
            state->def = new_def;
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      default:
         return;
      }
   }
}

static void
dump_reg_state(struct reg_state *state)
{
   if (state->def == UNDEF) {
      fprintf(stderr, "no reaching definition");
   } else if (state->def == OVERDEF) {
      fprintf(stderr,
              "more than one reaching definition or partial definition");
   } else {
      /* The analysis should always remove UNKNOWN eventually. */
      assert(state->def != UNKNOWN);

      fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
              state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
              state->def->num / 4, "xyzw"[state->def->num % 4],
              state->offset);
   }
}

static void
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
                   struct ir3_register *src)
{
   if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
      return;
   struct file_state *file = ra_val_get_file(ctx, src);
   physreg_t physreg = ra_reg_get_physreg(src);
   for (unsigned i = 0; i < reg_size(src); i++) {
      struct reg_state expected = (struct reg_state){
         .def = src->def,
         .offset = i,
      };
      chase_definition(&expected);

      struct reg_state actual = file->regs[physreg + i];

      if (expected.def != actual.def || expected.offset != actual.offset) {
         fprintf(
            stderr,
            "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
            src->def->instr->serialno, src->def->name, i);
         fprintf(stderr, "expected: ");
         dump_reg_state(&expected);
         fprintf(stderr, "\n");
         fprintf(stderr, "actual: ");
         dump_reg_state(&actual);
         fprintf(stderr, "\n");
         fprintf(stderr, "-> for instruction: ");
         ir3_print_instr(instr);
         ctx->failed = true;
      }
   }
}

static void
check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
       instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
      return;
   }

   ra_foreach_src (src, instr) {
      check_reaching_src(ctx, instr, src);
   }
}

static void
check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      check_reaching_instr(ctx, instr);
      propagate_instr(ctx, instr);
   }

   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;

      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
      foreach_instr (instr, &succ->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         if (instr->srcs[pred_idx]->def)
            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
      }
   }
}

static void
check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
{
   ctx->block_reaching =
      rzalloc_array(ctx, struct reaching_state, ctx->block_count);

   struct reaching_state *start = &ctx->block_reaching[0];
   for (unsigned i = 0; i < ctx->full_size; i++)
      start->full.regs[i].def = UNDEF;
   for (unsigned i = 0; i < ctx->half_size; i++)
      start->half.regs[i].def = UNDEF;
   for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
      start->shared.regs[i].def = UNDEF;

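   /* Iterate to a fixed point. merge_reg() only moves a register's state up
    * the lattice (UNKNOWN -> concrete def -> OVERDEF), so each entry can
    * change at most twice and the loop is guaranteed to terminate.
    */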
   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         progress |= propagate_block(ctx, block);
      }
   } while (progress);

   foreach_block (block, &ir->block_list) {
      check_reaching_block(ctx, block);
   }

   if (ctx->failed) {
      fprintf(stderr, "failing shader:\n");
      ir3_print(ir);
      abort();
   }
}

void
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
                unsigned half_size, unsigned block_count, bool shared_ra)
{
#ifdef NDEBUG
#define VALIDATE 0
#else
#define VALIDATE 1
#endif

   if (!VALIDATE)
      return;

   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
   ctx->merged_regs = v->mergedregs;
   ctx->full_size = full_size;
   ctx->half_size = half_size;
   ctx->block_count = block_count;
   ctx->shared_ra = shared_ra;
   if (ctx->shared_ra)
      ctx->spill_reaching = _mesa_pointer_hash_table_create(ctx);

   foreach_block (block, &v->ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         validate_simple(ctx, instr);
      }
   }

   check_reaching_defs(ctx, v->ir);

   ralloc_free(ctx);
}