• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2021 Valve Corporation
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "util/ralloc.h"
7 #include "ir3_ra.h"
8 #include "ir3_shader.h"
9 
/* This file implements a validation pass for register allocation. We check
 * that the assignment of SSA values to registers is "valid", in the sense
 * that each original definition reaches all of its uses without being
 * clobbered by something else.
 *
 * The validation is a forward dataflow analysis. The state at each point
 * consists of, for each physical register, the SSA value occupying it, or a
 * few special values:
 *
 * - "unknown" is set initially, before the dataflow analysis assigns it a
 *   value. This is the lattice bottom.
 * - Values at the start get "undef", which acts like a special SSA value that
 *   indicates it is never written.
 * - "overdefined" registers are set to more than one value, depending on
 *   which path you take to get to the spot. This is the lattice top.
 *
 * Overdefined is necessary to distinguish because in some programs, like this
 * simple example, it's perfectly normal and allowed:
 *
 * if (...) {
 *    mov.u32u32 ssa_1(r1.x), ...
 *    ...
 * } else {
 *    mov.u32u32 ssa_2(r1.x), ...
 *    ...
 * }
 * // r1.x is overdefined here!
 *
 * However, if an ssa value after the if is accidentally assigned to r1.x, we
 * need to remember that it's invalid to catch the mistake. Overdef has to be
 * distinguished from undef so that the state forms a valid lattice to
 * guarantee that the analysis always terminates. We could avoid relying on
 * overdef by using liveness analysis, but not relying on liveness has the
 * benefit that we can catch bugs in liveness analysis too.
 *
 * One tricky thing we have to handle is the coalescing of splits/collects,
 * which means that multiple SSA values can occupy a register at the same
 * time. While we could use the same merge set indices that RA uses, again
 * that would rely on the merge set calculation being correct which we don't
 * want to do. Instead we treat splits/collects as transfer instructions,
 * similar to the parallelcopy instructions inserted by RA, and have them copy
 * their sources to their destinations. This means that each physreg must
 * carry the SSA def assigned to it plus an offset into that definition, and
 * when validating sources we must look through splits/collects to find the
 * "original" source for each subregister.
 */
56 
/* Sentinel "definitions" representing the special lattice values. They are
 * told apart from real SSA definitions by pointer comparison.
 */

/* Lattice bottom: the analysis has not assigned this register a value yet. */
#define UNKNOWN ((struct ir3_register *)NULL)

/* The register has not been written since the start of the program. */
#define UNDEF   ((struct ir3_register *)(uintptr_t)1)

/* Lattice top: different values reach this point along different paths. */
#define OVERDEF ((struct ir3_register *)(uintptr_t)2)
60 
/* Dataflow state of one physical register slot. */
struct reg_state {
   /* SSA definition occupying the slot, or one of UNKNOWN/UNDEF/OVERDEF. */
   struct ir3_register *def;

   /* Which subregister of def lives in this slot. */
   unsigned offset;
};
65 
66 struct file_state {
67    struct reg_state regs[RA_MAX_FILE_SIZE];
68 };
69 
70 struct reaching_state {
71    struct file_state half, full, shared, predicate;
72 };
73 
74 struct ra_val_ctx {
75    struct ir3_instruction *current_instr;
76 
77    /* The current state of the dataflow analysis for the instruction we're
78     * processing.
79     */
80    struct reaching_state reaching;
81 
82    /* The state at the end of each basic block. */
83    struct reaching_state *block_reaching;
84    unsigned block_count;
85 
86    /* When validating shared RA, we have to take spill/reload instructions into
87     * account. This saves an array of reg_state for the source of each spill
88     * instruction, to be restored at the corresponding reload(s).
89     */
90    struct hash_table *spill_reaching;
91 
92    unsigned full_size, half_size, predicate_size;
93 
94    bool merged_regs;
95    bool shared_ra;
96 
97    bool failed;
98 };
99 
100 static void
validate_error(struct ra_val_ctx * ctx,const char * condstr)101 validate_error(struct ra_val_ctx *ctx, const char *condstr)
102 {
103    fprintf(stderr, "ra validation fail: %s\n", condstr);
104    fprintf(stderr, "  -> for instruction: ");
105    ir3_print_instr(ctx->current_instr);
106    abort();
107 }
108 
/* Like assert(), but on failure reports the stringified condition and the
 * current instruction through validate_error().
 */
#define validate_assert(ctx, cond)                                             \
   do {                                                                        \
      if (!(cond))                                                             \
         validate_error(ctx, #cond);                                           \
   } while (0)
115 
116 static unsigned
get_file_size(struct ra_val_ctx * ctx,struct ir3_register * reg)117 get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
118 {
119    if (reg->flags & IR3_REG_SHARED) {
120       if (reg->flags & IR3_REG_HALF)
121          return RA_SHARED_HALF_SIZE;
122       else
123          return RA_SHARED_SIZE;
124    } else if (reg->flags & IR3_REG_PREDICATE) {
125       return ctx->predicate_size;
126    } else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF)) {
127       return ctx->full_size;
128    } else {
129       return ctx->half_size;
130    }
131 }
132 
133 static struct reg_state *
get_spill_state(struct ra_val_ctx * ctx,struct ir3_register * dst)134 get_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
135 {
136    struct hash_entry *entry = _mesa_hash_table_search(ctx->spill_reaching, dst);
137    if (entry)
138       return entry->data;
139    else
140       return NULL;
141 }
142 
143 static struct reg_state *
get_or_create_spill_state(struct ra_val_ctx * ctx,struct ir3_register * dst)144 get_or_create_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
145 {
146    struct reg_state *state = get_spill_state(ctx, dst);
147    if (state)
148       return state;
149 
150    state = rzalloc_array(ctx, struct reg_state, reg_size(dst));
151    _mesa_hash_table_insert(ctx->spill_reaching, dst, state);
152    return state;
153 }
154 
/* Source filter for validation: accepts whatever RA treats as a source, plus
 * predicate registers.
 */
static bool
validate_reg_is_src(const struct ir3_register *reg)
{
   if (ra_reg_is_src(reg))
      return true;

   return ra_reg_is_predicate(reg);
}
160 
/* Destination filter for validation: accepts whatever RA treats as a
 * destination, plus predicate registers.
 */
static bool
validate_reg_is_dst(const struct ir3_register *reg)
{
   if (ra_reg_is_dst(reg))
      return true;

   return ra_reg_is_predicate(reg);
}
166 
167 /* Validate simple things, like the registers being in-bounds. This way we
168  * don't have to worry about out-of-bounds accesses later.
169  */
170 
171 static void
validate_simple(struct ra_val_ctx * ctx,struct ir3_instruction * instr)172 validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
173 {
174    ctx->current_instr = instr;
175    foreach_dst_if (dst, instr, validate_reg_is_dst) {
176       if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED))
177          continue;
178       validate_assert(ctx, ra_reg_get_num(dst) != INVALID_REG);
179       unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
180       validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
181       if (dst->tied)
182          validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
183    }
184 
185    foreach_src_if (src, instr, validate_reg_is_src) {
186       if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
187          continue;
188       validate_assert(ctx, ra_reg_get_num(src) != INVALID_REG);
189       unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
190       validate_assert(ctx, src_max <= get_file_size(ctx, src));
191    }
192 }
193 
194 /* This is the lattice operator. */
195 static bool
merge_reg(struct reg_state * dst,const struct reg_state * src)196 merge_reg(struct reg_state *dst, const struct reg_state *src)
197 {
198    if (dst->def == UNKNOWN) {
199       *dst = *src;
200       return src->def != UNKNOWN;
201    } else if (dst->def == OVERDEF) {
202       return false;
203    } else {
204       if (src->def == UNKNOWN)
205          return false;
206       else if (src->def == OVERDEF) {
207          *dst = *src;
208          return true;
209       } else {
210          if (dst->def != src->def || dst->offset != src->offset) {
211             dst->def = OVERDEF;
212             dst->offset = 0;
213             return true;
214          } else {
215             return false;
216          }
217       }
218    }
219 }
220 
221 static bool
merge_file(struct file_state * dst,const struct file_state * src,unsigned size)222 merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
223 {
224    bool progress = false;
225    for (unsigned i = 0; i < size; i++)
226       progress |= merge_reg(&dst->regs[i], &src->regs[i]);
227    return progress;
228 }
229 
230 static bool
merge_state(struct ra_val_ctx * ctx,struct reaching_state * dst,const struct reaching_state * src)231 merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
232             const struct reaching_state *src)
233 {
234    bool progress = false;
235    progress |= merge_file(&dst->full, &src->full, ctx->full_size);
236    progress |= merge_file(&dst->half, &src->half, ctx->half_size);
237    progress |=
238       merge_file(&dst->predicate, &src->predicate, ctx->predicate_size);
239    return progress;
240 }
241 
242 static bool
merge_state_physical(struct ra_val_ctx * ctx,struct reaching_state * dst,const struct reaching_state * src)243 merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
244                      const struct reaching_state *src)
245 {
246    return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
247 }
248 
249 static struct file_state *
ra_val_get_file(struct ra_val_ctx * ctx,struct ir3_register * reg)250 ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
251 {
252    if (reg->flags & IR3_REG_SHARED)
253       return &ctx->reaching.shared;
254    else if (reg->flags & IR3_REG_PREDICATE)
255       return &ctx->reaching.predicate;
256    else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
257       return &ctx->reaching.full;
258    else
259       return &ctx->reaching.half;
260 }
261 
262 /* Predicate RA implements spilling by cloning the instruction that produces a
263  * def. In that case, we might end up two different defs legitimately reaching a
264  * source. To support validation, the RA will store the original def in the
265  * instruction's data field.
266  */
267 static struct ir3_register *
get_original_def(struct ir3_register * def)268 get_original_def(struct ir3_register *def)
269 {
270    if (def == UNKNOWN || def == UNDEF || def == OVERDEF)
271       return def;
272    if (def->flags & IR3_REG_PREDICATE)
273       return def->instr->data;
274    return def;
275 }
276 
277 static void
propagate_normal_instr(struct ra_val_ctx * ctx,struct ir3_instruction * instr)278 propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
279 {
280    foreach_dst_if (dst, instr, validate_reg_is_dst) {
281       /* Process destinations from scalar ALU instructions that were demoted to
282        * normal ALU instructions. For these we must treat the instruction as a
283        * spill of itself and set the propagate state to itself. See
284        * try_demote_instructions().
285        */
286       if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
287          if (instr->flags & IR3_INSTR_SHARED_SPILL) {
288             struct reg_state *state = get_or_create_spill_state(ctx, dst);
289             for (unsigned i = 0; i < reg_size(dst); i++) {
290                state[i] = (struct reg_state){
291                   .def = dst,
292                   .offset = i,
293                };
294             }
295          }
296          continue;
297       }
298 
299       struct file_state *file = ra_val_get_file(ctx, dst);
300       physreg_t physreg = ra_reg_get_physreg(dst);
301 
302       for (unsigned i = 0; i < reg_size(dst); i++) {
303          file->regs[physreg + i] = (struct reg_state){
304             .def = get_original_def(dst),
305             .offset = i,
306          };
307       }
308    }
309 }
310 
311 static void
propagate_split(struct ra_val_ctx * ctx,struct ir3_instruction * split)312 propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
313 {
314    struct ir3_register *dst = split->dsts[0];
315    struct ir3_register *src = split->srcs[0];
316    physreg_t dst_physreg = ra_reg_get_physreg(dst);
317    physreg_t src_physreg = ra_reg_get_physreg(src);
318    struct file_state *file = ra_val_get_file(ctx, dst);
319 
320    if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
321       struct reg_state *src_state = get_spill_state(ctx, src->def);
322       if (src_state) {
323          struct reg_state *dst_state = get_or_create_spill_state(ctx, dst);
324          memcpy(dst_state, &src_state[split->split.off * reg_elem_size(src)],
325                 reg_size(dst) * sizeof(struct reg_state));
326       }
327       return;
328    }
329 
330    unsigned offset = split->split.off * reg_elem_size(src);
331    for (unsigned i = 0; i < reg_elem_size(src); i++) {
332       file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
333    }
334 }
335 
336 static void
propagate_collect(struct ra_val_ctx * ctx,struct ir3_instruction * collect)337 propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
338 {
339    struct ir3_register *dst = collect->dsts[0];
340    unsigned size = reg_size(dst);
341 
342    if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
343       struct reg_state *dst_state = NULL;
344 
345       for (unsigned i = 0; i < collect->srcs_count; i++) {
346          struct ir3_register *src = collect->srcs[i];
347          unsigned dst_offset = i * reg_elem_size(dst);
348 
349          if (ra_reg_is_src(src)) {
350             struct reg_state *src_state = get_spill_state(ctx, src->def);
351             if (src_state) {
352                if (!dst_state)
353                   dst_state = get_or_create_spill_state(ctx, dst);
354                memcpy(&dst_state[dst_offset], src_state,
355                       reg_size(src) * sizeof(struct reg_state));
356             }
357          }
358       }
359    } else {
360       struct file_state *file = ra_val_get_file(ctx, dst);
361       physreg_t dst_physreg = ra_reg_get_physreg(dst);
362       struct reg_state srcs[size];
363 
364       for (unsigned i = 0; i < collect->srcs_count; i++) {
365          struct ir3_register *src = collect->srcs[i];
366          unsigned dst_offset = i * reg_elem_size(dst);
367 
368          for (unsigned j = 0; j < reg_elem_size(dst); j++) {
369             if (!ra_reg_is_src(src)) {
370                srcs[dst_offset + j] = (struct reg_state){
371                   .def = dst,
372                   .offset = dst_offset + j,
373                };
374             } else {
375                physreg_t src_physreg = ra_reg_get_physreg(src);
376                srcs[dst_offset + j] = file->regs[src_physreg + j];
377             }
378          }
379       }
380 
381       for (unsigned i = 0; i < size; i++)
382          file->regs[dst_physreg + i] = srcs[i];
383    }
384 }
385 
386 static void
propagate_parallelcopy(struct ra_val_ctx * ctx,struct ir3_instruction * pcopy)387 propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
388 {
389    unsigned size = 0;
390    for (unsigned i = 0; i < pcopy->dsts_count; i++) {
391       size += reg_size(pcopy->srcs[i]);
392    }
393 
394    struct reg_state srcs[size];
395 
396    unsigned offset = 0;
397    for (unsigned i = 0; i < pcopy->srcs_count; i++) {
398       struct ir3_register *dst = pcopy->dsts[i];
399       struct ir3_register *src = pcopy->srcs[i];
400       struct file_state *file = ra_val_get_file(ctx, dst);
401 
402       if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
403          if (ra_reg_is_src(src)) {
404             struct reg_state *src_state = get_spill_state(ctx, src->def);
405             if (src_state) {
406                struct reg_state *dst_state = get_or_create_spill_state(ctx, dst);
407                memcpy(dst_state, src_state, reg_size(dst) * sizeof(struct reg_state));
408             }
409          }
410       } else {
411          for (unsigned j = 0; j < reg_size(dst); j++) {
412             if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
413                srcs[offset + j] = (struct reg_state){
414                   .def = dst,
415                   .offset = j,
416                };
417             } else {
418                physreg_t src_physreg = ra_reg_get_physreg(src);
419                srcs[offset + j] = file->regs[src_physreg + j];
420             }
421          }
422       }
423 
424       offset += reg_size(dst);
425    }
426    assert(offset == size);
427 
428    offset = 0;
429    for (unsigned i = 0; i < pcopy->dsts_count; i++) {
430       struct ir3_register *dst = pcopy->dsts[i];
431 
432       if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
433          offset += reg_size(dst);
434          continue;
435       }
436 
437       physreg_t dst_physreg = ra_reg_get_physreg(dst);
438       struct file_state *file = ra_val_get_file(ctx, dst);
439 
440       for (unsigned j = 0; j < reg_size(dst); j++)
441          file->regs[dst_physreg + j] = srcs[offset + j];
442 
443       offset += reg_size(dst);
444    }
445    assert(offset == size);
446 }
447 
448 static void
propagate_spill(struct ra_val_ctx * ctx,struct ir3_instruction * instr)449 propagate_spill(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
450 {
451    if (instr->srcs[0]->flags & IR3_REG_SHARED) { /* spill */
452       struct reg_state *state = get_or_create_spill_state(ctx, instr->dsts[0]);
453       physreg_t src_physreg = ra_reg_get_physreg(instr->srcs[0]);
454       memcpy(state, &ctx->reaching.shared.regs[src_physreg],
455              reg_size(instr->srcs[0]) * sizeof(struct reg_state));
456    } else { /* reload */
457       struct reg_state *state = get_spill_state(ctx, instr->srcs[0]->def);
458       assert(state);
459       physreg_t dst_physreg = ra_reg_get_physreg(instr->dsts[0]);
460       memcpy(&ctx->reaching.shared.regs[dst_physreg], state,
461              reg_size(instr->dsts[0]) * sizeof(struct reg_state));
462    }
463 }
464 
465 static void
propagate_instr(struct ra_val_ctx * ctx,struct ir3_instruction * instr)466 propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
467 {
468    if (instr->opc == OPC_META_SPLIT)
469       propagate_split(ctx, instr);
470    else if (instr->opc == OPC_META_COLLECT)
471       propagate_collect(ctx, instr);
472    else if (instr->opc == OPC_META_PARALLEL_COPY)
473       propagate_parallelcopy(ctx, instr);
474    else if (ctx->shared_ra && instr->opc == OPC_MOV &&
475             /* Moves from immed/const with IR3_INSTR_SHARED_SPILL were demoted
476              * from scalar ALU, see try_demote_instruction().
477              */
478             !(instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST)) &&
479             (instr->flags & IR3_INSTR_SHARED_SPILL))
480       propagate_spill(ctx, instr);
481    else
482       propagate_normal_instr(ctx, instr);
483 }
484 
485 static bool
propagate_block(struct ra_val_ctx * ctx,struct ir3_block * block)486 propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
487 {
488    ctx->reaching = ctx->block_reaching[block->index];
489 
490    foreach_instr (instr, &block->instr_list) {
491       propagate_instr(ctx, instr);
492    }
493 
494    bool progress = false;
495    for (unsigned i = 0; i < 2; i++) {
496       struct ir3_block *succ = block->successors[i];
497       if (!succ)
498          continue;
499       progress |=
500          merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
501    }
502    for (unsigned i = 0; i < block->physical_successors_count; i++) {
503       struct ir3_block *succ = block->physical_successors[i];
504       progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
505                                        &ctx->reaching);
506    }
507    return progress;
508 }
509 
510 static void
chase_definition(struct reg_state * state)511 chase_definition(struct reg_state *state)
512 {
513    while (true) {
514       struct ir3_instruction *instr = state->def->instr;
515       switch (instr->opc) {
516       case OPC_META_SPLIT: {
517          struct ir3_register *new_def = instr->srcs[0]->def;
518          unsigned offset = instr->split.off * reg_elem_size(new_def);
519          *state = (struct reg_state){
520             .def = new_def,
521             .offset = state->offset + offset,
522          };
523          break;
524       }
525       case OPC_META_COLLECT: {
526          unsigned src_idx = state->offset / reg_elem_size(state->def);
527          unsigned src_offset = state->offset % reg_elem_size(state->def);
528          struct ir3_register *new_def = instr->srcs[src_idx]->def;
529          if (new_def) {
530             *state = (struct reg_state){
531                .def = new_def,
532                .offset = src_offset,
533             };
534          } else {
535             /* Bail on immed/const */
536             return;
537          }
538          break;
539       }
540       case OPC_META_PARALLEL_COPY: {
541          unsigned dst_idx = ~0;
542          for (unsigned i = 0; i < instr->dsts_count; i++) {
543             if (instr->dsts[i] == state->def) {
544                dst_idx = i;
545                break;
546             }
547          }
548          assert(dst_idx != ~0);
549 
550          struct ir3_register *new_def = instr->srcs[dst_idx]->def;
551          if (new_def) {
552             state->def = new_def;
553          } else {
554             /* Bail on immed/const */
555             return;
556          }
557          break;
558       }
559       default:
560          return;
561       }
562    }
563 }
564 
565 static void
dump_reg_state(struct reg_state * state)566 dump_reg_state(struct reg_state *state)
567 {
568    if (state->def == UNDEF) {
569       fprintf(stderr, "no reaching definition");
570    } else if (state->def == OVERDEF) {
571       fprintf(stderr,
572               "more than one reaching definition or partial definition");
573    } else {
574       /* The analysis should always remove UNKNOWN eventually. */
575       assert(state->def != UNKNOWN);
576 
577       const char *prefix = "r";
578       unsigned num = state->def->num / 4;
579       if (state->def->flags & IR3_REG_PREDICATE) {
580          prefix = "p";
581          num = 0;
582       }
583 
584       fprintf(stderr, "ssa_%u:%u(%s%s%u.%c) + %u", state->def->instr->serialno,
585               state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
586               prefix, num, "xyzw"[state->def->num % 4], state -> offset);
587    }
588 }
589 
590 static void
check_reaching_src(struct ra_val_ctx * ctx,struct ir3_instruction * instr,struct ir3_register * src)591 check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
592                    struct ir3_register *src)
593 {
594    if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
595       return;
596    struct file_state *file = ra_val_get_file(ctx, src);
597    physreg_t physreg = ra_reg_get_physreg(src);
598    for (unsigned i = 0; i < reg_size(src); i++) {
599       struct reg_state expected = (struct reg_state){
600          .def = get_original_def(src->def),
601          .offset = i,
602       };
603       chase_definition(&expected);
604 
605       struct reg_state actual = file->regs[physreg + i];
606 
607       if (expected.def != actual.def || expected.offset != actual.offset) {
608          fprintf(
609             stderr,
610             "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
611             src->def->instr->serialno, src->def->name, i);
612          fprintf(stderr, "expected: ");
613          dump_reg_state(&expected);
614          fprintf(stderr, "\n");
615          fprintf(stderr, "actual: ");
616          dump_reg_state(&actual);
617          fprintf(stderr, "\n");
618          fprintf(stderr, "-> for instruction: ");
619          ir3_print_instr(instr);
620          ctx->failed = true;
621       }
622    }
623 }
624 
625 static void
check_reaching_instr(struct ra_val_ctx * ctx,struct ir3_instruction * instr)626 check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
627 {
628    if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
629        instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
630       return;
631    }
632 
633    foreach_src_if (src, instr, validate_reg_is_src) {
634       check_reaching_src(ctx, instr, src);
635    }
636 }
637 
638 static void
check_reaching_block(struct ra_val_ctx * ctx,struct ir3_block * block)639 check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
640 {
641    ctx->reaching = ctx->block_reaching[block->index];
642 
643    foreach_instr (instr, &block->instr_list) {
644       check_reaching_instr(ctx, instr);
645       propagate_instr(ctx, instr);
646    }
647 
648    for (unsigned i = 0; i < 2; i++) {
649       struct ir3_block *succ = block->successors[i];
650       if (!succ)
651          continue;
652 
653       unsigned pred_idx = ir3_block_get_pred_index(succ, block);
654       foreach_instr (instr, &succ->instr_list) {
655          if (instr->opc != OPC_META_PHI)
656             break;
657          if (instr->srcs[pred_idx]->def)
658             check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
659       }
660    }
661 }
662 
663 static void
check_reaching_defs(struct ra_val_ctx * ctx,struct ir3 * ir)664 check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
665 {
666    ctx->block_reaching =
667       rzalloc_array(ctx, struct reaching_state, ctx->block_count);
668 
669    struct reaching_state *start = &ctx->block_reaching[0];
670    for (unsigned i = 0; i < ctx->full_size; i++)
671       start->full.regs[i].def = UNDEF;
672    for (unsigned i = 0; i < ctx->half_size; i++)
673       start->half.regs[i].def = UNDEF;
674    for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
675       start->shared.regs[i].def = UNDEF;
676    for (unsigned i = 0; i < ctx->predicate_size; i++)
677       start->predicate.regs[i].def = UNDEF;
678 
679    bool progress;
680    do {
681       progress = false;
682       foreach_block (block, &ir->block_list) {
683          progress |= propagate_block(ctx, block);
684       }
685    } while (progress);
686 
687    foreach_block (block, &ir->block_list) {
688       check_reaching_block(ctx, block);
689    }
690 
691    if (ctx->failed) {
692       fprintf(stderr, "failing shader:\n");
693       ir3_print(ir);
694       abort();
695    }
696 }
697 
698 void
ir3_ra_validate(struct ir3_shader_variant * v,unsigned full_size,unsigned half_size,unsigned block_count,bool shared_ra)699 ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
700                 unsigned half_size, unsigned block_count, bool shared_ra)
701 {
702 #ifdef NDEBUG
703 #define VALIDATE 0
704 #else
705 #define VALIDATE 1
706 #endif
707 
708    if (!VALIDATE)
709       return;
710 
711    struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
712    ctx->merged_regs = v->mergedregs;
713    ctx->full_size = full_size;
714    ctx->half_size = half_size;
715    ctx->predicate_size = v->compiler->num_predicates * 2;
716    ctx->block_count = block_count;
717    ctx->shared_ra = shared_ra;
718    if (ctx->shared_ra)
719       ctx->spill_reaching = _mesa_pointer_hash_table_create(ctx);
720 
721    foreach_block (block, &v->ir->block_list) {
722       foreach_instr (instr, &block->instr_list) {
723          validate_simple(ctx, instr);
724       }
725    }
726 
727    check_reaching_defs(ctx, v->ir);
728 
729    ralloc_free(ctx);
730 }
731