/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/ralloc.h"
#include "ir3_ra.h"
#include "ir3_shader.h"

/* This file implements a validation pass for register allocation. We check
 * that the assignment of SSA values to registers is "valid", in the sense
 * that each original definition reaches all of its uses without being
 * clobbered by something else.
 *
 * The validation is a forward dataflow analysis. The state at each point
 * consists of, for each physical register, the SSA value occupying it, or
 * one of a few special values:
 *
 * - "unknown" is set initially, before the dataflow analysis assigns the
 *   register a value. This is the lattice bottom.
 * - Registers at the start of the program get "undef", which acts like a
 *   special SSA value indicating that the register is never written.
 * - "overdefined" registers may hold more than one value, depending on
 *   which control-flow path was taken to reach this point. This is the
 *   lattice top.
 *
 * Overdefined has to be distinguished because in some programs, like this
 * simple example, it's perfectly normal and allowed:
 *
 * if (...) {
 *    mov.u32u32 ssa_1(r1.x), ...
 *    ...
 * } else {
 *    mov.u32u32 ssa_2(r1.x), ...
 *    ...
 * }
 * // r1.x is overdefined here!
 *
 * However, if a source after the if is accidentally assigned to r1.x, we
 * need to remember that its contents are invalid in order to catch the
 * mistake. Overdef also has to be distinguished from undef so that the
 * state forms a valid lattice, which guarantees that the analysis always
 * terminates. We could avoid relying on overdef by using liveness analysis,
 * but not relying on liveness has the benefit that we can catch bugs in the
 * liveness analysis too.
 *
 * One tricky thing we have to handle is the coalescing of splits/collects,
 * which means that multiple SSA values can occupy a register at the same
 * time. While we could use the same merge set indices that RA uses, that
 * would again rely on the merge set calculation being correct, which we
 * don't want to assume. Instead we treat splits/collects as transfer
 * instructions, similar to the parallelcopy instructions inserted by RA,
 * and have them copy their sources to their destinations. This means that
 * each physreg must carry the SSA def assigned to it plus an offset into
 * that definition, and when validating sources we must look through
 * splits/collects to find the "original" source for each subregister.
 */
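
/* Roughly, the pass runs in two phases: validate_simple() first checks that
 * every register access is in-bounds, then check_reaching_defs() iterates
 * the per-block transfer function to a fixed point and re-walks each block,
 * comparing the definition that actually reaches each source with the
 * definition the source claims.
 */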

#define UNKNOWN ((struct ir3_register *)NULL)
#define UNDEF   ((struct ir3_register *)(uintptr_t)1)
#define OVERDEF ((struct ir3_register *)(uintptr_t)2)

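/* Reaching state for a single physical (sub)register: the SSA def whose
 * value currently occupies it (or one of the sentinel values above), plus
 * the element offset into that def, as described in the header comment.
 */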
struct reg_state {
   struct ir3_register *def;
   unsigned offset;
};

struct file_state {
   struct reg_state regs[RA_MAX_FILE_SIZE];
};

struct reaching_state {
   struct file_state half, full, shared;
};

struct ra_val_ctx {
   struct ir3_instruction *current_instr;

   /* The current state of the dataflow analysis for the instruction we're
    * processing.
    */
   struct reaching_state reaching;

   /* The state at the end of each basic block. */
   struct reaching_state *block_reaching;
   unsigned block_count;

   /* When validating shared RA, we have to take spill/reload instructions into
    * account. This saves an array of reg_state for the source of each spill
    * instruction, to be restored at the corresponding reload(s).
    */
   struct hash_table *spill_reaching;

   unsigned full_size, half_size;

   bool merged_regs;
   bool shared_ra;

   bool failed;
};

static void
validate_error(struct ra_val_ctx *ctx, const char *condstr)
{
   fprintf(stderr, "ra validation fail: %s\n", condstr);
   fprintf(stderr, "  -> for instruction: ");
   ir3_print_instr(ctx->current_instr);
   abort();
}

#define validate_assert(ctx, cond)                                             \
   do {                                                                        \
      if (!(cond)) {                                                           \
         validate_error(ctx, #cond);                                           \
      }                                                                        \
   } while (0)

static unsigned
get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_SHARED)
      return RA_SHARED_SIZE;
   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
      return ctx->full_size;
   else
      return ctx->half_size;
}

static struct reg_state *
get_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
{
   struct hash_entry *entry = _mesa_hash_table_search(ctx->spill_reaching, dst);
   if (entry)
      return entry->data;
   else
      return NULL;
}

static struct reg_state *
get_or_create_spill_state(struct ra_val_ctx *ctx, struct ir3_register *dst)
{
   struct reg_state *state = get_spill_state(ctx, dst);
   if (state)
      return state;

   state = rzalloc_array(ctx, struct reg_state, reg_size(dst));
   _mesa_hash_table_insert(ctx->spill_reaching, dst, state);
   return state;
}

/* Validate simple things, like the registers being in-bounds. This way we
 * don't have to worry about out-of-bounds accesses later.
 */

static void
validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ctx->current_instr = instr;
   ra_foreach_dst (dst, instr) {
      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED))
         continue;
      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
      if (dst->tied)
         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
   }

   ra_foreach_src (src, instr) {
      if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
         continue;
      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
      validate_assert(ctx, src_max <= get_file_size(ctx, src));
   }
}

/* This is the lattice operator. */
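/* Writing merge(dst, src) for the operation below, with x any state and
 * a, b distinct (def, offset) pairs, the lattice behaves as:
 *
 *    merge(UNKNOWN, x) = x
 *    merge(x, UNKNOWN) = x
 *    merge(OVERDEF, x) = OVERDEF
 *    merge(x, OVERDEF) = OVERDEF
 *    merge(a, a)       = a
 *    merge(a, b)       = OVERDEF
 *
 * The return value reports whether dst changed, which drives the fixed-point
 * loop in check_reaching_defs().
 */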
static bool
merge_reg(struct reg_state *dst, const struct reg_state *src)
{
   if (dst->def == UNKNOWN) {
      *dst = *src;
      return src->def != UNKNOWN;
   } else if (dst->def == OVERDEF) {
      return false;
   } else {
      if (src->def == UNKNOWN)
         return false;
      else if (src->def == OVERDEF) {
         *dst = *src;
         return true;
      } else {
         if (dst->def != src->def || dst->offset != src->offset) {
            dst->def = OVERDEF;
            dst->offset = 0;
            return true;
         } else {
            return false;
         }
      }
   }
}

static bool
merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
{
   bool progress = false;
   for (unsigned i = 0; i < size; i++)
      progress |= merge_reg(&dst->regs[i], &src->regs[i]);
   return progress;
}

static bool
merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
            const struct reaching_state *src)
{
   bool progress = false;
   progress |= merge_file(&dst->full, &src->full, ctx->full_size);
   progress |= merge_file(&dst->half, &src->half, ctx->half_size);
   return progress;
}

static bool
merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
                     const struct reaching_state *src)
{
   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
}

static struct file_state *
ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
{
   if (reg->flags & IR3_REG_SHARED)
      return &ctx->reaching.shared;
   else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
      return &ctx->reaching.full;
   else
      return &ctx->reaching.half;
}

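/* For a normal instruction, each destination simply overwrites the reaching
 * state of every (sub)register it covers with its own def.
 */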
static void
propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ra_foreach_dst (dst, instr) {
      /* Process destinations from scalar ALU instructions that were demoted to
       * normal ALU instructions. For these we must treat the instruction as a
       * spill of itself and set the propagate state to itself. See
       * try_demote_instructions().
       */
      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
         if (instr->flags & IR3_INSTR_SHARED_SPILL) {
            struct reg_state *state = get_or_create_spill_state(ctx, dst);
            for (unsigned i = 0; i < reg_size(dst); i++) {
               state[i] = (struct reg_state){
                  .def = dst,
                  .offset = i,
               };
            }
         }
         continue;
      }

      struct file_state *file = ra_val_get_file(ctx, dst);
      physreg_t physreg = ra_reg_get_physreg(dst);
      for (unsigned i = 0; i < reg_size(dst); i++) {
         file->regs[physreg + i] = (struct reg_state){
            .def = dst,
            .offset = i,
         };
      }
   }
}

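/* A split reads one element at offset split.off within its source, so it
 * copies the reaching state of just that element to its destination.
 */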
static void
propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
{
   struct ir3_register *dst = split->dsts[0];
   struct ir3_register *src = split->srcs[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   physreg_t src_physreg = ra_reg_get_physreg(src);
   struct file_state *file = ra_val_get_file(ctx, dst);

   if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
      struct reg_state *src_state = get_spill_state(ctx, src->def);
      if (src_state) {
         struct reg_state *dst_state = get_or_create_spill_state(ctx, dst);
         memcpy(dst_state, &src_state[split->split.off * reg_elem_size(src)],
                reg_size(dst) * sizeof(struct reg_state));
      }
      return;
   }

   unsigned offset = split->split.off * reg_elem_size(src);
   for (unsigned i = 0; i < reg_elem_size(src); i++) {
      file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
   }
}

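/* A collect concatenates its sources. The reaching state of all sources is
 * snapshotted into a temporary array before the destination is written,
 * since after coalescing the destination typically overlaps its sources.
 * Immed/const sources have no reaching def, so they get the collect's own
 * def as a placeholder.
 */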
static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
   struct ir3_register *dst = collect->dsts[0];
   unsigned size = reg_size(dst);

   if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
      struct reg_state *dst_state = NULL;

      for (unsigned i = 0; i < collect->srcs_count; i++) {
         struct ir3_register *src = collect->srcs[i];
         unsigned dst_offset = i * reg_elem_size(dst);

         if (ra_reg_is_src(src)) {
            struct reg_state *src_state = get_spill_state(ctx, src->def);
            if (src_state) {
               if (!dst_state)
                  dst_state = get_or_create_spill_state(ctx, dst);
               memcpy(&dst_state[dst_offset], src_state,
                      reg_size(src) * sizeof(struct reg_state));
            }
         }
      }
   } else {
      struct file_state *file = ra_val_get_file(ctx, dst);
      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct reg_state srcs[size];

      for (unsigned i = 0; i < collect->srcs_count; i++) {
         struct ir3_register *src = collect->srcs[i];
         unsigned dst_offset = i * reg_elem_size(dst);

         for (unsigned j = 0; j < reg_elem_size(dst); j++) {
            if (!ra_reg_is_src(src)) {
               srcs[dst_offset + j] = (struct reg_state){
                  .def = dst,
                  .offset = dst_offset + j,
               };
            } else {
               physreg_t src_physreg = ra_reg_get_physreg(src);
               srcs[dst_offset + j] = file->regs[src_physreg + j];
            }
         }
      }

      for (unsigned i = 0; i < size; i++)
         file->regs[dst_physreg + i] = srcs[i];
   }
}

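/* Parallel copies are semantically simultaneous, so the same snapshot-then-
 * write pattern as collects applies: read the reaching state of every source
 * before writing any destination.
 */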
static void
propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
{
   unsigned size = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      size += reg_size(pcopy->srcs[i]);
   }

   struct reg_state srcs[size];

   unsigned offset = 0;
   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      struct ir3_register *src = pcopy->srcs[i];
      struct file_state *file = ra_val_get_file(ctx, dst);

      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
         if (ra_reg_is_src(src)) {
            struct reg_state *src_state = get_spill_state(ctx, src->def);
            if (src_state) {
               struct reg_state *dst_state = get_or_create_spill_state(ctx, dst);
               memcpy(dst_state, src_state, reg_size(dst) * sizeof(struct reg_state));
            }
         }
      } else {
         for (unsigned j = 0; j < reg_size(dst); j++) {
            if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
               srcs[offset + j] = (struct reg_state){
                  .def = dst,
                  .offset = j,
               };
            } else {
               physreg_t src_physreg = ra_reg_get_physreg(src);
               srcs[offset + j] = file->regs[src_physreg + j];
            }
         }
      }

      offset += reg_size(dst);
   }
   assert(offset == size);

   offset = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];

      if (ctx->shared_ra && !(dst->flags & IR3_REG_SHARED)) {
         offset += reg_size(dst);
         continue;
      }

      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++)
         file->regs[dst_physreg + j] = srcs[offset + j];

      offset += reg_size(dst);
   }
   assert(offset == size);
}

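/* Shared spills/reloads move values between the shared file and the side
 * table: a spill snapshots the reaching state of its shared source, keyed on
 * the spill destination, and a reload restores that snapshot into the shared
 * registers it writes.
 */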
static void
propagate_spill(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->srcs[0]->flags & IR3_REG_SHARED) { /* spill */
      struct reg_state *state = get_or_create_spill_state(ctx, instr->dsts[0]);
      physreg_t src_physreg = ra_reg_get_physreg(instr->srcs[0]);
      memcpy(state, &ctx->reaching.shared.regs[src_physreg],
             reg_size(instr->srcs[0]) * sizeof(struct reg_state));
   } else { /* reload */
      struct reg_state *state = get_spill_state(ctx, instr->srcs[0]->def);
      assert(state);
      physreg_t dst_physreg = ra_reg_get_physreg(instr->dsts[0]);
      memcpy(&ctx->reaching.shared.regs[dst_physreg], state,
             reg_size(instr->dsts[0]) * sizeof(struct reg_state));
   }
}

static void
propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_SPLIT)
      propagate_split(ctx, instr);
   else if (instr->opc == OPC_META_COLLECT)
      propagate_collect(ctx, instr);
   else if (instr->opc == OPC_META_PARALLEL_COPY)
      propagate_parallelcopy(ctx, instr);
   else if (ctx->shared_ra && instr->opc == OPC_MOV &&
            /* Moves from immed/const with IR3_INSTR_SHARED_SPILL were demoted
             * from scalar ALU, see try_demote_instruction().
             */
            !(instr->srcs[0]->flags & (IR3_REG_IMMED | IR3_REG_CONST)) &&
            (instr->flags & IR3_INSTR_SHARED_SPILL))
      propagate_spill(ctx, instr);
   else
      propagate_normal_instr(ctx, instr);
}

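/* The per-block transfer function: start from the state at the block's
 * entry, push it through each instruction, then merge the result into each
 * logical successor. The shared file is also merged into each physical
 * successor, matching how shared (wave-level) registers flow along physical
 * control-flow edges.
 */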
static bool
propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      propagate_instr(ctx, instr);
   }

   bool progress = false;
   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;
      progress |=
         merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
   }
   for (unsigned i = 0; i < block->physical_successors_count; i++) {
      struct ir3_block *succ = block->physical_successors[i];
      progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
                                       &ctx->reaching);
   }
   return progress;
}

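/* Walk backwards through split/collect/parallelcopy copies to find the
 * original (def, offset) that should reach a given subregister, mirroring
 * how the propagate functions above transfer state forwards.
 */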
static void
chase_definition(struct reg_state *state)
{
   while (true) {
      struct ir3_instruction *instr = state->def->instr;
      switch (instr->opc) {
      case OPC_META_SPLIT: {
         struct ir3_register *new_def = instr->srcs[0]->def;
         unsigned offset = instr->split.off * reg_elem_size(new_def);
         *state = (struct reg_state){
            .def = new_def,
            .offset = state->offset + offset,
         };
         break;
      }
      case OPC_META_COLLECT: {
         unsigned src_idx = state->offset / reg_elem_size(state->def);
         unsigned src_offset = state->offset % reg_elem_size(state->def);
         struct ir3_register *new_def = instr->srcs[src_idx]->def;
         if (new_def) {
            *state = (struct reg_state){
               .def = new_def,
               .offset = src_offset,
            };
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      case OPC_META_PARALLEL_COPY: {
         unsigned dst_idx = ~0;
         for (unsigned i = 0; i < instr->dsts_count; i++) {
            if (instr->dsts[i] == state->def) {
               dst_idx = i;
               break;
            }
         }
         assert(dst_idx != ~0);

         struct ir3_register *new_def = instr->srcs[dst_idx]->def;
         if (new_def) {
            state->def = new_def;
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      default:
         return;
      }
   }
}

static void
dump_reg_state(struct reg_state *state)
{
   if (state->def == UNDEF) {
      fprintf(stderr, "no reaching definition");
   } else if (state->def == OVERDEF) {
      fprintf(stderr,
              "more than one reaching definition or partial definition");
   } else {
      /* The analysis should always remove UNKNOWN eventually. */
      assert(state->def != UNKNOWN);

      fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
              state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
              state->def->num / 4, "xyzw"[state->def->num % 4],
              state->offset);
   }
}

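/* Check a single source: for each subregister, chase the expected definition
 * through any copies and compare it against the state that actually reaches
 * the source's assigned physreg. On mismatch, dump both and mark the pass as
 * failed rather than aborting immediately, so every bad source is reported.
 */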
static void
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
                   struct ir3_register *src)
{
   if (ctx->shared_ra && !(src->flags & IR3_REG_SHARED))
      return;
   struct file_state *file = ra_val_get_file(ctx, src);
   physreg_t physreg = ra_reg_get_physreg(src);
   for (unsigned i = 0; i < reg_size(src); i++) {
      struct reg_state expected = (struct reg_state){
         .def = src->def,
         .offset = i,
      };
      chase_definition(&expected);

      struct reg_state actual = file->regs[physreg + i];

      if (expected.def != actual.def || expected.offset != actual.offset) {
         fprintf(
            stderr,
            "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
            src->def->instr->serialno, src->def->name, i);
         fprintf(stderr, "expected: ");
         dump_reg_state(&expected);
         fprintf(stderr, "\n");
         fprintf(stderr, "actual: ");
         dump_reg_state(&actual);
         fprintf(stderr, "\n");
         fprintf(stderr, "-> for instruction: ");
         ir3_print_instr(instr);
         ctx->failed = true;
      }
   }
}

static void
check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
       instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
      return;
   }

   ra_foreach_src (src, instr) {
      check_reaching_src(ctx, instr, src);
   }
}

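/* Phi sources are not checked at the phi itself; instead each block checks
 * the corresponding phi source of each successor against the state at the
 * end of the block, which is where the value must actually be live.
 */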
static void
check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      check_reaching_instr(ctx, instr);
      propagate_instr(ctx, instr);
   }

   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;

      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
      foreach_instr (instr, &succ->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         if (instr->srcs[pred_idx]->def)
            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
      }
   }
}

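/* Seed the entry block with UNDEF, iterate the transfer function until no
 * block's incoming state changes, then re-walk each block checking sources.
 * The lattice has finite height, so the do/while loop is guaranteed to
 * terminate.
 */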
static void
check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
{
   ctx->block_reaching =
      rzalloc_array(ctx, struct reaching_state, ctx->block_count);

   struct reaching_state *start = &ctx->block_reaching[0];
   for (unsigned i = 0; i < ctx->full_size; i++)
      start->full.regs[i].def = UNDEF;
   for (unsigned i = 0; i < ctx->half_size; i++)
      start->half.regs[i].def = UNDEF;
   for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
      start->shared.regs[i].def = UNDEF;

   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         progress |= propagate_block(ctx, block);
      }
   } while (progress);

   foreach_block (block, &ir->block_list) {
      check_reaching_block(ctx, block);
   }

   if (ctx->failed) {
      fprintf(stderr, "failing shader:\n");
      ir3_print(ir);
      abort();
   }
}

void
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
                unsigned half_size, unsigned block_count, bool shared_ra)
{
#ifdef NDEBUG
#define VALIDATE 0
#else
#define VALIDATE 1
#endif

   if (!VALIDATE)
      return;

   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
   ctx->merged_regs = v->mergedregs;
   ctx->full_size = full_size;
   ctx->half_size = half_size;
   ctx->block_count = block_count;
   ctx->shared_ra = shared_ra;
   if (ctx->shared_ra)
      ctx->spill_reaching = _mesa_pointer_hash_table_create(ctx);

   foreach_block (block, &v->ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         validate_simple(ctx, instr);
      }
   }

   check_reaching_defs(ctx, v->ir);

   ralloc_free(ctx);
}