/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

#include "util/ralloc.h"
#include "ir3_ra.h"
#include "ir3_shader.h"

/* This file implements a validation pass for register allocation. We check
 * that the assignment of SSA values to registers is "valid", in the sense
 * that each original definition reaches all of its uses without being
 * clobbered by something else.
 *
 * The validation is a forward dataflow analysis. The state at each point
 * consists of, for each physical register, the SSA value occupying it, or a
 * few special values:
 *
 * - "unknown" is set initially, before the dataflow analysis assigns it a
 *   value. This is the lattice bottom.
 * - Values at the start get "undef", which acts like a special SSA value that
 *   indicates it is never written.
 * - "overdefined" registers are set to more than one value, depending on
 *   which path you take to get to the spot. This is the lattice top.
 *
 * Overdefined is necessary to distinguish because in some programs, like this
 * simple example, it's perfectly normal and allowed:
 *
 * if (...) {
 *    mov.u32u32 ssa_1(r1.x), ...
 *    ...
 * } else {
 *    mov.u32u32 ssa_2(r1.x), ...
 *    ...
 * }
 * // r1.x is overdefined here!
 *
 * However, if an ssa value after the if is accidentally assigned to r1.x, we
 * need to remember that it's invalid to catch the mistake. Overdef has to be
 * distinguished from undef so that the state forms a valid lattice to
 * guarantee that the analysis always terminates. We could avoid relying on
 * overdef by using liveness analysis, but not relying on liveness has the
 * benefit that we can catch bugs in liveness analysis too.
 *
 * One tricky thing we have to handle is the coalescing of splits/collects,
 * which means that multiple SSA values can occupy a register at the same
 * time. While we could use the same merge set indices that RA uses, again
 * that would rely on the merge set calculation being correct which we don't
 * want to. Instead we treat splits/collects as transfer instructions, similar
 * to the parallelcopy instructions inserted by RA, and have them copy their
 * sources to their destinations. This means that each physreg must carry the
 * SSA def assigned to it plus an offset into that definition, and when
 * validating sources we must look through splits/collects to find the
 * "original" source for each subregister.
 */

75 #define UNKNOWN ((struct ir3_register *)NULL)
76 #define UNDEF   ((struct ir3_register *)(uintptr_t)1)
77 #define OVERDEF ((struct ir3_register *)(uintptr_t)2)
78 
79 struct reg_state {
80    struct ir3_register *def;
81    unsigned offset;
82 };
83 
84 struct file_state {
85    struct reg_state regs[RA_MAX_FILE_SIZE];
86 };
87 
88 struct reaching_state {
89    struct file_state half, full, shared;
90 };
91 
92 struct ra_val_ctx {
93    struct ir3_instruction *current_instr;
94 
95    struct reaching_state reaching;
96    struct reaching_state *block_reaching;
97    unsigned block_count;
98 
99    unsigned full_size, half_size;
100 
101    bool merged_regs;
102 
103    bool failed;
104 };
105 
106 static void
validate_error(struct ra_val_ctx * ctx,const char * condstr)107 validate_error(struct ra_val_ctx *ctx, const char *condstr)
108 {
109    fprintf(stderr, "ra validation fail: %s\n", condstr);
110    fprintf(stderr, "  -> for instruction: ");
111    ir3_print_instr(ctx->current_instr);
112    abort();
113 }
114 
115 #define validate_assert(ctx, cond)                                             \
116    do {                                                                        \
117       if (!(cond)) {                                                           \
118          validate_error(ctx, #cond);                                           \
119       }                                                                        \
120    } while (0)
121 
122 static unsigned
get_file_size(struct ra_val_ctx * ctx,struct ir3_register * reg)123 get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)
124 {
125    if (reg->flags & IR3_REG_SHARED)
126       return RA_SHARED_SIZE;
127    else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
128       return ctx->full_size;
129    else
130       return ctx->half_size;
131 }
132 
/* Validate simple things, like the registers being in-bounds. This way we
 * don't have to worry about out-of-bounds accesses later.
 */

static void
validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ctx->current_instr = instr;
   ra_foreach_dst (dst, instr) {
      /* The destination must fit entirely within its register file. */
      unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);
      validate_assert(ctx, dst_max <= get_file_size(ctx, dst));
      /* A tied destination must get the same register as its tied source. */
      if (dst->tied)
         validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));
   }

   ra_foreach_src (src, instr) {
      unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);
      validate_assert(ctx, src_max <= get_file_size(ctx, src));
   }
}

154 /* This is the lattice operator. */
155 static bool
merge_reg(struct reg_state * dst,const struct reg_state * src)156 merge_reg(struct reg_state *dst, const struct reg_state *src)
157 {
158    if (dst->def == UNKNOWN) {
159       *dst = *src;
160       return src->def != UNKNOWN;
161    } else if (dst->def == OVERDEF) {
162       return false;
163    } else {
164       if (src->def == UNKNOWN)
165          return false;
166       else if (src->def == OVERDEF) {
167          *dst = *src;
168          return true;
169       } else {
170          if (dst->def != src->def || dst->offset != src->offset) {
171             dst->def = OVERDEF;
172             dst->offset = 0;
173             return true;
174          } else {
175             return false;
176          }
177       }
178    }
179 }
180 
181 static bool
merge_file(struct file_state * dst,const struct file_state * src,unsigned size)182 merge_file(struct file_state *dst, const struct file_state *src, unsigned size)
183 {
184    bool progress = false;
185    for (unsigned i = 0; i < size; i++)
186       progress |= merge_reg(&dst->regs[i], &src->regs[i]);
187    return progress;
188 }
189 
/* Merge along a logical CFG edge. Note: the shared file is intentionally
 * not merged here; it flows along physical edges via merge_state_physical()
 * (see propagate_block()).
 */
static bool
merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,
            const struct reaching_state *src)
{
   bool progress = false;
   progress |= merge_file(&dst->full, &src->full, ctx->full_size);
   progress |= merge_file(&dst->half, &src->half, ctx->half_size);
   return progress;
}

/* Merge only the shared file, used along physical CFG edges (see
 * propagate_block()).
 */
static bool
merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,
                     const struct reaching_state *src)
{
   return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);
}

207 static struct file_state *
ra_val_get_file(struct ra_val_ctx * ctx,struct ir3_register * reg)208 ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register *reg)
209 {
210    if (reg->flags & IR3_REG_SHARED)
211       return &ctx->reaching.shared;
212    else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))
213       return &ctx->reaching.full;
214    else
215       return &ctx->reaching.half;
216 }
217 
/* A normal instruction simply defines its destinations: every physreg a dst
 * covers now holds (dst, offset-within-dst).
 */
static void
propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
{
   ra_foreach_dst (dst, instr) {
      struct file_state *file = ra_val_get_file(ctx, dst);
      physreg_t physreg = ra_reg_get_physreg(dst);
      for (unsigned i = 0; i < reg_size(dst); i++) {
         file->regs[physreg + i] = (struct reg_state){
            .def = dst,
            .offset = i,
         };
      }
   }
}

/* Split is a transfer instruction: the dst's registers take on whatever
 * state the selected slice of the source currently has, so the original
 * definition is propagated through rather than the split itself.
 */
static void
propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)
{
   struct ir3_register *dst = split->dsts[0];
   struct ir3_register *src = split->srcs[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   physreg_t src_physreg = ra_reg_get_physreg(src);
   struct file_state *file = ra_val_get_file(ctx, dst);

   /* split.off selects which element of the source vector is extracted. */
   unsigned offset = split->split.off * reg_elem_size(src);
   for (unsigned i = 0; i < reg_elem_size(src); i++) {
      file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];
   }
}

/* Collect is a transfer instruction: it copies each source into the
 * corresponding slice of the dst. Non-register sources (immed/const) have
 * no reaching state, so those slices are treated as defined by the collect
 * itself.
 */
static void
propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)
{
   struct ir3_register *dst = collect->dsts[0];
   physreg_t dst_physreg = ra_reg_get_physreg(dst);
   struct file_state *file = ra_val_get_file(ctx, dst);

   unsigned size = reg_size(dst);
   /* Gather into a temporary first, since the sources may overlap the dst
    * (RA coalesces collects with their sources).
    */
   struct reg_state srcs[size];

   for (unsigned i = 0; i < collect->srcs_count; i++) {
      struct ir3_register *src = collect->srcs[i];
      unsigned dst_offset = i * reg_elem_size(dst);
      for (unsigned j = 0; j < reg_elem_size(dst); j++) {
         if (!ra_reg_is_src(src)) {
            srcs[dst_offset + j] = (struct reg_state){
               .def = dst,
               .offset = dst_offset + j,
            };
         } else {
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[dst_offset + j] = file->regs[src_physreg + j];
         }
      }
   }

   for (unsigned i = 0; i < size; i++)
      file->regs[dst_physreg + i] = srcs[i];
}

/* Parallel copy: all sources are read before any destination is written, so
 * snapshot the state reaching every source first and only then write the
 * destinations — this makes swaps/cycles work correctly. Immed/const
 * sources are treated as defined by the copy itself.
 */
static void
propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)
{
   unsigned size = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      size += reg_size(pcopy->srcs[i]);
   }

   struct reg_state srcs[size];

   /* Pass 1: snapshot the state reaching each source. */
   unsigned offset = 0;
   for (unsigned i = 0; i < pcopy->srcs_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      struct ir3_register *src = pcopy->srcs[i];
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++) {
         if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {
            srcs[offset + j] = (struct reg_state){
               .def = dst,
               .offset = j,
            };
         } else {
            physreg_t src_physreg = ra_reg_get_physreg(src);
            srcs[offset + j] = file->regs[src_physreg + j];
         }
      }

      offset += reg_size(dst);
   }
   assert(offset == size);

   /* Pass 2: write the snapshots into the destinations. */
   offset = 0;
   for (unsigned i = 0; i < pcopy->dsts_count; i++) {
      struct ir3_register *dst = pcopy->dsts[i];
      physreg_t dst_physreg = ra_reg_get_physreg(dst);
      struct file_state *file = ra_val_get_file(ctx, dst);

      for (unsigned j = 0; j < reg_size(dst); j++)
         file->regs[dst_physreg + j] = srcs[offset + j];

      offset += reg_size(dst);
   }
   assert(offset == size);
}

324 static void
propagate_instr(struct ra_val_ctx * ctx,struct ir3_instruction * instr)325 propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
326 {
327    if (instr->opc == OPC_META_SPLIT)
328       propagate_split(ctx, instr);
329    else if (instr->opc == OPC_META_COLLECT)
330       propagate_collect(ctx, instr);
331    else if (instr->opc == OPC_META_PARALLEL_COPY)
332       propagate_parallelcopy(ctx, instr);
333    else
334       propagate_normal_instr(ctx, instr);
335 }
336 
/* Propagate the reaching state through one block and merge the result into
 * each successor's entry state. Returns whether any successor changed, to
 * drive the fixed-point loop in check_reaching_defs().
 */
static bool
propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      propagate_instr(ctx, instr);
   }

   bool progress = false;
   /* Full/half state flows along logical edges... */
   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;
      progress |=
         merge_state(ctx, &ctx->block_reaching[succ->index], &ctx->reaching);
   }
   /* ...while shared state flows along physical edges. */
   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->physical_successors[i];
      if (!succ)
         continue;
      progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],
                                       &ctx->reaching);
   }
   return progress;
}

/* Transfer instructions (split/collect/parallelcopy) propagate their
 * sources' state through to their destinations, so to compute which
 * (def, offset) is *expected* to reach a source we walk backwards through
 * any chain of transfer instructions to the "original" definition,
 * adjusting the offset along the way. Stops early at immed/const sources,
 * which have no defining instruction.
 */
static void
chase_definition(struct reg_state *state)
{
   while (true) {
      struct ir3_instruction *instr = state->def->instr;
      switch (instr->opc) {
      case OPC_META_SPLIT: {
         /* The split's dst is a slice of its source at element split.off. */
         struct ir3_register *new_def = instr->srcs[0]->def;
         unsigned offset = instr->split.off * reg_elem_size(new_def);
         *state = (struct reg_state){
            .def = new_def,
            .offset = state->offset + offset,
         };
         break;
      }
      case OPC_META_COLLECT: {
         /* Map the offset within the collect's dst back to one source. */
         unsigned src_idx = state->offset / reg_elem_size(state->def);
         unsigned src_offset = state->offset % reg_elem_size(state->def);
         struct ir3_register *new_def = instr->srcs[src_idx]->def;
         if (new_def) {
            *state = (struct reg_state){
               .def = new_def,
               .offset = src_offset,
            };
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      case OPC_META_PARALLEL_COPY: {
         /* Find which dst we came from, then continue from its src. */
         unsigned dst_idx = ~0;
         for (unsigned i = 0; i < instr->dsts_count; i++) {
            if (instr->dsts[i] == state->def) {
               dst_idx = i;
               break;
            }
         }
         assert(dst_idx != ~0);

         struct ir3_register *new_def = instr->srcs[dst_idx]->def;
         if (new_def) {
            state->def = new_def;
         } else {
            /* Bail on immed/const */
            return;
         }
         break;
      }
      default:
         /* A real instruction: this is the original definition. */
         return;
      }
   }
}

419 static void
dump_reg_state(struct reg_state * state)420 dump_reg_state(struct reg_state *state)
421 {
422    if (state->def == UNDEF) {
423       fprintf(stderr, "no reaching definition");
424    } else if (state->def == OVERDEF) {
425       fprintf(stderr,
426               "more than one reaching definition or partial definition");
427    } else {
428       /* The analysis should always remove UNKNOWN eventually. */
429       assert(state->def != UNKNOWN);
430 
431       fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,
432               state->def->name, (state->def->flags & IR3_REG_HALF) ? "h" : "",
433               state->def->num / 4, "xyzw"[state->def->num % 4],
434               state -> offset);
435    }
436 }
437 
/* Check that the definition actually reaching each physreg of src matches
 * the one it expects (its SSA def, chased through transfer instructions).
 * Failures are recorded in ctx->failed rather than aborting immediately, so
 * the whole shader can be dumped afterwards in check_reaching_defs().
 */
static void
check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,
                   struct ir3_register *src)
{
   struct file_state *file = ra_val_get_file(ctx, src);
   physreg_t physreg = ra_reg_get_physreg(src);
   for (unsigned i = 0; i < reg_size(src); i++) {
      struct reg_state expected = (struct reg_state){
         .def = src->def,
         .offset = i,
      };
      chase_definition(&expected);

      struct reg_state actual = file->regs[physreg + i];

      if (expected.def != actual.def || expected.offset != actual.offset) {
         fprintf(
            stderr,
            "ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",
            src->def->instr->serialno, src->def->name, i);
         fprintf(stderr, "expected: ");
         dump_reg_state(&expected);
         fprintf(stderr, "\n");
         fprintf(stderr, "actual: ");
         dump_reg_state(&actual);
         fprintf(stderr, "\n");
         fprintf(stderr, "-> for instruction: ");
         ir3_print_instr(instr);
         ctx->failed = true;
      }
   }
}

471 static void
check_reaching_instr(struct ra_val_ctx * ctx,struct ir3_instruction * instr)472 check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)
473 {
474    if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||
475        instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {
476       return;
477    }
478 
479    ra_foreach_src (src, instr) {
480       check_reaching_src(ctx, instr, src);
481    }
482 }
483 
/* Walk one block, checking each instruction's sources against the reaching
 * state before propagating through it. Successor phi sources are checked
 * here, at the end of the predecessor, where this edge's state is known.
 */
static void
check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)
{
   ctx->reaching = ctx->block_reaching[block->index];

   foreach_instr (instr, &block->instr_list) {
      check_reaching_instr(ctx, instr);
      propagate_instr(ctx, instr);
   }

   for (unsigned i = 0; i < 2; i++) {
      struct ir3_block *succ = block->successors[i];
      if (!succ)
         continue;

      /* Check the phi source corresponding to this predecessor edge. Phis
       * are grouped at the top of a block, so stop at the first non-phi.
       */
      unsigned pred_idx = ir3_block_get_pred_index(succ, block);
      foreach_instr (instr, &succ->instr_list) {
         if (instr->opc != OPC_META_PHI)
            break;
         /* A NULL def (undef phi source) has nothing to check. */
         if (instr->srcs[pred_idx]->def)
            check_reaching_src(ctx, instr, instr->srcs[pred_idx]);
      }
   }
}

/* Run the forward dataflow analysis to a fixed point, then check every
 * source in the program against the computed reaching definitions, aborting
 * with a dump of the shader if anything failed.
 */
static void
check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)
{
   /* rzalloc zeroes the states, i.e. everything starts UNKNOWN (bottom). */
   ctx->block_reaching =
      rzalloc_array(ctx, struct reaching_state, ctx->block_count);

   /* At program entry every register is UNDEF: never written. */
   struct reaching_state *start = &ctx->block_reaching[0];
   for (unsigned i = 0; i < ctx->full_size; i++)
      start->full.regs[i].def = UNDEF;
   for (unsigned i = 0; i < ctx->half_size; i++)
      start->half.regs[i].def = UNDEF;
   for (unsigned i = 0; i < RA_SHARED_SIZE; i++)
      start->shared.regs[i].def = UNDEF;

   /* Iterate to a fixed point; termination is guaranteed because the
    * per-register states form a finite lattice (see the file comment).
    */
   bool progress;
   do {
      progress = false;
      foreach_block (block, &ir->block_list) {
         progress |= propagate_block(ctx, block);
      }
   } while (progress);

   foreach_block (block, &ir->block_list) {
      check_reaching_block(ctx, block);
   }

   if (ctx->failed) {
      fprintf(stderr, "failing shader:\n");
      ir3_print(ir);
      abort();
   }
}

/* Entry point: validate the register allocation of shader variant v.
 * full_size/half_size are the register file sizes RA worked with, and
 * block_count the number of blocks (used to size per-block state).
 * Compiled out to a no-op in release (NDEBUG) builds.
 */
void
ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,
                unsigned half_size, unsigned block_count)
{
#ifdef NDEBUG
#define VALIDATE 0
#else
#define VALIDATE 1
#endif

   if (!VALIDATE)
      return;

   struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);
   ctx->merged_regs = v->mergedregs;
   ctx->full_size = full_size;
   ctx->half_size = half_size;
   ctx->block_count = block_count;

   /* First pass: bounds checks, so the dataflow pass can index freely. */
   foreach_block (block, &v->ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         validate_simple(ctx, instr);
      }
   }

   /* Second pass: the reaching-definitions analysis and source checks. */
   check_reaching_defs(ctx, v->ir);

   ralloc_free(ctx);
}
