• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2023 Valve Corporation.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "ir3.h"
7 #include "util/ralloc.h"
8 
/* RA cannot handle phis of shared registers where there are extra physical
 * sources, or the sources have extra physical destinations, because these edges
 * are critical edges that we cannot resolve copies along.  Here's a contrived
 * example:
 *
 * loop {
 *    if non-uniform {
 *       if uniform {
 *          x_1 = ...;
 *          continue;
 *       }
 *       x_2 = ...;
 *    } else {
 *       break;
 *    }
 *    // continue block
 *    x_3 = phi(x_1, x_2)
 * }
 *
 * Assuming x_1 and x_2 are uniform, x_3 will also be uniform, because all
 * threads that stay in the loop take the same branch to the continue block.
 * However, execution may fall through from the assignment to x_2 to the
 * break statement because the outer if is non-uniform, and then it will fall
 * through again to the continue block. In cases like this we have to demote the
 * phi to normal registers and insert movs around it (which will probably be
 * coalesced).
 */
36 
37 static void
lower_phi(void * ctx,struct ir3_instruction * phi)38 lower_phi(void *ctx, struct ir3_instruction *phi)
39 {
40    struct ir3_block *block = phi->block;
41    for (unsigned i = 0; i < block->predecessors_count; i++) {
42       struct ir3_block *pred = block->predecessors[i];
43       if (phi->srcs[i]->def) {
44          struct ir3_instruction *pred_mov =
45             ir3_instr_create_at(ir3_before_terminator(pred), OPC_MOV, 1, 1);
46          pred_mov->uses = _mesa_pointer_set_create(ctx);
47          __ssa_dst(pred_mov)->flags |= (phi->srcs[i]->flags & IR3_REG_HALF);
48          unsigned src_flags = IR3_REG_SSA | IR3_REG_SHARED |
49             (phi->srcs[i]->flags & IR3_REG_HALF);
50          ir3_src_create(pred_mov, INVALID_REG, src_flags)->def =
51             phi->srcs[i]->def;
52          pred_mov->cat1.src_type = pred_mov->cat1.dst_type =
53             (src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
54 
55          _mesa_set_remove_key(phi->srcs[i]->def->instr->uses, phi);
56          _mesa_set_add(phi->srcs[i]->def->instr->uses, pred_mov);
57          phi->srcs[i]->def = pred_mov->dsts[0];
58       }
59       phi->srcs[i]->flags &= ~IR3_REG_SHARED;
60    }
61 
62    phi->dsts[0]->flags &= ~IR3_REG_SHARED;
63 
64    struct ir3_builder build = ir3_builder_at(ir3_after_phis(block));
65    struct ir3_instruction *shared_mov = ir3_MOV(
66       &build, phi, (phi->dsts[0]->flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32);
67    shared_mov->uses = _mesa_pointer_set_create(ctx);
68    shared_mov->dsts[0]->flags |= IR3_REG_SHARED;
69 
70    foreach_ssa_use (use, phi) {
71       for (unsigned i = 0; i < use->srcs_count; i++) {
72          if (use->srcs[i]->def == phi->dsts[0])
73             use->srcs[i]->def = shared_mov->dsts[0];
74       }
75    }
76 }
77 
78 bool
ir3_lower_shared_phis(struct ir3 * ir)79 ir3_lower_shared_phis(struct ir3 *ir)
80 {
81    void *mem_ctx = ralloc_context(NULL);
82    bool progress = false;
83 
84    ir3_find_ssa_uses(ir, mem_ctx, false);
85 
86    foreach_block (block, &ir->block_list) {
87       bool pred_physical_edge = false;
88       for (unsigned i = 0; i < block->predecessors_count; i++) {
89          unsigned successors_count =
90             block->predecessors[i]->successors[1] ? 2 : 1;
91          if (block->predecessors[i]->physical_successors_count > successors_count) {
92             pred_physical_edge = true;
93             break;
94          }
95       }
96 
97       if (!pred_physical_edge &&
98           block->physical_predecessors_count == block->predecessors_count)
99          continue;
100 
101       foreach_instr_safe (phi, &block->instr_list) {
102          if (phi->opc != OPC_META_PHI)
103             break;
104 
105          if (!(phi->dsts[0]->flags & IR3_REG_SHARED))
106             continue;
107 
108          lower_phi(mem_ctx, phi);
109          progress = true;
110       }
111    }
112 
113    ralloc_free(mem_ctx);
114    return progress;
115 }
116 
117