• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2023 Valve Corporation.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 /* Try to fold a shared -> non-shared mov into the instruction producing the
7  * shared src. We do this aggressively, even if there are other uses of the
8  * source, on the assumption that the "default" state should be non-shared and
9  * we should be able to fold the other sources eventually.
10  */
11 
12 #include "util/ralloc.h"
13 
14 #include "ir3.h"
15 
16 static bool
try_shared_folding(struct ir3_instruction * mov,void * mem_ctx)17 try_shared_folding(struct ir3_instruction *mov, void *mem_ctx)
18 {
19    if (mov->opc != OPC_MOV)
20       return false;
21 
22    if ((mov->dsts[0]->flags & IR3_REG_SHARED) ||
23        !(mov->srcs[0]->flags & IR3_REG_SHARED))
24       return false;
25 
26    struct ir3_instruction *src = ssa(mov->srcs[0]);
27    if (!src)
28       return false;
29 
30    if (mov->cat1.dst_type != mov->cat1.src_type) {
31       /* Check if the conversion can be folded into the source by ir3_cf */
32       bool can_fold;
33       type_t output_type = ir3_output_conv_type(src, &can_fold);
34       if (!can_fold || output_type != TYPE_U32)
35          return false;
36       foreach_ssa_use (use, src) {
37          if (use->opc != OPC_MOV ||
38              use->cat1.src_type != mov->cat1.src_type ||
39              use->cat1.dst_type != mov->cat1.dst_type)
40             return false;
41       }
42    }
43 
44    if (src->opc == OPC_META_PHI) {
45       struct ir3_block *block = src->block;
46       for (unsigned i = 0; i < block->predecessors_count; i++) {
47          struct ir3_block *pred = block->predecessors[i];
48          if (src->srcs[i]->def) {
49             struct ir3_instruction *pred_mov =
50                ir3_instr_create_at(ir3_before_terminator(pred), OPC_MOV, 1, 1);
51             __ssa_dst(pred_mov)->flags |= (src->srcs[i]->flags & IR3_REG_HALF);
52             unsigned src_flags = IR3_REG_SSA | IR3_REG_SHARED |
53                (src->srcs[i]->flags & IR3_REG_HALF);
54             ir3_src_create(pred_mov, INVALID_REG, src_flags)->def =
55                src->srcs[i]->def;
56             pred_mov->cat1.src_type = pred_mov->cat1.dst_type =
57                (src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;
58 
59             _mesa_set_remove_key(src->srcs[i]->def->instr->uses, src);
60             _mesa_set_add(src->srcs[i]->def->instr->uses, pred_mov);
61             src->srcs[i]->def = pred_mov->dsts[0];
62          }
63          src->srcs[i]->flags &= ~IR3_REG_SHARED;
64       }
65    } else if (opc_cat(src->opc) == 2 && src->srcs_count >= 2) {
66       /* cat2 vector ALU instructions cannot have both shared sources */
67       if ((src->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_CONST)) &&
68           (src->srcs[1]->flags & (IR3_REG_SHARED | IR3_REG_CONST)))
69          return false;
70    } else if (opc_cat(src->opc) == 3) {
71       /* cat3 vector ALU instructions cannot have src1 shared */
72       if (src->srcs[1]->flags & IR3_REG_SHARED)
73          return false;
74    } else if (src->opc == OPC_LDC) {
75       src->flags &= ~IR3_INSTR_U;
76    } else if (src->opc == OPC_MOV) {
77       /* This catches cases like:
78        * cov.f32f16 sssa_1, c0.x
79        * mov.u16u16 ssa_2, sssa_1
80        * The cov can directly write to a non-shared reg.
81        */
82    } else {
83       return false;
84    }
85 
86    /* Remove IR3_REG_SHARED from the original destination, which should make the
87     * mov trivial so that it can be cleaned up later by copy prop.
88     */
89    src->dsts[0]->flags &= ~IR3_REG_SHARED;
90    mov->srcs[0]->flags &= ~IR3_REG_SHARED;
91 
92    /* Insert a copy to shared for uses other than this move instruction. */
93    struct ir3_instruction *shared_mov = NULL;
94    foreach_ssa_use (use, src) {
95       if (use == mov)
96          continue;
97 
98       if (!shared_mov) {
99          struct ir3_builder build =
100             ir3_builder_at(ir3_after_instr_and_phis(src));
101          shared_mov = ir3_MOV(&build, src, mov->cat1.src_type);
102          shared_mov->dsts[0]->flags |= IR3_REG_SHARED;
103          shared_mov->uses = _mesa_pointer_set_create(mem_ctx);
104       }
105 
106       for (unsigned i = 0; i < use->srcs_count; i++) {
107          if (use->srcs[i]->def == src->dsts[0])
108             use->srcs[i]->def = shared_mov->dsts[0];
109       }
110       _mesa_set_add(shared_mov->uses, use);
111    }
112 
113    return true;
114 }
115 
116 bool
ir3_shared_fold(struct ir3 * ir)117 ir3_shared_fold(struct ir3 *ir)
118 {
119    void *mem_ctx = ralloc_context(NULL);
120    bool progress = false;
121 
122    ir3_find_ssa_uses(ir, mem_ctx, false);
123 
124    /* Folding a phi can push the mov up to its sources, so iterate blocks in
125     * reverse to try and convert an entire phi-web in one go.
126     */
127    foreach_block_rev (block, &ir->block_list) {
128       foreach_instr_safe (instr, &block->instr_list) {
129          progress |= try_shared_folding(instr, mem_ctx);
130       }
131    }
132 
133    ralloc_free(mem_ctx);
134 
135    return progress;
136 }
137 
138