/*
 * Copyright © 2023 Valve Corporation.
 * SPDX-License-Identifier: MIT
 */

/* Try to fold a shared -> non-shared mov into the instruction producing the
 * shared src. We do this aggressively, even if there are other uses of the
 * source, on the assumption that the "default" state should be non-shared and
 * we should be able to fold the other sources eventually.
 */

#include "util/ralloc.h"

#include "ir3.h"

static bool
try_shared_folding(struct ir3_instruction *mov, void *mem_ctx)
{
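   /* Only consider a plain mov that copies a shared SSA source into a
    * non-shared destination.
    */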
   if (mov->opc != OPC_MOV)
      return false;

   if ((mov->dsts[0]->flags & IR3_REG_SHARED) ||
       !(mov->srcs[0]->flags & IR3_REG_SHARED))
      return false;

   struct ir3_instruction *src = ssa(mov->srcs[0]);
   if (!src)
      return false;

   if (mov->cat1.dst_type != mov->cat1.src_type) {
      /* Check if the conversion can be folded into the source by ir3_cf */
      bool can_fold;
      type_t output_type = ir3_output_conv_type(src, &can_fold);
      if (!can_fold || output_type != TYPE_U32)
         return false;
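      /* ir3_cf can only fold the conversion into src if every use of src
       * performs the same conversion, so bail if any use differs.
       */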
      foreach_ssa_use (use, src) {
         if (use->opc != OPC_MOV ||
             use->cat1.src_type != mov->cat1.src_type ||
             use->cat1.dst_type != mov->cat1.dst_type)
            return false;
      }
   }

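   /* A phi can only become non-shared if each incoming value is moved out of
    * shared registers first, so insert a shared -> non-shared mov before each
    * predecessor's terminator and rewrite the phi source to use it.
    */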
   if (src->opc == OPC_META_PHI) {
      struct ir3_block *block = src->block;
      for (unsigned i = 0; i < block->predecessors_count; i++) {
         struct ir3_block *pred = block->predecessors[i];
         if (src->srcs[i]->def) {
            struct ir3_instruction *pred_mov =
               ir3_instr_create_at(ir3_before_terminator(pred), OPC_MOV, 1, 1);
            __ssa_dst(pred_mov)->flags |= (src->srcs[i]->flags & IR3_REG_HALF);
            unsigned src_flags = IR3_REG_SSA | IR3_REG_SHARED |
                                 (src->srcs[i]->flags & IR3_REG_HALF);
            ir3_src_create(pred_mov, INVALID_REG, src_flags)->def =
               src->srcs[i]->def;
            pred_mov->cat1.src_type = pred_mov->cat1.dst_type =
               (src_flags & IR3_REG_HALF) ? TYPE_U16 : TYPE_U32;

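            /* Keep the precomputed use sets consistent: the phi now uses
             * pred_mov's def instead of the original shared def.
             */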
            _mesa_set_remove_key(src->srcs[i]->def->instr->uses, src);
            _mesa_set_add(src->srcs[i]->def->instr->uses, pred_mov);
            src->srcs[i]->def = pred_mov->dsts[0];
         }
         src->srcs[i]->flags &= ~IR3_REG_SHARED;
      }
   } else if (opc_cat(src->opc) == 2 && src->srcs_count >= 2) {
      /* cat2 vector ALU instructions can have at most one shared or const
       * source.
       */
      if ((src->srcs[0]->flags & (IR3_REG_SHARED | IR3_REG_CONST)) &&
          (src->srcs[1]->flags & (IR3_REG_SHARED | IR3_REG_CONST)))
         return false;
   } else if (opc_cat(src->opc) == 3) {
      /* cat3 vector ALU instructions cannot have a shared src1. */
      if (src->srcs[1]->flags & IR3_REG_SHARED)
         return false;
   } else if (src->opc == OPC_LDC) {
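      /* The .u (uniform) variant of ldc writes a shared destination; drop the
       * flag since the destination is about to become non-shared.
       */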
      src->flags &= ~IR3_INSTR_U;
   } else if (src->opc == OPC_MOV) {
      /* This catches cases like:
       * cov.f32f16 sssa_1, c0.x
       * mov.u16u16 ssa_2, sssa_1
       * The cov can directly write to a non-shared reg.
       */
   } else {
      return false;
   }

   /* Remove IR3_REG_SHARED from the original destination, which should make
    * the mov trivial so that it can be cleaned up later by copy prop.
    */
   src->dsts[0]->flags &= ~IR3_REG_SHARED;
   mov->srcs[0]->flags &= ~IR3_REG_SHARED;

   /* Insert a copy to shared for uses other than this mov instruction. */
   struct ir3_instruction *shared_mov = NULL;
   foreach_ssa_use (use, src) {
      if (use == mov)
         continue;

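      /* Lazily create a single shared copy of src to satisfy all remaining
       * shared uses.
       */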
      if (!shared_mov) {
         struct ir3_builder build =
            ir3_builder_at(ir3_after_instr_and_phis(src));
         shared_mov = ir3_MOV(&build, src, mov->cat1.src_type);
         shared_mov->dsts[0]->flags |= IR3_REG_SHARED;
         shared_mov->uses = _mesa_pointer_set_create(mem_ctx);
      }

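      /* Rewrite this use to read the shared copy instead of the now
       * non-shared src, and track it in the copy's use set.
       */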
      for (unsigned i = 0; i < use->srcs_count; i++) {
         if (use->srcs[i]->def == src->dsts[0])
            use->srcs[i]->def = shared_mov->dsts[0];
      }
      _mesa_set_add(shared_mov->uses, use);
   }

   return true;
}

bool
ir3_shared_fold(struct ir3 *ir)
{
   void *mem_ctx = ralloc_context(NULL);
   bool progress = false;

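   /* Gather SSA use sets up front; try_shared_folding keeps them up to date
    * as it rewrites sources and inserts new movs.
    */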
   ir3_find_ssa_uses(ir, mem_ctx, false);

   /* Folding a phi can push the mov up to its sources, so iterate blocks in
    * reverse to try to convert an entire phi-web in one go.
    */
   foreach_block_rev (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         progress |= try_shared_folding(instr, mem_ctx);
      }
   }

   ralloc_free(mem_ctx);

   return progress;
}