/*
 * Copyright 2024 Igalia S.L.
 * SPDX-License-Identifier: MIT
 */

#include "ir3_nir.h"

8 bool
ir3_supports_vectorized_nir_op(nir_op op)9 ir3_supports_vectorized_nir_op(nir_op op)
10 {
11 switch (op) {
12 /* TODO: emitted as absneg which can often be folded away (e.g., into
13 * (neg)). This seems to often fail when repeated.
14 */
15 case nir_op_b2b1:
16
17 /* dp2acc/dp4acc don't seem to support repeat. */
18 case nir_op_udot_4x8_uadd:
19 case nir_op_udot_4x8_uadd_sat:
20 case nir_op_sudot_4x8_iadd:
21 case nir_op_sudot_4x8_iadd_sat:
22
23 /* Among SFU instructions, only rcp doesn't seem to support repeat. */
24 case nir_op_frcp:
25 return false;
26
27 default:
28 return true;
29 }
30 }
31
32 uint8_t
ir3_nir_vectorize_filter(const nir_instr * instr,const void * data)33 ir3_nir_vectorize_filter(const nir_instr *instr, const void *data)
34 {
35 if (instr->type == nir_instr_type_phi)
36 return 4;
37 if (instr->type != nir_instr_type_alu)
38 return 0;
39
40 struct nir_alu_instr *alu = nir_instr_as_alu(instr);
41
42 if (!ir3_supports_vectorized_nir_op(alu->op))
43 return 0;
44
45 return 4;
46 }
47
48 static void
rpt_list_split(struct list_head * list,struct list_head * at)49 rpt_list_split(struct list_head *list, struct list_head *at)
50 {
51 struct list_head *new_last = at->prev;
52 new_last->next = list;
53 at->prev = list->prev;
54 list->prev->next = at;
55 list->prev = new_last;
56 }
57
58 static enum ir3_register_flags
rpt_compatible_src_flags(struct ir3_register * src)59 rpt_compatible_src_flags(struct ir3_register *src)
60 {
61 return src->flags &
62 (IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_FABS | IR3_REG_FNEG |
63 IR3_REG_BNOT | IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SSA |
64 IR3_REG_HALF | IR3_REG_SHARED);
65 }
66
67 static enum ir3_register_flags
rpt_compatible_dst_flags(struct ir3_instruction * instr)68 rpt_compatible_dst_flags(struct ir3_instruction *instr)
69 {
70 return instr->dsts[0]->flags & (IR3_REG_SSA | IR3_REG_HALF | IR3_REG_SHARED);
71 }
72
73 static enum ir3_register_flags
rpt_illegal_src_flags(struct ir3_register * src)74 rpt_illegal_src_flags(struct ir3_register *src)
75 {
76 return src->flags & (IR3_REG_ARRAY | IR3_REG_RELATIV);
77 }
78
79 static enum ir3_instruction_flags
rpt_compatible_instr_flags(struct ir3_instruction * instr)80 rpt_compatible_instr_flags(struct ir3_instruction *instr)
81 {
82 return instr->flags & IR3_INSTR_SAT;
83 }
84
85 static bool
supports_imm_r(unsigned opc)86 supports_imm_r(unsigned opc)
87 {
88 return opc == OPC_BARY_F || opc == OPC_FLAT_B;
89 }
90
91 static bool
srcs_can_rpt(struct ir3_instruction * instr,struct ir3_register * src,struct ir3_register * rpt_src,unsigned rpt_n)92 srcs_can_rpt(struct ir3_instruction *instr, struct ir3_register *src,
93 struct ir3_register *rpt_src, unsigned rpt_n)
94 {
95 if (rpt_illegal_src_flags(src) != 0 || rpt_illegal_src_flags(rpt_src) != 0)
96 return false;
97 if (rpt_compatible_src_flags(src) != rpt_compatible_src_flags(rpt_src))
98 return false;
99 if (src->flags & IR3_REG_IMMED) {
100 uint32_t val = src->uim_val;
101 uint32_t rpt_val = rpt_src->uim_val;
102
103 if (rpt_val == val)
104 return true;
105 if (supports_imm_r(instr->opc))
106 return rpt_val == val + rpt_n;
107 return false;
108 }
109
110 return true;
111 }
112
113 static bool
can_rpt(struct ir3_instruction * instr,struct ir3_instruction * rpt,unsigned rpt_n)114 can_rpt(struct ir3_instruction *instr, struct ir3_instruction *rpt,
115 unsigned rpt_n)
116 {
117 if (rpt_n >= 4)
118 return false;
119 if (rpt->ip != instr->ip + rpt_n)
120 return false;
121 if (rpt->opc != instr->opc)
122 return false;
123 if (!ir3_supports_rpt(instr->block->shader->compiler, instr->opc))
124 return false;
125 if (rpt_compatible_instr_flags(rpt) != rpt_compatible_instr_flags(instr))
126 return false;
127 if (rpt_compatible_dst_flags(rpt) != rpt_compatible_dst_flags(instr))
128 return false;
129 if (instr->srcs_count != rpt->srcs_count)
130 return false;
131
132 foreach_src_n (src, src_n, instr) {
133 if (!srcs_can_rpt(instr, src, rpt->srcs[src_n], rpt_n))
134 return false;
135 }
136
137 return true;
138 }
139
/* Walk instr's repetition group and, at the first repetition that can never
 * be merged with the preceding ones, split the group in two. Returns true if
 * a split was made.
 */
static bool
cleanup_rpt_instr(struct ir3_instruction *instr)
{
   /* Only process each group once, starting from its first instruction. */
   if (!ir3_instr_is_first_rpt(instr))
      return false;

   unsigned rpt_n = 1;
   foreach_instr_rpt_excl (rpt, instr) {
      if (!can_rpt(instr, rpt, rpt_n++)) {
         /* Keep everything before rpt with instr; rpt and the following
          * repetitions become a separate group.
          */
         rpt_list_split(&instr->rpt_node, &rpt->rpt_node);

         /* We have to do this recursively since later repetitions might come
          * before the first in the instruction list.
          */
         cleanup_rpt_instr(rpt);
         return true;
      }
   }

   return false;
}

162 /* Pre-RA pass to clean up repetition groups that can never be merged into a rpt
163 * instruction. This ensures we don't needlessly allocate merge sets for them.
164 */
165 bool
ir3_cleanup_rpt(struct ir3 * ir,struct ir3_shader_variant * v)166 ir3_cleanup_rpt(struct ir3 *ir, struct ir3_shader_variant *v)
167 {
168 ir3_count_instructions(ir);
169 bool progress = false;
170
171 foreach_block (block, &ir->block_list) {
172 foreach_instr (instr, &block->instr_list)
173 progress |= cleanup_rpt_instr(instr);
174 }
175
176 return progress;
177 }
178
/* Result of comparing one source of two repetition-group instructions (see
 * srcs_rpt_compatible).
 */
enum rpt_src_type {
   RPT_INCOMPATIBLE, /* Incompatible sources. */
   RPT_SET,          /* Compatible sources that need (r) set. */
   RPT_DONT_SET,     /* Compatible sources that don't need (r) set. */
};

185 static enum rpt_src_type
srcs_rpt_compatible(struct ir3_instruction * instr,struct ir3_register * src,struct ir3_register * rpt_src)186 srcs_rpt_compatible(struct ir3_instruction *instr, struct ir3_register *src,
187 struct ir3_register *rpt_src)
188 {
189 /* Shared RA may have demoted some sources from shared to non-shared. When
190 * this happened for some but not all instructions in a repeat group, the
191 * assert below would trigger. Detect this here.
192 */
193 if ((src->flags & IR3_REG_SHARED) != (rpt_src->flags & IR3_REG_SHARED))
194 return RPT_INCOMPATIBLE;
195
196 assert(srcs_can_rpt(instr, src, rpt_src, instr->repeat + 1));
197
198 if (src->flags & IR3_REG_IMMED) {
199 if (supports_imm_r(instr->opc) &&
200 rpt_src->uim_val == src->uim_val + instr->repeat + 1) {
201 return RPT_SET;
202 }
203
204 assert(rpt_src->uim_val == src->uim_val);
205 return RPT_DONT_SET;
206 }
207
208 if (rpt_src->num == src->num + instr->repeat + 1) {
209 if ((src->flags & IR3_REG_R) || instr->repeat == 0)
210 return RPT_SET;
211 return RPT_INCOMPATIBLE;
212 }
213
214 if (rpt_src->num == src->num && !(src->flags & IR3_REG_R))
215 return RPT_DONT_SET;
216 return RPT_INCOMPATIBLE;
217 }
218
/* Grow a contiguous low-bit write mask by one component
 * (e.g., 0b0011 -> 0b0111).
 */
static unsigned
inc_wrmask(unsigned wrmask)
{
   unsigned grown = wrmask << 1;
   return grown | 1u;
}

/* Try to merge rpt, the rpt_n'th repetition, into the rpt instruction instr:
 * set (r) on the sources that need it and grow the src/dst write masks.
 * Returns false, without modifying anything, when the two instructions cannot
 * be encoded as a single repeated instruction.
 */
static bool
try_merge(struct ir3_instruction *instr, struct ir3_instruction *rpt,
          unsigned rpt_n)
{
   /* These were already checked by can_rpt before RA. */
   assert(rpt_n > 0 && rpt_n < 4);
   assert(instr->opc == rpt->opc);
   assert(instr->dsts_count == 1 && rpt->dsts_count == 1);
   assert(instr->srcs_count == rpt->srcs_count);
   assert(rpt_compatible_instr_flags(instr) == rpt_compatible_instr_flags(rpt));

   struct ir3_register *dst = instr->dsts[0];
   struct ir3_register *rpt_dst = rpt->dsts[0];

   /* The instructions and their dst registers must both be consecutive. */
   if (rpt->ip != instr->ip + rpt_n)
      return false;
   if (rpt_dst->num != dst->num + rpt_n)
      return false;

   enum rpt_src_type srcs_rpt[instr->srcs_count];

   /* First pass: check all sources before mutating anything so we never leave
    * instr half-merged.
    */
   foreach_src_n (src, src_n, instr) {
      srcs_rpt[src_n] = srcs_rpt_compatible(instr, src, rpt->srcs[src_n]);

      if (srcs_rpt[src_n] == RPT_INCOMPATIBLE)
         return false;
   }

   /* Second pass: apply (r) and grow the write masks. Note that IR3_REG_R is
    * only masked out on instr's side since it may already have been set while
    * merging earlier repetitions.
    */
   foreach_src_n (src, src_n, instr) {
      assert((src->flags & ~(IR3_REG_R | IR3_REG_KILL | IR3_REG_FIRST_KILL)) ==
             (rpt->srcs[src_n]->flags & ~(IR3_REG_KILL | IR3_REG_FIRST_KILL)));

      if (srcs_rpt[src_n] == RPT_SET) {
         src->flags |= IR3_REG_R;
         src->wrmask = inc_wrmask(src->wrmask);
      }
   }

   dst->wrmask = inc_wrmask(dst->wrmask);
   return true;
}

/* Merge as many repetitions of instr's group as possible into instr itself,
 * turning it into a rpt instruction. Returns true if at least one repetition
 * was merged.
 */
static bool
merge_instr(struct ir3_instruction *instr)
{
   /* Only the first instruction of a group can become the rpt instruction. */
   if (!ir3_instr_is_first_rpt(instr))
      return false;

   bool progress = false;

   unsigned rpt_n = 1;

   foreach_instr_rpt_excl_safe (rpt, instr) {
      /* When rpt cannot be merged, stop immediately. We will try to merge rpt
       * with the following instructions (if any) once we encounter it in
       * ir3_combine_rpt.
       */
      if (!try_merge(instr, rpt, rpt_n))
         break;

      instr->repeat++;

      /* We cannot remove the rpt immediately since when it is the instruction
       * after instr, foreach_instr_safe will fail. So mark it instead and
       * remove it in ir3_combine_rpt when we encounter it.
       */
      rpt->flags |= IR3_INSTR_MARK;
      list_delinit(&rpt->rpt_node);
      ++rpt_n;
      progress = true;
   }

   /* instr now encodes its repetitions itself; take it out of the group. */
   list_delinit(&instr->rpt_node);
   return progress;
}

300 /* Merge compatible instructions in a repetition group into one or more rpt
301 * instructions.
302 */
303 bool
ir3_merge_rpt(struct ir3 * ir,struct ir3_shader_variant * v)304 ir3_merge_rpt(struct ir3 *ir, struct ir3_shader_variant *v)
305 {
306 ir3_clear_mark(ir);
307 ir3_count_instructions(ir);
308 bool progress = false;
309
310 foreach_block (block, &ir->block_list) {
311 foreach_instr_safe (instr, &block->instr_list) {
312 if (instr->flags & IR3_INSTR_MARK) {
313 list_delinit(&instr->node);
314 continue;
315 }
316
317 progress |= merge_instr(instr);
318 }
319 }
320
321 return progress;
322 }
323