• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2024 Igalia S.L.
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "ir3_nir.h"
7 
8 bool
ir3_supports_vectorized_nir_op(nir_op op)9 ir3_supports_vectorized_nir_op(nir_op op)
10 {
11    switch (op) {
12       /* TODO: emitted as absneg which can often be folded away (e.g., into
13        * (neg)). This seems to often fail when repeated.
14        */
15    case nir_op_b2b1:
16 
17       /* dp2acc/dp4acc don't seem to support repeat. */
18    case nir_op_udot_4x8_uadd:
19    case nir_op_udot_4x8_uadd_sat:
20    case nir_op_sudot_4x8_iadd:
21    case nir_op_sudot_4x8_iadd_sat:
22 
23       /* Among SFU instructions, only rcp doesn't seem to support repeat. */
24    case nir_op_frcp:
25       return false;
26 
27    default:
28       return true;
29    }
30 }
31 
32 uint8_t
ir3_nir_vectorize_filter(const nir_instr * instr,const void * data)33 ir3_nir_vectorize_filter(const nir_instr *instr, const void *data)
34 {
35    if (instr->type == nir_instr_type_phi)
36       return 4;
37    if (instr->type != nir_instr_type_alu)
38       return 0;
39 
40    struct nir_alu_instr *alu = nir_instr_as_alu(instr);
41 
42    if (!ir3_supports_vectorized_nir_op(alu->op))
43       return 0;
44 
45    return 4;
46 }
47 
48 static void
rpt_list_split(struct list_head * list,struct list_head * at)49 rpt_list_split(struct list_head *list, struct list_head *at)
50 {
51    struct list_head *new_last = at->prev;
52    new_last->next = list;
53    at->prev = list->prev;
54    list->prev->next = at;
55    list->prev = new_last;
56 }
57 
58 static enum ir3_register_flags
rpt_compatible_src_flags(struct ir3_register * src)59 rpt_compatible_src_flags(struct ir3_register *src)
60 {
61    return src->flags &
62           (IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_FABS | IR3_REG_FNEG |
63            IR3_REG_BNOT | IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_SSA |
64            IR3_REG_HALF | IR3_REG_SHARED);
65 }
66 
67 static enum ir3_register_flags
rpt_compatible_dst_flags(struct ir3_instruction * instr)68 rpt_compatible_dst_flags(struct ir3_instruction *instr)
69 {
70    return instr->dsts[0]->flags & (IR3_REG_SSA | IR3_REG_HALF | IR3_REG_SHARED);
71 }
72 
73 static enum ir3_register_flags
rpt_illegal_src_flags(struct ir3_register * src)74 rpt_illegal_src_flags(struct ir3_register *src)
75 {
76    return src->flags & (IR3_REG_ARRAY | IR3_REG_RELATIV);
77 }
78 
79 static enum ir3_instruction_flags
rpt_compatible_instr_flags(struct ir3_instruction * instr)80 rpt_compatible_instr_flags(struct ir3_instruction *instr)
81 {
82    return instr->flags & IR3_INSTR_SAT;
83 }
84 
85 static bool
supports_imm_r(unsigned opc)86 supports_imm_r(unsigned opc)
87 {
88    return opc == OPC_BARY_F || opc == OPC_FLAT_B;
89 }
90 
91 static bool
srcs_can_rpt(struct ir3_instruction * instr,struct ir3_register * src,struct ir3_register * rpt_src,unsigned rpt_n)92 srcs_can_rpt(struct ir3_instruction *instr, struct ir3_register *src,
93              struct ir3_register *rpt_src, unsigned rpt_n)
94 {
95    if (rpt_illegal_src_flags(src) != 0 || rpt_illegal_src_flags(rpt_src) != 0)
96       return false;
97    if (rpt_compatible_src_flags(src) != rpt_compatible_src_flags(rpt_src))
98       return false;
99    if (src->flags & IR3_REG_IMMED) {
100       uint32_t val = src->uim_val;
101       uint32_t rpt_val = rpt_src->uim_val;
102 
103       if (rpt_val == val)
104          return true;
105       if (supports_imm_r(instr->opc))
106          return rpt_val == val + rpt_n;
107       return false;
108    }
109 
110    return true;
111 }
112 
113 static bool
can_rpt(struct ir3_instruction * instr,struct ir3_instruction * rpt,unsigned rpt_n)114 can_rpt(struct ir3_instruction *instr, struct ir3_instruction *rpt,
115         unsigned rpt_n)
116 {
117    if (rpt_n >= 4)
118       return false;
119    if (rpt->ip != instr->ip + rpt_n)
120       return false;
121    if (rpt->opc != instr->opc)
122       return false;
123    if (!ir3_supports_rpt(instr->block->shader->compiler, instr->opc))
124       return false;
125    if (rpt_compatible_instr_flags(rpt) != rpt_compatible_instr_flags(instr))
126       return false;
127    if (rpt_compatible_dst_flags(rpt) != rpt_compatible_dst_flags(instr))
128       return false;
129    if (instr->srcs_count != rpt->srcs_count)
130       return false;
131 
132    foreach_src_n (src, src_n, instr) {
133       if (!srcs_can_rpt(instr, src, rpt->srcs[src_n], rpt_n))
134          return false;
135    }
136 
137    return true;
138 }
139 
140 static bool
cleanup_rpt_instr(struct ir3_instruction * instr)141 cleanup_rpt_instr(struct ir3_instruction *instr)
142 {
143    if (!ir3_instr_is_first_rpt(instr))
144       return false;
145 
146    unsigned rpt_n = 1;
147    foreach_instr_rpt_excl (rpt, instr) {
148       if (!can_rpt(instr, rpt, rpt_n++)) {
149          rpt_list_split(&instr->rpt_node, &rpt->rpt_node);
150 
151          /* We have to do this recursively since later repetitions might come
152           * before the first in the instruction list.
153           */
154          cleanup_rpt_instr(rpt);
155          return true;
156       }
157    }
158 
159    return false;
160 }
161 
162 /* Pre-RA pass to clean up repetition groups that can never be merged into a rpt
163  * instruction. This ensures we don't needlessly allocate merge sets for them.
164  */
165 bool
ir3_cleanup_rpt(struct ir3 * ir,struct ir3_shader_variant * v)166 ir3_cleanup_rpt(struct ir3 *ir, struct ir3_shader_variant *v)
167 {
168    ir3_count_instructions(ir);
169    bool progress = false;
170 
171    foreach_block (block, &ir->block_list) {
172       foreach_instr (instr, &block->instr_list)
173          progress |= cleanup_rpt_instr(instr);
174    }
175 
176    return progress;
177 }
178 
/* Classification of a pair of sources considered for merging. */
enum rpt_src_type {
   /* Incompatible sources. */
   RPT_INCOMPATIBLE = 0,
   /* Compatible sources that need (r) set. */
   RPT_SET = 1,
   /* Compatible sources that don't need (r) set. */
   RPT_DONT_SET = 2,
};
184 
185 static enum rpt_src_type
srcs_rpt_compatible(struct ir3_instruction * instr,struct ir3_register * src,struct ir3_register * rpt_src)186 srcs_rpt_compatible(struct ir3_instruction *instr, struct ir3_register *src,
187                     struct ir3_register *rpt_src)
188 {
189    /* Shared RA may have demoted some sources from shared to non-shared. When
190     * this happened for some but not all instructions in a repeat group, the
191     * assert below would trigger. Detect this here.
192     */
193    if ((src->flags & IR3_REG_SHARED) != (rpt_src->flags & IR3_REG_SHARED))
194       return RPT_INCOMPATIBLE;
195 
196    assert(srcs_can_rpt(instr, src, rpt_src, instr->repeat + 1));
197 
198    if (src->flags & IR3_REG_IMMED) {
199       if (supports_imm_r(instr->opc) &&
200           rpt_src->uim_val == src->uim_val + instr->repeat + 1) {
201          return RPT_SET;
202       }
203 
204       assert(rpt_src->uim_val == src->uim_val);
205       return RPT_DONT_SET;
206    }
207 
208    if (rpt_src->num == src->num + instr->repeat + 1) {
209       if ((src->flags & IR3_REG_R) || instr->repeat == 0)
210          return RPT_SET;
211       return RPT_INCOMPATIBLE;
212    }
213 
214    if (rpt_src->num == src->num && !(src->flags & IR3_REG_R))
215       return RPT_DONT_SET;
216    return RPT_INCOMPATIBLE;
217 }
218 
/* Returns wrmask shifted left by one bit with bit 0 set, i.e. a mask of n
 * contiguous low bits becomes a mask of n + 1 contiguous low bits.
 */
static unsigned
inc_wrmask(unsigned wrmask)
{
   unsigned shifted = wrmask << 1;

   return shifted | 1u;
}
224 
225 static bool
try_merge(struct ir3_instruction * instr,struct ir3_instruction * rpt,unsigned rpt_n)226 try_merge(struct ir3_instruction *instr, struct ir3_instruction *rpt,
227           unsigned rpt_n)
228 {
229    assert(rpt_n > 0 && rpt_n < 4);
230    assert(instr->opc == rpt->opc);
231    assert(instr->dsts_count == 1 && rpt->dsts_count == 1);
232    assert(instr->srcs_count == rpt->srcs_count);
233    assert(rpt_compatible_instr_flags(instr) == rpt_compatible_instr_flags(rpt));
234 
235    struct ir3_register *dst = instr->dsts[0];
236    struct ir3_register *rpt_dst = rpt->dsts[0];
237 
238    if (rpt->ip != instr->ip + rpt_n)
239       return false;
240    if (rpt_dst->num != dst->num + rpt_n)
241       return false;
242 
243    enum rpt_src_type srcs_rpt[instr->srcs_count];
244 
245    foreach_src_n (src, src_n, instr) {
246       srcs_rpt[src_n] = srcs_rpt_compatible(instr, src, rpt->srcs[src_n]);
247 
248       if (srcs_rpt[src_n] == RPT_INCOMPATIBLE)
249          return false;
250    }
251 
252    foreach_src_n (src, src_n, instr) {
253       assert((src->flags & ~(IR3_REG_R | IR3_REG_KILL | IR3_REG_FIRST_KILL)) ==
254              (rpt->srcs[src_n]->flags & ~(IR3_REG_KILL | IR3_REG_FIRST_KILL)));
255 
256       if (srcs_rpt[src_n] == RPT_SET) {
257          src->flags |= IR3_REG_R;
258          src->wrmask = inc_wrmask(src->wrmask);
259       }
260    }
261 
262    dst->wrmask = inc_wrmask(dst->wrmask);
263    return true;
264 }
265 
266 static bool
merge_instr(struct ir3_instruction * instr)267 merge_instr(struct ir3_instruction *instr)
268 {
269    if (!ir3_instr_is_first_rpt(instr))
270       return false;
271 
272    bool progress = false;
273 
274    unsigned rpt_n = 1;
275 
276    foreach_instr_rpt_excl_safe (rpt, instr) {
277       /* When rpt cannot be merged, stop immediately. We will try to merge rpt
278        * with the following instructions (if any) once we encounter it in
279        * ir3_combine_rpt.
280        */
281       if (!try_merge(instr, rpt, rpt_n))
282          break;
283 
284       instr->repeat++;
285 
286       /* We cannot remove the rpt immediately since when it is the instruction
287        * after instr, foreach_instr_safe will fail. So mark it instead and
288        * remove it in ir3_combine_rpt when we encounter it.
289        */
290       rpt->flags |= IR3_INSTR_MARK;
291       list_delinit(&rpt->rpt_node);
292       ++rpt_n;
293       progress = true;
294    }
295 
296    list_delinit(&instr->rpt_node);
297    return progress;
298 }
299 
300 /* Merge compatible instructions in a repetition group into one or more rpt
301  * instructions.
302  */
303 bool
ir3_merge_rpt(struct ir3 * ir,struct ir3_shader_variant * v)304 ir3_merge_rpt(struct ir3 *ir, struct ir3_shader_variant *v)
305 {
306    ir3_clear_mark(ir);
307    ir3_count_instructions(ir);
308    bool progress = false;
309 
310    foreach_block (block, &ir->block_list) {
311       foreach_instr_safe (instr, &block->instr_list) {
312          if (instr->flags & IR3_INSTR_MARK) {
313             list_delinit(&instr->node);
314             continue;
315          }
316 
317          progress |= merge_instr(instr);
318       }
319    }
320 
321    return progress;
322 }
323