/*
 * Copyright 2024 Igalia S.L.
 * SPDX-License-Identifier: MIT
 */

#include "ir3.h"
#include "ir3_shader.h"

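/* Maximum number of entries supported in the alias table. */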
#define MAX_ALIASES 16

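/* Whether alias.tex can be used for the sources of this instruction. */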
static bool
supports_alias_srcs(struct ir3_instruction *instr)
{
   if (!is_tex(instr))
      return false;
   if (is_tex_shuffle(instr))
      return false;
   /* Descriptor prefetches don't support alias.tex. */
   if (instr->opc == OPC_SAM && instr->dsts_count == 0)
      return false;
   /* Seems to not always work properly. Blob disables it as well. */
   if (instr->opc == OPC_ISAM && (instr->flags & IR3_INSTR_IMM_OFFSET))
      return false;
   return true;
}

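/* Only non-shared GPR sources can be turned into aliases. */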
static bool
can_alias_src(struct ir3_register *src)
{
   return is_reg_gpr(src) && !(src->flags & IR3_REG_SHARED);
}

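/* Whether the sources of the instruction that defines src (a collect or a
 * same-type mov) can be aliased directly, making the collect/mov itself
 * unnecessary.
 */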
static bool
can_alias_srcs_of_def(struct ir3_register *src)
{
   if (!can_alias_src(src)) {
      return false;
   }

   assert(src->flags & IR3_REG_SSA);
   struct ir3_instruction *def_instr = src->def->instr;

   if (def_instr->opc == OPC_META_COLLECT) {
      return true;
   }
   if (def_instr->opc == OPC_MOV) {
      return is_same_type_mov(def_instr) &&
             !(def_instr->srcs[0]->flags & IR3_REG_SHARED);
   }

   return false;
}

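/* Rewrite the sources of a single tex instruction: GPR sources are simply
 * marked as aliases while sources defined by collects/movs are replaced by
 * the sources of those collects/movs.
 */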
static bool
alias_srcs(struct ir3_instruction *instr)
{
   bool progress = false;

   /* All sources that come from collects are replaced by the sources of the
    * collects. So allocate a new srcs array to hold all the collect'ed sources
    * as well.
    */
   unsigned new_srcs_count = 0;

   foreach_src_n (src, src_n, instr) {
      if (can_alias_srcs_of_def(src)) {
         new_srcs_count += util_last_bit(src->wrmask);
      } else {
         new_srcs_count++;
      }
   }

   struct ir3_register **old_srcs = instr->srcs;
   unsigned old_srcs_count = instr->srcs_count;
   instr->srcs =
      ir3_alloc(instr->block->shader, new_srcs_count * sizeof(instr->srcs[0]));
   instr->srcs_count = 0;
   unsigned num_aliases = 0;

#if MESA_DEBUG
   instr->srcs_max = new_srcs_count;
#endif

   for (unsigned src_n = 0; src_n < old_srcs_count; src_n++) {
      struct ir3_register *src = old_srcs[src_n];
      bool can_alias = can_alias_src(src);

      if (!can_alias || !can_alias_srcs_of_def(src)) {
         if (can_alias && num_aliases + 1 <= MAX_ALIASES) {
            src->flags |= (IR3_REG_FIRST_ALIAS | IR3_REG_ALIAS);
            num_aliases++;
            progress = true;
         }

         instr->srcs[instr->srcs_count++] = src;
         continue;
      }

      struct ir3_instruction *collect = src->def->instr;
      assert(collect->opc == OPC_META_COLLECT || collect->opc == OPC_MOV);

      /* Make sure we don't create more aliases than supported in the alias
       * table. Note that this is rather conservative because we might actually
       * need fewer due to reuse of GPRs. However, once we mark a src as alias
       * here, and it doesn't get reused, we have to be able to allocate an
       * alias for it. Since it's impossible to predict reuse at this point, we
       * have to be conservative.
       */
      if (num_aliases + collect->srcs_count > MAX_ALIASES) {
         instr->srcs[instr->srcs_count++] = src;
         continue;
      }

      foreach_src_n (collect_src, collect_src_n, collect) {
         struct ir3_register *alias_src;

         if (collect_src->flags & IR3_REG_SSA) {
            alias_src =
               __ssa_src(instr, collect_src->def->instr, collect_src->flags);
         } else {
            alias_src =
               ir3_src_create(instr, collect_src->num, collect_src->flags);
            alias_src->uim_val = collect_src->uim_val;
         }

         alias_src->flags |= IR3_REG_ALIAS;

         if (collect_src_n == 0) {
            alias_src->flags |= IR3_REG_FIRST_ALIAS;
         }
      }

      num_aliases += collect->srcs_count;
      progress = true;
   }

   return progress;
}

/* First alias.tex pass: replace sources of tex instructions with alias sources
 * (IR3_REG_ALIAS):
 * - movs from const/imm: replace with the const/imm;
 * - collects: replace with the sources of the collect;
 * - GPR sources: simply mark as alias.
 *
 * This way, RA won't be forced to allocate consecutive registers for collects
 * and useless collects/movs can be DCE'd. Note that simply lowering collects to
 * aliases doesn't work because RA would assume that killed sources of aliases
 * are dead, while they are in fact live until the tex instruction that uses
 * them.
 */
bool
ir3_create_alias_tex_regs(struct ir3 *ir)
{
   if (!ir->compiler->has_alias_tex)
      return false;
   if (ir3_shader_debug & IR3_DBG_NOALIASTEX)
      return false;

   bool progress = false;

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (supports_alias_srcs(instr)) {
            progress |= alias_srcs(instr);
         }
      }
   }

   return progress;
}

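/* Alias registers are allocated starting from r40.x; the lower GPR range is
 * only used as a fallback when that space is exhausted (see
 * find_free_alias_regs).
 */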
#define FIRST_ALIAS_REG regid(40, 0)

struct alias_table_entry {
   unsigned alias_reg;
   struct ir3_register *src;
};

typedef BITSET_DECLARE(reg_bitset, GPR_REG_SIZE);

struct alias_table_state {
   struct alias_table_entry entries[MAX_ALIASES];
   unsigned num_entries;

   /* The registers currently allocated for the instruction. Note that this
    * includes both alias registers and GPRs that are reused.
    */
   reg_bitset full_alloc;
   reg_bitset half_alloc;
};

static void
add_table_entry(struct alias_table_state *state, unsigned alias_reg,
                struct ir3_register *src)
{
   assert(state->num_entries < ARRAY_SIZE(state->entries));
   struct alias_table_entry *entry = &state->entries[state->num_entries++];
   entry->alias_reg = alias_reg;
   entry->src = src;
}

static void
clear_table(struct alias_table_state *state)
{
   BITSET_ZERO(state->full_alloc);
   BITSET_ZERO(state->half_alloc);
   state->num_entries = 0;
}

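/* Return the alias register already assigned to an equivalent source (same
 * immediate value, or same register/const number and flags), or INVALID_REG
 * if there is none.
 */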
static unsigned
lookup_alias(struct alias_table_state *state, struct ir3_register *alias)
{
   for (unsigned i = 0; i < state->num_entries; i++) {
      struct alias_table_entry *entry = &state->entries[i];
      unsigned match_flags = (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_HALF);

      if ((alias->flags & match_flags) != (entry->src->flags & match_flags)) {
         continue;
      }

      if (alias->flags & IR3_REG_IMMED) {
         if (alias->uim_val == entry->src->uim_val) {
            return entry->alias_reg;
         }
      } else if (alias->num == entry->src->num) {
         return entry->alias_reg;
      }
   }

   return INVALID_REG;
}

/* Find existing entries in the alias table for all aliases in this alias group.
 * If all aliases are already in the table, and they are in consecutive
 * registers, we can simply reuse these registers without creating new table
 * entries.
 * TODO if there's a partial overlap between the start of the alias group and
 * the end of an existing allocation range, we might be able to partially reuse
 * table entries.
 */
static unsigned
find_existing_alloc(struct alias_table_state *state,
                    struct ir3_instruction *instr, unsigned first_src_n)
{
   if (state->num_entries == 0) {
      return INVALID_REG;
   }

   unsigned first_reg = INVALID_REG;

   foreach_src_in_alias_group_n (alias, alias_n, instr, first_src_n) {
      unsigned reg = lookup_alias(state, alias);

      if (reg == INVALID_REG) {
         return INVALID_REG;
      }

      if (alias_n == 0) {
         first_reg = reg;
      } else if (reg != first_reg + alias_n) {
         return INVALID_REG;
      }
   }

   assert(first_reg != INVALID_REG);
   return first_reg;
}

static unsigned
find_free_alias_regs_in_range(const reg_bitset *alloc_regs,
                              unsigned num_aliases, unsigned start,
                              unsigned end)
{
   assert(end >= num_aliases);

   for (unsigned reg = start; reg < end - num_aliases; reg++) {
      if (!BITSET_TEST_RANGE(*alloc_regs, reg, reg + num_aliases - 1)) {
         return reg;
      }
   }

   return INVALID_REG;
}

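/* Find num_aliases consecutive free registers, preferring the range starting
 * at FIRST_ALIAS_REG.
 */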
static unsigned
find_free_alias_regs(const reg_bitset *alloc_regs, unsigned num_aliases)
{
   unsigned reg = find_free_alias_regs_in_range(alloc_regs, num_aliases,
                                                FIRST_ALIAS_REG, GPR_REG_SIZE);

   if (reg != INVALID_REG) {
      return reg;
   }

   return find_free_alias_regs_in_range(alloc_regs, num_aliases, 0,
                                        FIRST_ALIAS_REG);
}

struct reg_alloc_info {
   unsigned first_src_n;
   unsigned reg;
   unsigned num_reused;
};

/* Allocate alias registers for an alias group while trying to minimize the
 * number of needed aliases. That is, if the allocated GPRs for the group are
 * (partially) consecutive, only allocate aliases to fill in the gaps. For
 * example:
 *    sam ..., @{r1.x, r5.z, r1.z}, ...
 * only needs a single alias:
 *    alias.tex.b32.0 r1.y, r5.z
 *    sam ..., r1.x, ...
 */
static struct reg_alloc_info
alloc_alias(struct alias_table_state *state, struct ir3_instruction *instr,
            unsigned first_src_n)
{
   assert(first_src_n < instr->srcs_count);

   struct ir3_register *src0 = instr->srcs[first_src_n];
   assert(src0->flags & IR3_REG_FIRST_ALIAS);

   unsigned num_aliases = 0;

   foreach_src_in_alias_group (alias, instr, first_src_n) {
      num_aliases++;
   }

   assert(num_aliases > 0);

   reg_bitset *alloc_regs =
      (src0->flags & IR3_REG_HALF) ? &state->half_alloc : &state->full_alloc;

   /* All the GPRs used by this alias group that aren't already allocated by
    * previous alias groups.
    */
   unsigned used_regs[num_aliases];

   foreach_src_in_alias_group_n (alias, alias_n, instr, first_src_n) {
      if (is_reg_gpr(alias) && !BITSET_TEST(*alloc_regs, alias->num)) {
         used_regs[alias_n] = alias->num;
      } else {
         used_regs[alias_n] = INVALID_REG;
      }
   }

   /* Find the register that, when allocated to the first src in the alias
    * group, will maximize the number of GPRs reused (i.e., that don't need an
    * alias) in the group.
    */
   unsigned best_reg = INVALID_REG;
   unsigned best_num_reused = 0;

   foreach_src_in_alias_group_n (alias, alias_n, instr, first_src_n) {
      if (used_regs[alias_n] == INVALID_REG) {
         /* No (free) GPR is used by this alias. */
         continue;
      }

      if (alias->num < alias_n) {
         /* To be able to fit the current alias reg in a valid consecutive
          * range, its GPR number needs to be at least its index in the alias
          * group. Otherwise, there won't be enough GPR space left before it:
          *    sam ..., @{r5.w, r0.x, r0.y}, ...
          * Even though r0.x and r0.y are consecutive, we won't be able to reuse
          * them since there's no GPR before r0.x to alias to r5.w.
          */
         continue;
      }

      if (alias->num + num_aliases - alias_n >= GPR_REG_SIZE) {
         /* Same reasoning as above but for the end of the GPR space. */
         continue;
      }

      /* Check if it's possible to reuse the allocated GPR of the current alias
       * reg. If we reuse it, all other aliases in this group will have their
       * GPR number based on the current one and need to be free.
       */
      unsigned first_reg = alias->num - alias_n;

      if (BITSET_TEST_RANGE(*alloc_regs, first_reg,
                            first_reg + num_aliases - 1)) {
         continue;
      }

      /* Check how many GPRs will be reused with this choice. Note that we don't
       * have to check previous registers in the alias group since if we can
       * reuse those, the current alias would have been counted there as well.
       */
      unsigned num_reused = 1;

      for (unsigned i = alias_n + 1; i < num_aliases; i++) {
         if (used_regs[i] == first_reg + i) {
            num_reused++;
         }
      }

      if (num_reused > best_num_reused) {
         best_num_reused = num_reused;
         best_reg = alias->num - alias_n;
      }
   }

   if (best_reg == INVALID_REG) {
      /* No reuse possible, just allocate fresh registers. */
      best_reg = find_free_alias_regs(alloc_regs, num_aliases);

      /* We can use the full GPR space (4 * 48 regs) to allocate aliases which
       * is enough to always find a free range that is large enough. The maximum
       * number of aliases is 12 (src0) + 4 (src1) + 2 (samp_tex) so the worst
       * case reuse looks something like this (note that the number of aliases
       * is limited to 16 so in practice, it will never be this bad):
       *      [ ... src1.x..src1.w ... samp_tex.x samp_tex.y ... ]
       * #GPR 0    11      14         26         27
       * Here, src1 and samp_tex reuse GPRs in such a way that they leave a gap
       * of 11 GPRs around them so that src0 will not fit. There is ample GPR
       * space left for src0 even in this scenario.
       */
      assert(best_reg != INVALID_REG);
   }

   /* Mark used registers as allocated. */
   unsigned end_reg = best_reg + num_aliases - 1;
   assert(end_reg < GPR_REG_SIZE);
   assert(!BITSET_TEST_RANGE(*alloc_regs, best_reg, end_reg));
   BITSET_SET_RANGE(*alloc_regs, best_reg, end_reg);

   /* Add the allocated registers that differ from the ones already used to the
    * alias table.
    */
   for (unsigned i = 0; i < num_aliases; i++) {
      unsigned reg = best_reg + i;

      if (used_regs[i] != reg) {
         struct ir3_register *src = instr->srcs[first_src_n + i];
         add_table_entry(state, reg, src);
      }
   }

   return (struct reg_alloc_info){
      .first_src_n = first_src_n,
      .reg = best_reg,
      .num_reused = best_num_reused,
   };
}

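/* Sort alias groups by decreasing number of reused registers. */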
static int
cmp_alloc(const void *ptr1, const void *ptr2)
{
   const struct reg_alloc_info *alloc1 = ptr1;
   const struct reg_alloc_info *alloc2 = ptr2;
   return alloc2->num_reused - alloc1->num_reused;
}

static void
alloc_aliases(struct alias_table_state *state, struct ir3_instruction *instr,
              unsigned *regs)
{
   unsigned num_alias_groups = 0;

   foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_ALIAS) {
         num_alias_groups++;
      }
   }

   assert(num_alias_groups > 0);
   struct reg_alloc_info allocs[num_alias_groups];
   unsigned alloc_i = 0;

   /* We allocate alias registers in two phases:
    * 1. Allocate each alias group as if it were the only group. This way, the
    * number of registers it can reuse is maximized (because it will never
    * conflict with other groups). We keep track of the number of reused
    * registers per group.
    */
   foreach_src_n (src, src_n, instr) {
      if (src->flags & IR3_REG_FIRST_ALIAS) {
         allocs[alloc_i++] = alloc_alias(state, instr, src_n);
         clear_table(state);
      }
   }

   /* 2. Do the actual allocation of the groups ordered by decreasing number of
    * reused registers. This results in a greater (though not necessarily
    * optimal) total number of reused registers and, thus, a smaller number of
    * table entries. This helps in situations like this:
    *    sam ..., @{r0.z, r1.y}, @{r0.w, r1.x}
    * The first group can reuse 1 register while the second can reuse 2. All
    * valid choices to reuse one register in the first group (r0.z/r0.w or
    * r1.x/r1.y) lead to an overlap with the second group, which means that no
    * reuse is possible in the second group:
    *    alias.tex.b32.2 r0.w, r1.y
    *    alias.tex.b32.0 r40.x, r0.w
    *    alias.tex.b32.0 r40.y, r1.x
    *    sam ..., r0.z, r40.x
    * Allocating the second group first leads to an optimal allocation:
    *    alias.tex.b32.1 r40.x, r0.z
    *    alias.tex.b32.0 r40.y, r1.y
    *    sam ..., r40.x, r0.w
    */
   qsort(allocs, num_alias_groups, sizeof(allocs[0]), cmp_alloc);

   /* Mark all GPR sources that cannot be aliased as allocated since we have to
    * make sure no alias overlaps with them.
    */
   foreach_src (src, instr) {
      if (can_alias_src(src) && !(src->flags & IR3_REG_ALIAS)) {
         reg_bitset *alloc_regs = (src->flags & IR3_REG_HALF)
                                     ? &state->half_alloc
                                     : &state->full_alloc;
         BITSET_SET(*alloc_regs, src->num);
      }
   }

   for (unsigned i = 0; i < num_alias_groups; i++) {
      struct reg_alloc_info *alloc = &allocs[i];

      /* Check if any allocations made by previous groups can be reused for this
       * one. For example, this is relatively common:
       *    sam ..., @{r2.z, 0}, @{0}
       * Reusing the allocation of the first group for the second one gives
       * this:
       *    alias.tex.b32.0 r2.w, 0
       *    sam ..., r2.z, r2.w
       */
      alloc->reg = find_existing_alloc(state, instr, alloc->first_src_n);

      if (alloc->reg == INVALID_REG) {
         *alloc = alloc_alias(state, instr, alloc->first_src_n);
      }

      regs[alloc->first_src_n] = alloc->reg;
   }
}

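/* Emit the alias.tex instructions that populate instr's alias table and
 * rewrite instr's sources to use the allocated registers, collapsing each
 * alias group back into a single source.
 */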
static bool
insert_aliases(struct ir3_instruction *instr)
{
   bool progress = false;

   struct alias_table_state state = {0};
   struct ir3_cursor cursor = ir3_before_instr(instr);

   unsigned regs[instr->srcs_count];
   alloc_aliases(&state, instr, regs);
   assert(state.num_entries <= MAX_ALIASES);

   for (unsigned i = 0; i < state.num_entries; i++) {
      struct alias_table_entry *entry = &state.entries[i];

      struct ir3_instruction *alias =
         ir3_instr_create_at(cursor, OPC_ALIAS, 1, 2);
      alias->cat7.alias_scope = ALIAS_TEX;
      struct ir3_register *src = ir3_src_create(
         alias, entry->src->num,
         entry->src->flags & ~(IR3_REG_FIRST_ALIAS | IR3_REG_ALIAS));
      src->uim_val = entry->src->uim_val;
      ir3_dst_create(alias, entry->alias_reg,
                     (entry->src->flags & IR3_REG_HALF) | IR3_REG_ALIAS);

      if (i == 0) {
         alias->cat7.alias_table_size_minus_one = state.num_entries - 1;
      }

      progress = true;
   }

   unsigned next_src_n = 0;

   for (unsigned src_n = 0; src_n < instr->srcs_count;) {
      struct ir3_register *src0 = instr->srcs[src_n];
      unsigned num_srcs = 0;

      if (src0->flags & IR3_REG_FIRST_ALIAS) {
         foreach_src_in_alias_group (src, instr, src_n) {
            num_srcs++;
         }

         src0->num = regs[src_n];
         src0->flags &= ~(IR3_REG_IMMED | IR3_REG_CONST);
         src0->wrmask = MASK(num_srcs);
      } else {
         num_srcs = 1;
      }

      instr->srcs[next_src_n++] = src0;
      src_n += num_srcs;
   }

   instr->srcs_count = next_src_n;
   return progress;
}

static bool
has_alias_srcs(struct ir3_instruction *instr)
{
   if (!supports_alias_srcs(instr)) {
      return false;
   }

   foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_ALIAS) {
         return true;
      }
   }

   return false;
}

/* Second alias.tex pass: insert alias.tex instructions in front of the tex
 * instructions that need them and fix up the tex instruction's sources. This
 * pass needs to run post-RA (see ir3_create_alias_tex_regs). It also needs to
 * run post-legalization as all the sync flags need to be inserted based on the
 * registers instructions actually use, not on the alias registers they have as
 * sources.
 */
bool
ir3_insert_alias_tex(struct ir3 *ir)
{
   if (!ir->compiler->has_alias_tex)
      return false;
   if (ir3_shader_debug & IR3_DBG_NOALIASTEX)
      return false;

   bool progress = false;

   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (has_alias_srcs(instr)) {
            progress |= insert_aliases(instr);
         }
      }
   }

   return progress;
}

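/* alias.rt instructions are scheduled in the preamble, so make sure one
 * exists.
 */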
static struct ir3_instruction *
get_or_create_shpe(struct ir3 *ir)
{
   struct ir3_instruction *shpe = ir3_find_shpe(ir);

   if (!shpe) {
      shpe = ir3_create_empty_preamble(ir);
      assert(shpe);
   }

   return shpe;
}

static bool
create_output_aliases(struct ir3_shader_variant *v, struct ir3_instruction *end)
{
   bool progress = false;
   struct ir3_instruction *shpe = NULL;

   foreach_src_n (src, src_n, end) {
      struct ir3_shader_output *output = &v->outputs[end->end.outidxs[src_n]];

      if (output->slot < FRAG_RESULT_DATA0 ||
          output->slot > FRAG_RESULT_DATA7) {
         continue;
      }

      assert(src->flags & IR3_REG_SSA);
      struct ir3_instruction *src_instr = src->def->instr;

      if (src_instr->opc != OPC_META_COLLECT && src_instr->opc != OPC_MOV) {
         continue;
      }

      unsigned rt = output->slot - FRAG_RESULT_DATA0;

      foreach_src_n (comp_src, comp, src_instr) {
         if (!(comp_src->flags & (IR3_REG_IMMED | IR3_REG_CONST))) {
            /* Only const and immediate values can be aliased. */
            continue;
         }

         if ((comp_src->flags & IR3_REG_HALF) &&
             (comp_src->flags & IR3_REG_CONST)) {
            /* alias.rt doesn't seem to work with half const.
             * TODO figure out what's going wrong here. Might just be
             * unsupported because the blob only uses it in one CTS test.
             */
            continue;
         }

         if (!shpe) {
            shpe = get_or_create_shpe(v->ir);
         }

         struct ir3_instruction *alias =
            ir3_instr_create_at(ir3_before_instr(shpe), OPC_ALIAS, 1, 2);
         alias->cat7.alias_scope = ALIAS_RT;
         ir3_dst_create(alias, regid(rt, comp), IR3_REG_RT);

         unsigned src_flags =
            comp_src->flags & (IR3_REG_HALF | IR3_REG_CONST | IR3_REG_IMMED);
         ir3_src_create(alias, comp_src->num, src_flags)->uim_val =
            comp_src->uim_val;

         if (src_instr->opc == OPC_MOV) {
            /* The float type bit seems entirely optional (i.e., it only affects
             * disassembly) but since we have this info for movs, we might as
             * well set it.
             */
            alias->cat7.alias_type_float = type_float(src_instr->cat1.dst_type);
         }

         /* Scheduling an alias.rt right before an alias.tex causes a GPU hang.
          * Follow the blob and schedule all alias.rt at the end of the
          * preamble to prevent this from happening.
          */
         alias->barrier_class = alias->barrier_conflict = IR3_BARRIER_CONST_W;

         /* Nothing actually uses the alias.rt dst so make sure it doesn't get
          * DCE'd.
          */
         array_insert(shpe->block, shpe->block->keeps, alias);

         output->aliased_components |= (1 << comp);
         progress = true;
      }

      /* Remove the aliased components from the src so that they can be DCE'd.
       */
      src->wrmask &= ~output->aliased_components;

      if (!src->wrmask) {
         src->def = NULL;
      }
   }

   return progress;
}

/* Replace const and immediate components of the RT sources of end with
 * alias.rt instructions in the preamble.
 */
bool
ir3_create_alias_rt(struct ir3 *ir, struct ir3_shader_variant *v)
{
   if (!ir->compiler->has_alias_rt)
      return false;
   if (ir3_shader_debug & IR3_DBG_NOALIASRT)
      return false;
   if (v->type != MESA_SHADER_FRAGMENT)
      return false;
   if (v->shader_options.fragdata_dynamic_remap)
      return false;

   struct ir3_instruction *end = ir3_find_end(ir);
   assert(end->opc == OPC_END);
   return create_output_aliases(v, end);
}
760