/*
 * Copyright 2024 Igalia S.L.
 * SPDX-License-Identifier: MIT
 */

#include "ir3.h"
#include "ir3_shader.h"

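/* Maximum number of entries supported in the alias table. Both passes below
 * take care never to create more aliases than this for a single instruction.
 */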
#define MAX_ALIASES 16

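/* Whether a tex instruction can have its sources turned into alias sources at
 * all. A few cases (tex shuffles, descriptor prefetches, isam with an
 * immediate offset) are excluded below.
 */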
static bool
supports_alias_srcs(struct ir3_instruction *instr)
{
   if (!is_tex(instr))
      return false;
   if (is_tex_shuffle(instr))
      return false;
   /* Descriptor prefetches don't support alias.tex. */
   if (instr->opc == OPC_SAM && instr->dsts_count == 0)
      return false;
   /* Seems to not always work properly. Blob disables it as well. */
   if (instr->opc == OPC_ISAM && (instr->flags & IR3_INSTR_IMM_OFFSET))
      return false;
   return true;
}

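/* Whether a source register can be marked as an alias: only non-shared GPRs
 * qualify.
 */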
static bool
can_alias_src(struct ir3_register *src)
{
   return is_reg_gpr(src) && !(src->flags & IR3_REG_SHARED);
}

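/* Whether the sources of the instruction defining src (a collect or a
 * same-type mov) can replace src itself as aliases.
 */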
static bool
can_alias_srcs_of_def(struct ir3_register *src)
{
   if (!can_alias_src(src)) {
      return false;
   }

   assert(src->flags & IR3_REG_SSA);
   struct ir3_instruction *def_instr = src->def->instr;

   if (def_instr->opc == OPC_META_COLLECT) {
      return true;
   }
   if (def_instr->opc == OPC_MOV) {
      return is_same_type_mov(def_instr) &&
             !(def_instr->srcs[0]->flags & IR3_REG_SHARED);
   }

   return false;
}

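/* Rewrite the sources of a single tex instruction into alias sources. Roughly
 * (illustrative), a source fed by a collect:
 *    collect ssa_3 = ssa_1, ssa_2
 *    sam ..., ssa_3, ...
 * becomes
 *    sam ..., @{ssa_1, ssa_2}, ...
 * with every replacement source marked IR3_REG_ALIAS and the first one of each
 * group also marked IR3_REG_FIRST_ALIAS, so that the collect can later be
 * DCE'd.
 */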
static bool
alias_srcs(struct ir3_instruction *instr)
{
   bool progress = false;

   /* All sources that come from collects are replaced by the sources of the
    * collects. So allocate a new srcs array to hold all the collect'ed sources
    * as well.
    */
   unsigned new_srcs_count = 0;

   foreach_src_n (src, src_n, instr) {
      if (can_alias_srcs_of_def(src)) {
         new_srcs_count += util_last_bit(src->wrmask);
      } else {
         new_srcs_count++;
      }
   }

   struct ir3_register **old_srcs = instr->srcs;
   unsigned old_srcs_count = instr->srcs_count;
   instr->srcs =
      ir3_alloc(instr->block->shader, new_srcs_count * sizeof(instr->srcs[0]));
   instr->srcs_count = 0;
   unsigned num_aliases = 0;

#if MESA_DEBUG
   instr->srcs_max = new_srcs_count;
#endif

   for (unsigned src_n = 0; src_n < old_srcs_count; src_n++) {
      struct ir3_register *src = old_srcs[src_n];
      bool can_alias = can_alias_src(src);

      if (!can_alias || !can_alias_srcs_of_def(src)) {
         if (can_alias && num_aliases + 1 <= MAX_ALIASES) {
            src->flags |= (IR3_REG_FIRST_ALIAS | IR3_REG_ALIAS);
            num_aliases++;
            progress = true;
         }

         instr->srcs[instr->srcs_count++] = src;
         continue;
      }

      struct ir3_instruction *collect = src->def->instr;
      assert(collect->opc == OPC_META_COLLECT || collect->opc == OPC_MOV);

      /* Make sure we don't create more aliases than supported in the alias
       * table. Note that this is rather conservative because we might actually
       * need fewer due to reuse of GPRs. However, once we mark a src as alias
       * here, and it doesn't get reused, we have to be able to allocate an
       * alias for it. Since it's impossible to predict reuse at this point, we
       * have to be conservative.
       */
      if (num_aliases + collect->srcs_count > MAX_ALIASES) {
         instr->srcs[instr->srcs_count++] = src;
         continue;
      }

      foreach_src_n (collect_src, collect_src_n, collect) {
         struct ir3_register *alias_src;

         if (collect_src->flags & IR3_REG_SSA) {
            alias_src =
               __ssa_src(instr, collect_src->def->instr, collect_src->flags);
         } else {
            alias_src =
               ir3_src_create(instr, collect_src->num, collect_src->flags);
            alias_src->uim_val = collect_src->uim_val;
         }

         alias_src->flags |= IR3_REG_ALIAS;

         if (collect_src_n == 0) {
            alias_src->flags |= IR3_REG_FIRST_ALIAS;
         }
      }

      num_aliases += collect->srcs_count;
      progress = true;
   }

   return progress;
}

/* First alias.tex pass: replace sources of tex instructions with alias sources
 * (IR3_REG_ALIAS):
 * - movs from const/imm: replace with the const/imm;
 * - collects: replace with the sources of the collect;
 * - GPR sources: simply mark as alias.
 *
 * This way, RA won't be forced to allocate consecutive registers for collects,
 * and useless collects/movs can be DCE'd. Note that simply lowering collects to
 * aliases doesn't work because RA would assume that killed sources of aliases
 * are dead, while they are in fact live until the tex instruction that uses
 * them.
 */
bool
ir3_create_alias_tex_regs(struct ir3 *ir)
{
   if (!ir->compiler->has_alias_tex)
      return false;
   if (ir3_shader_debug & IR3_DBG_NOALIASTEX)
      return false;

   bool progress = false;

   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (supports_alias_srcs(instr)) {
            progress |= alias_srcs(instr);
         }
      }
   }

   return progress;
}

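/* Preferred start of the range used when allocating fresh alias registers (see
 * find_free_alias_regs). Lower registers are only used as a fallback,
 * presumably to keep aliases away from the part of the GPR space that is
 * actually allocated.
 */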
#define FIRST_ALIAS_REG regid(40, 0)

struct alias_table_entry {
   unsigned alias_reg;
   struct ir3_register *src;
};

typedef BITSET_DECLARE(reg_bitset, GPR_REG_SIZE);

struct alias_table_state {
   struct alias_table_entry entries[MAX_ALIASES];
   unsigned num_entries;

   /* The registers currently allocated for the instruction. Note that this
    * includes both alias registers as well as GPRs that are reused.
    */
   reg_bitset full_alloc;
   reg_bitset half_alloc;
};

static void
add_table_entry(struct alias_table_state *state, unsigned alias_reg,
                struct ir3_register *src)
{
   assert(state->num_entries < ARRAY_SIZE(state->entries));
   struct alias_table_entry *entry = &state->entries[state->num_entries++];
   entry->alias_reg = alias_reg;
   entry->src = src;
}

static void
clear_table(struct alias_table_state *state)
{
   BITSET_ZERO(state->full_alloc);
   BITSET_ZERO(state->half_alloc);
   state->num_entries = 0;
}

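/* Look for an existing table entry that maps the same value (same immediate,
 * or same register/const num with matching flags) and return its alias
 * register, or INVALID_REG if there is none.
 */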
static unsigned
lookup_alias(struct alias_table_state *state, struct ir3_register *alias)
{
   for (unsigned i = 0; i < state->num_entries; i++) {
      struct alias_table_entry *entry = &state->entries[i];
      unsigned match_flags = (IR3_REG_CONST | IR3_REG_IMMED | IR3_REG_HALF);

      if ((alias->flags & match_flags) != (entry->src->flags & match_flags)) {
         continue;
      }

      if (alias->flags & IR3_REG_IMMED) {
         if (alias->uim_val == entry->src->uim_val) {
            return entry->alias_reg;
         }
      } else if (alias->num == entry->src->num) {
         return entry->alias_reg;
      }
   }

   return INVALID_REG;
}

/* Find existing entries in the alias table for all aliases in this alias group.
 * If all aliases are already in the table, and they are in consecutive
 * registers, we can simply reuse these registers without creating new table
 * entries.
 * TODO if there's a partial overlap between the start of the alias group and
 * the end of an existing allocation range, we might be able to partially reuse
 * table entries.
 */
static unsigned
find_existing_alloc(struct alias_table_state *state,
                    struct ir3_instruction *instr, unsigned first_src_n)
{
   if (state->num_entries == 0) {
      return INVALID_REG;
   }

   unsigned first_reg = INVALID_REG;

   foreach_src_in_alias_group_n (alias, alias_n, instr, first_src_n) {
      unsigned reg = lookup_alias(state, alias);

      if (reg == INVALID_REG) {
         return INVALID_REG;
      }

      if (alias_n == 0) {
         first_reg = reg;
      } else if (reg != first_reg + alias_n) {
         return INVALID_REG;
      }
   }

   assert(first_reg != INVALID_REG);
   return first_reg;
}

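/* Find num_aliases consecutive unallocated registers within [start, end), or
 * return INVALID_REG if no such range exists.
 */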
static unsigned
find_free_alias_regs_in_range(const reg_bitset *alloc_regs,
                              unsigned num_aliases, unsigned start,
                              unsigned end)
{
   assert(end >= num_aliases);

   for (unsigned reg = start; reg < end - num_aliases; reg++) {
      if (!BITSET_TEST_RANGE(*alloc_regs, reg, reg + num_aliases - 1)) {
         return reg;
      }
   }

   return INVALID_REG;
}

static unsigned
find_free_alias_regs(const reg_bitset *alloc_regs, unsigned num_aliases)
{
   unsigned reg = find_free_alias_regs_in_range(alloc_regs, num_aliases,
                                                FIRST_ALIAS_REG, GPR_REG_SIZE);

   if (reg != INVALID_REG) {
      return reg;
   }

   return find_free_alias_regs_in_range(alloc_regs, num_aliases, 0,
                                        FIRST_ALIAS_REG);
}

struct reg_alloc_info {
   unsigned first_src_n;
   unsigned reg;
   unsigned num_reused;
};

/* Allocate alias registers for an alias group while trying to minimize the
 * number of needed aliases. That is, if the allocated GPRs for the group are
 * (partially) consecutive, only allocate aliases to fill in the gaps. For
 * example:
 *    sam ..., @{r1.x, r5.z, r1.z}, ...
 * only needs a single alias:
 *    alias.tex.b32.0 r1.y, r5.z
 *    sam ..., r1.x, ...
 */
static struct reg_alloc_info
alloc_alias(struct alias_table_state *state, struct ir3_instruction *instr,
            unsigned first_src_n)
{
   assert(first_src_n < instr->srcs_count);

   struct ir3_register *src0 = instr->srcs[first_src_n];
   assert(src0->flags & IR3_REG_FIRST_ALIAS);

   unsigned num_aliases = 0;

   foreach_src_in_alias_group (alias, instr, first_src_n) {
      num_aliases++;
   }

   assert(num_aliases > 0);

   reg_bitset *alloc_regs =
      (src0->flags & IR3_REG_HALF) ? &state->half_alloc : &state->full_alloc;

   /* All the GPRs used by this alias group that aren't already allocated by
    * previous alias groups.
    */
   unsigned used_regs[num_aliases];

   foreach_src_in_alias_group_n (alias, alias_n, instr, first_src_n) {
      if (is_reg_gpr(alias) && !BITSET_TEST(*alloc_regs, alias->num)) {
         used_regs[alias_n] = alias->num;
      } else {
         used_regs[alias_n] = INVALID_REG;
      }
   }

   /* Find the register that, when allocated to the first src in the alias
    * group, will maximize the number of GPRs reused (i.e., that don't need an
    * alias) in the group.
    */
   unsigned best_reg = INVALID_REG;
   unsigned best_num_reused = 0;

   foreach_src_in_alias_group_n (alias, alias_n, instr, first_src_n) {
      if (used_regs[alias_n] == INVALID_REG) {
         /* No (free) GPR is used by this alias. */
         continue;
      }

      if (alias->num < alias_n) {
         /* To be able to fit the current alias reg in a valid consecutive
          * range, its GPR number needs to be at least its index in the alias
          * group. Otherwise, there won't be enough GPR space left before it:
          *    sam, ..., @{r5.w, r0.x, r0.y}, ...
          * Even though r0.x and r0.y are consecutive, we won't be able to reuse
          * them since there's no GPR before r0.x to alias to r5.w.
          */
         continue;
      }

      if (alias->num + num_aliases - alias_n >= GPR_REG_SIZE) {
         /* Same reasoning as above but for the end of the GPR space. */
         continue;
      }

      /* Check if it's possible to reuse the allocated GPR of the current alias
       * reg. If we reuse it, all other aliases in this group will have their
       * GPR number based on the current one and need to be free.
       */
      unsigned first_reg = alias->num - alias_n;

      if (BITSET_TEST_RANGE(*alloc_regs, first_reg,
                            first_reg + num_aliases - 1)) {
         continue;
      }

      /* Check how many GPRs will be reused with this choice. Note that we don't
       * have to check previous registers in the alias group since if we can
       * reuse those, the current alias would have been counted there as well.
       */
      unsigned num_reused = 1;

      for (unsigned i = alias_n + 1; i < num_aliases; i++) {
         if (used_regs[i] == first_reg + i) {
            num_reused++;
         }
      }

      if (num_reused > best_num_reused) {
         best_num_reused = num_reused;
         best_reg = alias->num - alias_n;
      }
   }

   if (best_reg == INVALID_REG) {
      /* No reuse possible, just allocate fresh registers. */
      best_reg = find_free_alias_regs(alloc_regs, num_aliases);

      /* We can use the full GPR space (4 * 48 regs) to allocate aliases, which
       * is always enough to find a sufficiently large free range. The maximum
       * number of aliases is 12 (src0) + 4 (src1) + 2 (samp_tex), so the worst
       * case reuse looks something like this (note that the number of aliases
       * is limited to 16 so in practice, it will never be this bad):
       *       [ ... src1.x..src1.w ... samp_tex.x samp_tex.y ... ]
       * #GPR    0   11      14         26         27
       * Here, src1 and samp_tex reuse GPRs in such a way that they leave a gap
       * of 11 GPRs around them so that src0 will not fit. There is ample GPR
       * space left for src0 even in this scenario.
       */
      assert(best_reg != INVALID_REG);
   }

   /* Mark used registers as allocated. */
   unsigned end_reg = best_reg + num_aliases - 1;
   assert(end_reg < GPR_REG_SIZE);
   assert(!BITSET_TEST_RANGE(*alloc_regs, best_reg, end_reg));
   BITSET_SET_RANGE(*alloc_regs, best_reg, end_reg);

   /* Add the allocated registers that differ from the ones already used to the
    * alias table.
    */
   for (unsigned i = 0; i < num_aliases; i++) {
      unsigned reg = best_reg + i;

      if (used_regs[i] != reg) {
         struct ir3_register *src = instr->srcs[first_src_n + i];
         add_table_entry(state, reg, src);
      }
   }

   return (struct reg_alloc_info){
      .first_src_n = first_src_n,
      .reg = best_reg,
      .num_reused = best_num_reused,
   };
}

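/* qsort comparator: order alias groups by decreasing number of reused
 * registers.
 */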
static int
cmp_alloc(const void *ptr1, const void *ptr2)
{
   const struct reg_alloc_info *alloc1 = ptr1;
   const struct reg_alloc_info *alloc2 = ptr2;
   return alloc2->num_reused - alloc1->num_reused;
}

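/* Allocate alias registers for all alias groups of instr. For every group, the
 * register assigned to its first source is recorded in regs, indexed by the
 * source index of that first source.
 */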
static void
alloc_aliases(struct alias_table_state *state, struct ir3_instruction *instr,
              unsigned *regs)
{
   unsigned num_alias_groups = 0;

   foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_ALIAS) {
         num_alias_groups++;
      }
   }

   assert(num_alias_groups > 0);
   struct reg_alloc_info allocs[num_alias_groups];
   unsigned alloc_i = 0;

   /* We allocate alias registers in two phases:
    * 1. Allocate each alias group as if it were the only group. This way, the
    * number of registers it can reuse is maximized (because it will never
    * conflict with other groups). We keep track of the number of reused
    * registers per group.
    */
   foreach_src_n (src, src_n, instr) {
      if (src->flags & IR3_REG_FIRST_ALIAS) {
         allocs[alloc_i++] = alloc_alias(state, instr, src_n);
         clear_table(state);
      }
   }

   /* 2. Do the actual allocation of the groups ordered by decreasing number of
    * reused registers. This results in a greater (though not necessarily
    * optimal) total number of reused registers and, thus, a smaller number of
    * table entries. This helps in situations like this:
    *    sam ..., @{r0.z, r1.y}, @{r0.w, r1.x}
    * The first group can reuse 1 register while the second can reuse 2. All
    * valid choices to reuse one register in the first group (r0.z/r0.w or
    * r1.x/r1.y) lead to an overlap with the second group, which means that no
    * reuse is possible in the second group:
    *    alias.tex.b32.2 r0.w, r1.y
    *    alias.tex.b32.0 r40.x, r0.w
    *    alias.tex.b32.0 r40.y, r1.x
    *    sam ..., r0.z, r40.x
    * Allocating the second group first leads to an optimal allocation:
    *    alias.tex.b32.1 r40.x, r0.z
    *    alias.tex.b32.0 r40.y, r1.y
    *    sam ..., r40.x, r0.w
    */
   qsort(allocs, num_alias_groups, sizeof(allocs[0]), cmp_alloc);

   /* Mark all GPR sources that cannot be aliased as allocated since we have to
    * make sure no alias overlaps with them.
    */
   foreach_src (src, instr) {
      if (can_alias_src(src) && !(src->flags & IR3_REG_ALIAS)) {
         reg_bitset *alloc_regs = (src->flags & IR3_REG_HALF)
                                     ? &state->half_alloc
                                     : &state->full_alloc;
         BITSET_SET(*alloc_regs, src->num);
      }
   }

   for (unsigned i = 0; i < num_alias_groups; i++) {
      struct reg_alloc_info *alloc = &allocs[i];

      /* Check if any allocations made by previous groups can be reused for this
       * one. For example, this is relatively common:
       *    sam ..., @{r2.z, 0}, @{0}
       * Reusing the allocation of the first group for the second one gives
       * this:
       *    alias.tex.b32.0 r2.w, 0
       *    sam ..., r2.z, r2.w
       */
      alloc->reg = find_existing_alloc(state, instr, alloc->first_src_n);

      if (alloc->reg == INVALID_REG) {
         *alloc = alloc_alias(state, instr, alloc->first_src_n);
      }

      regs[alloc->first_src_n] = alloc->reg;
   }
}

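/* Emit the alias.tex instructions for instr's alias table right before it and
 * rewrite instr's sources so that each alias group collapses into a single
 * source pointing at the allocated (possibly reused) registers.
 */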
static bool
insert_aliases(struct ir3_instruction *instr)
{
   bool progress = false;

   struct alias_table_state state = {0};
   struct ir3_cursor cursor = ir3_before_instr(instr);

   unsigned regs[instr->srcs_count];
   alloc_aliases(&state, instr, regs);
   assert(state.num_entries <= MAX_ALIASES);

   for (unsigned i = 0; i < state.num_entries; i++) {
      struct alias_table_entry *entry = &state.entries[i];

      struct ir3_instruction *alias =
         ir3_instr_create_at(cursor, OPC_ALIAS, 1, 2);
      alias->cat7.alias_scope = ALIAS_TEX;
      struct ir3_register *src = ir3_src_create(
         alias, entry->src->num,
         entry->src->flags & ~(IR3_REG_FIRST_ALIAS | IR3_REG_ALIAS));
      src->uim_val = entry->src->uim_val;
      ir3_dst_create(alias, entry->alias_reg,
                     (entry->src->flags & IR3_REG_HALF) | IR3_REG_ALIAS);

      if (i == 0) {
         alias->cat7.alias_table_size_minus_one = state.num_entries - 1;
      }

      progress = true;
   }

   unsigned next_src_n = 0;

   for (unsigned src_n = 0; src_n < instr->srcs_count;) {
      struct ir3_register *src0 = instr->srcs[src_n];
      unsigned num_srcs = 0;

      if (src0->flags & IR3_REG_FIRST_ALIAS) {
         foreach_src_in_alias_group (src, instr, src_n) {
            num_srcs++;
         }

         src0->num = regs[src_n];
         src0->flags &= ~(IR3_REG_IMMED | IR3_REG_CONST);
         src0->wrmask = MASK(num_srcs);
      } else {
         num_srcs = 1;
      }

      instr->srcs[next_src_n++] = src0;
      src_n += num_srcs;
   }

   instr->srcs_count = next_src_n;
   return progress;
}

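/* Whether instr has alias sources created by ir3_create_alias_tex_regs. */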
static bool
has_alias_srcs(struct ir3_instruction *instr)
{
   if (!supports_alias_srcs(instr)) {
      return false;
   }

   foreach_src (src, instr) {
      if (src->flags & IR3_REG_FIRST_ALIAS) {
         return true;
      }
   }

   return false;
}

/* Second alias.tex pass: insert alias.tex instructions in front of the tex
 * instructions that need them and fix up the tex instruction's sources. This
 * pass needs to run post-RA (see ir3_create_alias_tex_regs). It also needs to
 * run post-legalization as all the sync flags need to be inserted based on the
 * registers that instructions actually use, not on the alias registers they
 * have as sources.
 */
bool
ir3_insert_alias_tex(struct ir3 *ir)
{
   if (!ir->compiler->has_alias_tex)
      return false;
   if (ir3_shader_debug & IR3_DBG_NOALIASTEX)
      return false;

   bool progress = false;

   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (has_alias_srcs(instr)) {
            progress |= insert_aliases(instr);
         }
      }
   }

   return progress;
}

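/* Return the shpe (end-of-preamble) instruction, creating an empty preamble if
 * the shader doesn't have one yet.
 */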
static struct ir3_instruction *
get_or_create_shpe(struct ir3 *ir)
{
   struct ir3_instruction *shpe = ir3_find_shpe(ir);

   if (!shpe) {
      shpe = ir3_create_empty_preamble(ir);
      assert(shpe);
   }

   return shpe;
}

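/* Walk the render-target outputs written by the end instruction and emit an
 * alias.rt in the preamble for each const/immediate component. The aliased
 * components are then removed from the end source so that the feeding
 * mov/collect can be DCE'd.
 */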
static bool
create_output_aliases(struct ir3_shader_variant *v, struct ir3_instruction *end)
{
   bool progress = false;
   struct ir3_instruction *shpe = NULL;

   foreach_src_n (src, src_n, end) {
      struct ir3_shader_output *output = &v->outputs[end->end.outidxs[src_n]];

      if (output->slot < FRAG_RESULT_DATA0 ||
          output->slot > FRAG_RESULT_DATA7) {
         continue;
      }

      assert(src->flags & IR3_REG_SSA);
      struct ir3_instruction *src_instr = src->def->instr;

      if (src_instr->opc != OPC_META_COLLECT && src_instr->opc != OPC_MOV) {
         continue;
      }

      unsigned rt = output->slot - FRAG_RESULT_DATA0;

      foreach_src_n (comp_src, comp, src_instr) {
         if (!(comp_src->flags & (IR3_REG_IMMED | IR3_REG_CONST))) {
            /* Only const and immediate values can be aliased. */
            continue;
         }

         if ((comp_src->flags & IR3_REG_HALF) &&
             (comp_src->flags & IR3_REG_CONST)) {
            /* alias.rt doesn't seem to work with half const.
             * TODO figure out what's going wrong here. Might just be
             * unsupported, given that the blob only uses it in one CTS test.
             */
            continue;
         }

         if (!shpe) {
            shpe = get_or_create_shpe(v->ir);
         }

         struct ir3_instruction *alias =
            ir3_instr_create_at(ir3_before_instr(shpe), OPC_ALIAS, 1, 2);
         alias->cat7.alias_scope = ALIAS_RT;
         ir3_dst_create(alias, regid(rt, comp), IR3_REG_RT);

         unsigned src_flags =
            comp_src->flags & (IR3_REG_HALF | IR3_REG_CONST | IR3_REG_IMMED);
         ir3_src_create(alias, comp_src->num, src_flags)->uim_val =
            comp_src->uim_val;

         if (src_instr->opc == OPC_MOV) {
            /* The float type bit seems entirely optional (i.e., it only affects
             * disassembly) but since we have this info for movs, we might as
             * well set it.
             */
            alias->cat7.alias_type_float = type_float(src_instr->cat1.dst_type);
         }

         /* Scheduling an alias.rt right before an alias.tex causes a GPU hang.
          * Follow the blob and schedule all alias.rt at the end of the
          * preamble to prevent this from happening.
          */
         alias->barrier_class = alias->barrier_conflict = IR3_BARRIER_CONST_W;

         /* Nothing actually uses the alias.rt dst so make sure it doesn't get
          * DCE'd.
          */
         array_insert(shpe->block, shpe->block->keeps, alias);

         output->aliased_components |= (1 << comp);
         progress = true;
      }

      /* Remove the aliased components from the src so that they can be DCE'd.
       */
      src->wrmask &= ~output->aliased_components;

      if (!src->wrmask) {
         src->def = NULL;
      }
   }

   return progress;
}

/* Replace const and immediate components of the RT sources of end with alias.rt
 * instructions in the preamble.
 */
bool
ir3_create_alias_rt(struct ir3 *ir, struct ir3_shader_variant *v)
{
   if (!ir->compiler->has_alias_rt)
      return false;
   if (ir3_shader_debug & IR3_DBG_NOALIASRT)
      return false;
   if (v->type != MESA_SHADER_FRAGMENT)
      return false;
   if (v->shader_options.fragdata_dynamic_remap)
      return false;

   struct ir3_instruction *end = ir3_find_end(ir);
   assert(end->opc == OPC_END);
   return create_output_aliases(v, end);
}