1 /*
2 * Copyright © 2024 Valve Corporation
3 *
4 * SPDX-License-Identifier: MIT
5 */
6
7 #include "aco_builder.h"
8 #include "aco_ir.h"
9
10 namespace aco {
11 namespace {
12
13 struct branch_ctx {
14 Program* program;
15
branch_ctxaco::__anon839568660111::branch_ctx16 branch_ctx(Program* program_) : program(program_) {}
17 };
18
19 void
remove_linear_successor(branch_ctx & ctx,Block & block,uint32_t succ_index)20 remove_linear_successor(branch_ctx& ctx, Block& block, uint32_t succ_index)
21 {
22 Block& succ = ctx.program->blocks[succ_index];
23 ASSERTED auto it = std::remove(succ.linear_preds.begin(), succ.linear_preds.end(), block.index);
24 assert(std::next(it) == succ.linear_preds.end());
25 succ.linear_preds.pop_back();
26 it = std::remove(block.linear_succs.begin(), block.linear_succs.end(), succ_index);
27 assert(std::next(it) == block.linear_succs.end());
28 block.linear_succs.pop_back();
29
30 if (succ.linear_preds.empty()) {
31 /* This block became unreachable - Recursively remove successors. */
32 succ.instructions.clear();
33 for (unsigned i : succ.linear_succs)
34 remove_linear_successor(ctx, succ, i);
35 }
36 }
37
38 /**
39 * Check if the branch instruction can be removed:
40 * This is beneficial when executing the next block with an empty exec mask
41 * is faster than the branch instruction itself.
42 *
43 * Override this judgement when:
44 * - The application prefers to remove control flow
45 * - The compiler stack knows that it's a divergent branch never taken
46 */
47 bool
can_remove_branch(branch_ctx & ctx,Block & block,Pseudo_branch_instruction * branch)48 can_remove_branch(branch_ctx& ctx, Block& block, Pseudo_branch_instruction* branch)
49 {
50 const uint32_t target = branch->target[0];
51 const bool uniform_branch =
52 !((branch->opcode == aco_opcode::p_cbranch_z || branch->opcode == aco_opcode::p_cbranch_nz) &&
53 branch->operands[0].physReg() == exec);
54
55 if (branch->never_taken) {
56 assert(!uniform_branch);
57 return true;
58 }
59
60 /* Cannot remove back-edges. */
61 if (block.index >= target)
62 return false;
63
64 const bool prefer_remove = branch->rarely_taken;
65 unsigned num_scalar = 0;
66 unsigned num_vector = 0;
67
68 /* Check the instructions between branch and target */
69 for (unsigned i = block.index + 1; i < target; i++) {
70 /* Uniform conditional branches must not be ignored if they
71 * are about to jump over actual instructions */
72 if (uniform_branch && !ctx.program->blocks[i].instructions.empty())
73 return false;
74
75 for (aco_ptr<Instruction>& instr : ctx.program->blocks[i].instructions) {
76 if (instr->isSOPP()) {
77 /* Discard early exits and loop breaks and continues should work fine with
78 * an empty exec mask.
79 */
80 if (instr->opcode == aco_opcode::s_cbranch_scc0 ||
81 instr->opcode == aco_opcode::s_cbranch_scc1 ||
82 instr->opcode == aco_opcode::s_cbranch_execz ||
83 instr->opcode == aco_opcode::s_cbranch_execnz) {
84 bool is_break_continue =
85 ctx.program->blocks[i].kind & (block_kind_break | block_kind_continue);
86 bool discard_early_exit =
87 ctx.program->blocks[instr->salu().imm].kind & block_kind_discard_early_exit;
88 if (is_break_continue || discard_early_exit)
89 continue;
90 }
91 return false;
92 } else if (instr->isSALU()) {
93 num_scalar++;
94 } else if (instr->isVALU() || instr->isVINTRP()) {
95 if (instr->opcode == aco_opcode::v_writelane_b32 ||
96 instr->opcode == aco_opcode::v_writelane_b32_e64) {
97 /* writelane ignores exec, writing inactive lanes results in UB. */
98 return false;
99 }
100 num_vector++;
101 /* VALU which writes SGPRs are always executed on GFX10+ */
102 if (ctx.program->gfx_level >= GFX10) {
103 for (Definition& def : instr->definitions) {
104 if (def.regClass().type() == RegType::sgpr)
105 num_scalar++;
106 }
107 }
108 } else if (instr->isEXP() || instr->isSMEM() || instr->isBarrier()) {
109 /* Export instructions with exec=0 can hang some GFX10+ (unclear on old GPUs),
110 * SMEM might be an invalid access, and barriers are probably expensive. */
111 return false;
112 } else if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isLDSDIR()) {
113 // TODO: GFX6-9 can use vskip
114 if (!prefer_remove)
115 return false;
116 } else if (instr->opcode != aco_opcode::p_debug_info) {
117 assert(false && "Pseudo instructions should be lowered by this point.");
118 return false;
119 }
120
121 if (!prefer_remove) {
122 /* Under these conditions, we shouldn't remove the branch.
123 * Don't care about the estimated cycles when the shader prefers flattening.
124 */
125 unsigned est_cycles;
126 if (ctx.program->gfx_level >= GFX10)
127 est_cycles = num_scalar * 2 + num_vector;
128 else
129 est_cycles = num_scalar * 4 + num_vector * 4;
130
131 if (est_cycles > 16)
132 return false;
133 }
134 }
135 }
136
137 return true;
138 }
139
140 void
lower_branch_instruction(branch_ctx & ctx,Block & block)141 lower_branch_instruction(branch_ctx& ctx, Block& block)
142 {
143 if (block.instructions.empty() || !block.instructions.back()->isBranch())
144 return;
145
146 aco_ptr<Instruction> branch = std::move(block.instructions.back());
147 const uint32_t target = branch->branch().target[0];
148 block.instructions.pop_back();
149
150 if (can_remove_branch(ctx, block, &branch->branch())) {
151 if (branch->opcode != aco_opcode::p_branch)
152 remove_linear_successor(ctx, block, target);
153 return;
154 }
155
156 /* emit branch instruction */
157 Builder bld(ctx.program, &block.instructions);
158 switch (branch->opcode) {
159 case aco_opcode::p_branch:
160 assert(block.linear_succs[0] == target);
161 bld.sopp(aco_opcode::s_branch, target);
162 break;
163 case aco_opcode::p_cbranch_nz:
164 assert(block.linear_succs[1] == target);
165 if (branch->operands[0].physReg() == exec)
166 bld.sopp(aco_opcode::s_cbranch_execnz, target);
167 else if (branch->operands[0].physReg() == vcc)
168 bld.sopp(aco_opcode::s_cbranch_vccnz, target);
169 else {
170 assert(branch->operands[0].physReg() == scc);
171 bld.sopp(aco_opcode::s_cbranch_scc1, target);
172 }
173 break;
174 case aco_opcode::p_cbranch_z:
175 assert(block.linear_succs[1] == target);
176 if (branch->operands[0].physReg() == exec)
177 bld.sopp(aco_opcode::s_cbranch_execz, target);
178 else if (branch->operands[0].physReg() == vcc)
179 bld.sopp(aco_opcode::s_cbranch_vccz, target);
180 else {
181 assert(branch->operands[0].physReg() == scc);
182 bld.sopp(aco_opcode::s_cbranch_scc0, target);
183 }
184 break;
185 default: unreachable("Unknown Pseudo branch instruction!");
186 }
187 }
188
189 } /* end namespace */
190
191 void
lower_branches(Program * program)192 lower_branches(Program* program)
193 {
194 branch_ctx ctx(program);
195
196 for (int i = program->blocks.size() - 1; i >= 0; i--) {
197 Block& block = program->blocks[i];
198 lower_branch_instruction(ctx, block);
199 }
200 }
201
202 } // namespace aco
203