• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright © 2024 Valve Corporation
3  *
4  * SPDX-License-Identifier: MIT
5  */
6 
7 #include "aco_builder.h"
8 #include "aco_ir.h"
9 
10 namespace aco {
11 namespace {
12 
13 struct branch_ctx {
14    Program* program;
15 
branch_ctxaco::__anon839568660111::branch_ctx16    branch_ctx(Program* program_) : program(program_) {}
17 };
18 
19 void
remove_linear_successor(branch_ctx & ctx,Block & block,uint32_t succ_index)20 remove_linear_successor(branch_ctx& ctx, Block& block, uint32_t succ_index)
21 {
22    Block& succ = ctx.program->blocks[succ_index];
23    ASSERTED auto it = std::remove(succ.linear_preds.begin(), succ.linear_preds.end(), block.index);
24    assert(std::next(it) == succ.linear_preds.end());
25    succ.linear_preds.pop_back();
26    it = std::remove(block.linear_succs.begin(), block.linear_succs.end(), succ_index);
27    assert(std::next(it) == block.linear_succs.end());
28    block.linear_succs.pop_back();
29 
30    if (succ.linear_preds.empty()) {
31       /* This block became unreachable - Recursively remove successors. */
32       succ.instructions.clear();
33       for (unsigned i : succ.linear_succs)
34          remove_linear_successor(ctx, succ, i);
35    }
36 }
37 
38 /**
39  *  Check if the branch instruction can be removed:
40  *  This is beneficial when executing the next block with an empty exec mask
41  *  is faster than the branch instruction itself.
42  *
43  *  Override this judgement when:
44  *  - The application prefers to remove control flow
45  *  - The compiler stack knows that it's a divergent branch never taken
46  */
47 bool
can_remove_branch(branch_ctx & ctx,Block & block,Pseudo_branch_instruction * branch)48 can_remove_branch(branch_ctx& ctx, Block& block, Pseudo_branch_instruction* branch)
49 {
50    const uint32_t target = branch->target[0];
51    const bool uniform_branch =
52       !((branch->opcode == aco_opcode::p_cbranch_z || branch->opcode == aco_opcode::p_cbranch_nz) &&
53         branch->operands[0].physReg() == exec);
54 
55    if (branch->never_taken) {
56       assert(!uniform_branch);
57       return true;
58    }
59 
60    /* Cannot remove back-edges. */
61    if (block.index >= target)
62       return false;
63 
64    const bool prefer_remove = branch->rarely_taken;
65    unsigned num_scalar = 0;
66    unsigned num_vector = 0;
67 
68    /* Check the instructions between branch and target */
69    for (unsigned i = block.index + 1; i < target; i++) {
70       /* Uniform conditional branches must not be ignored if they
71        * are about to jump over actual instructions */
72       if (uniform_branch && !ctx.program->blocks[i].instructions.empty())
73          return false;
74 
75       for (aco_ptr<Instruction>& instr : ctx.program->blocks[i].instructions) {
76          if (instr->isSOPP()) {
77             /* Discard early exits and loop breaks and continues should work fine with
78              * an empty exec mask.
79              */
80             if (instr->opcode == aco_opcode::s_cbranch_scc0 ||
81                 instr->opcode == aco_opcode::s_cbranch_scc1 ||
82                 instr->opcode == aco_opcode::s_cbranch_execz ||
83                 instr->opcode == aco_opcode::s_cbranch_execnz) {
84                bool is_break_continue =
85                   ctx.program->blocks[i].kind & (block_kind_break | block_kind_continue);
86                bool discard_early_exit =
87                   ctx.program->blocks[instr->salu().imm].kind & block_kind_discard_early_exit;
88                if (is_break_continue || discard_early_exit)
89                   continue;
90             }
91             return false;
92          } else if (instr->isSALU()) {
93             num_scalar++;
94          } else if (instr->isVALU() || instr->isVINTRP()) {
95             if (instr->opcode == aco_opcode::v_writelane_b32 ||
96                 instr->opcode == aco_opcode::v_writelane_b32_e64) {
97                /* writelane ignores exec, writing inactive lanes results in UB. */
98                return false;
99             }
100             num_vector++;
101             /* VALU which writes SGPRs are always executed on GFX10+ */
102             if (ctx.program->gfx_level >= GFX10) {
103                for (Definition& def : instr->definitions) {
104                   if (def.regClass().type() == RegType::sgpr)
105                      num_scalar++;
106                }
107             }
108          } else if (instr->isEXP() || instr->isSMEM() || instr->isBarrier()) {
109             /* Export instructions with exec=0 can hang some GFX10+ (unclear on old GPUs),
110              * SMEM might be an invalid access, and barriers are probably expensive. */
111             return false;
112          } else if (instr->isVMEM() || instr->isFlatLike() || instr->isDS() || instr->isLDSDIR()) {
113             // TODO: GFX6-9 can use vskip
114             if (!prefer_remove)
115                return false;
116          } else if (instr->opcode != aco_opcode::p_debug_info) {
117             assert(false && "Pseudo instructions should be lowered by this point.");
118             return false;
119          }
120 
121          if (!prefer_remove) {
122             /* Under these conditions, we shouldn't remove the branch.
123              * Don't care about the estimated cycles when the shader prefers flattening.
124              */
125             unsigned est_cycles;
126             if (ctx.program->gfx_level >= GFX10)
127                est_cycles = num_scalar * 2 + num_vector;
128             else
129                est_cycles = num_scalar * 4 + num_vector * 4;
130 
131             if (est_cycles > 16)
132                return false;
133          }
134       }
135    }
136 
137    return true;
138 }
139 
140 void
lower_branch_instruction(branch_ctx & ctx,Block & block)141 lower_branch_instruction(branch_ctx& ctx, Block& block)
142 {
143    if (block.instructions.empty() || !block.instructions.back()->isBranch())
144       return;
145 
146    aco_ptr<Instruction> branch = std::move(block.instructions.back());
147    const uint32_t target = branch->branch().target[0];
148    block.instructions.pop_back();
149 
150    if (can_remove_branch(ctx, block, &branch->branch())) {
151       if (branch->opcode != aco_opcode::p_branch)
152          remove_linear_successor(ctx, block, target);
153       return;
154    }
155 
156    /* emit branch instruction */
157    Builder bld(ctx.program, &block.instructions);
158    switch (branch->opcode) {
159    case aco_opcode::p_branch:
160       assert(block.linear_succs[0] == target);
161       bld.sopp(aco_opcode::s_branch, target);
162       break;
163    case aco_opcode::p_cbranch_nz:
164       assert(block.linear_succs[1] == target);
165       if (branch->operands[0].physReg() == exec)
166          bld.sopp(aco_opcode::s_cbranch_execnz, target);
167       else if (branch->operands[0].physReg() == vcc)
168          bld.sopp(aco_opcode::s_cbranch_vccnz, target);
169       else {
170          assert(branch->operands[0].physReg() == scc);
171          bld.sopp(aco_opcode::s_cbranch_scc1, target);
172       }
173       break;
174    case aco_opcode::p_cbranch_z:
175       assert(block.linear_succs[1] == target);
176       if (branch->operands[0].physReg() == exec)
177          bld.sopp(aco_opcode::s_cbranch_execz, target);
178       else if (branch->operands[0].physReg() == vcc)
179          bld.sopp(aco_opcode::s_cbranch_vccz, target);
180       else {
181          assert(branch->operands[0].physReg() == scc);
182          bld.sopp(aco_opcode::s_cbranch_scc0, target);
183       }
184       break;
185    default: unreachable("Unknown Pseudo branch instruction!");
186    }
187 }
188 
189 } /* end namespace */
190 
191 void
lower_branches(Program * program)192 lower_branches(Program* program)
193 {
194    branch_ctx ctx(program);
195 
196    for (int i = program->blocks.size() - 1; i >= 0; i--) {
197       Block& block = program->blocks[i];
198       lower_branch_instruction(ctx, block);
199    }
200 }
201 
202 } // namespace aco
203