• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright 2023 Alyssa Rosenzweig
3  * SPDX-License-Identifier: MIT
4  */
5 
6 #include "agx_builder.h"
7 #include "agx_compiler.h"
8 #include "agx_opcodes.h"
9 
10 /*
11  * AGX control flow instructions predicate out threads. No forward branches are
12  * inserted during instruction selection, only backwards branches at the end of
13  * loops exist before this pass. This means, prior to this pass, we would always
14  * execute both sides of an if.
15  *
16  * To improve performance, this pass inserts conservative forward branches after
17  * if_*cmp, else_*cmp, and break_if_*cmp instructions, jumping the
18  * subgroup to their logical destination if all threads in the subgroup are
19  * inactive. This has the effect of skipping over the unexecuted half of an if.
20  * That means this pass is critical for control flow performance.
21  */
22 
23 /* Estimated cost of inserting a jmp_exec_none. This value is tuned to Dolphin
24  * ubershaders. It needs to be retuned in lockstep with changes to the cost
25  * estimation heuristic.
26  */
27 #define COST_JMP (19)
28 
29 static uint32_t
cost_instr(agx_instr * I)30 cost_instr(agx_instr *I)
31 {
32    /* TODO: Better heuristic */
33    switch (I->op) {
34    case AGX_OPCODE_DEVICE_LOAD:
35    case AGX_OPCODE_TEXTURE_LOAD:
36    case AGX_OPCODE_TEXTURE_SAMPLE:
37       return 10;
38    default:
39       return 1;
40    }
41 }
42 
43 /*
44  * Estimate the cost between the instruction and the branch target. This is an
45  * input for our heuristic. The branch target is guaranteed to be a forward
46  * branch.
47  */
48 static uint32_t
cost_between(agx_context * ctx,agx_block * from,agx_instr * from_I,agx_block * target,bool skip_to_end_of_target)49 cost_between(agx_context *ctx, agx_block *from, agx_instr *from_I,
50              agx_block *target, bool skip_to_end_of_target)
51 {
52    uint32_t cost = 0;
53 
54    /* Consider the cost in the rest of this block */
55    if (from_I != agx_last_instr(from)) {
56       agx_foreach_instr_in_block_from(from, J, from_I) {
57          /* If we reach the end, we're done */
58          if (from == target && skip_to_end_of_target &&
59              J == agx_last_instr(target))
60             break;
61 
62          cost += cost_instr(J);
63       }
64    }
65 
66    if (from == target)
67       return cost;
68 
69    /* Consider the cost in the subsequent blocks */
70    agx_foreach_block_from(ctx, from, block) {
71       if (block == from)
72          continue;
73 
74       if (block == target && !skip_to_end_of_target)
75          break;
76 
77       agx_foreach_instr_in_block(block, I) {
78          if (block == target && I == agx_last_instr(target))
79             break;
80 
81          cost += cost_instr(I);
82       }
83 
84       if (block == target) {
85          assert(skip_to_end_of_target);
86          break;
87       }
88    }
89 
90    return cost;
91 }
92 
93 static void
try_insert_jmp(agx_context * ctx,agx_block * from,agx_instr * from_I,agx_block * target,bool skip_to_end_of_target,unsigned inverse_probability)94 try_insert_jmp(agx_context *ctx, agx_block *from, agx_instr *from_I,
95                agx_block *target, bool skip_to_end_of_target,
96                unsigned inverse_probability)
97 {
98    agx_builder b = agx_init_builder(ctx, agx_after_instr(from_I));
99 
100    /* If the control flow instruction was only inserted for its side effects,
101     * there is nowhere to jump. Bail.
102     */
103    if (!target)
104       return;
105 
106    /* If we do not insert a jump, we execute the predicated instructions
107     * unconditionally, with an expected cost C.
108     *
109     * If we do insert a jump, then we pay the cost J of the jump, AND if we do
110     * not take the jump, also the cost of the instructions C. The expected cost
111     * if we insert a jump is therefore J + P(not all threads inactive) C.
112     *
113     * Therefore, we should insert a jump if:
114     *
115     *    J + P(not all threads inactive) C < C
116     *
117     * To model the implicit (i-cache, etc) costs of inserting a jump
118     * instruction, we tie break conservatively, comparing with < instead of <=.
119     *
120     * Rearranging terms, we should NOT insert a jump if:
121     *
122     *    C < J / P(all threads inactive).
123     */
124    uint32_t cost_instructions =
125       cost_between(ctx, from, from_I, target, skip_to_end_of_target);
126 
127    if (cost_instructions < COST_JMP * inverse_probability)
128       return;
129 
130    /* It looks like inserting a jump will be a win. Do so. */
131    if (skip_to_end_of_target)
132       agx_jmp_exec_none_after(&b, target);
133    else
134       agx_jmp_exec_none(&b, target);
135 }
136 
137 void
agx_opt_jmp_none(agx_context * ctx)138 agx_opt_jmp_none(agx_context *ctx)
139 {
140    agx_foreach_block(ctx, blk) {
141       /* Handle the beginning of blocks */
142       agx_instr *first_ = agx_first_instr(blk);
143       if (first_ && (first_->op == AGX_OPCODE_ELSE_ICMP ||
144                      first_->op == AGX_OPCODE_ELSE_FCMP)) {
145 
146          /* The target of the else is the last block of the else, so we skip
147           * to the end of the block (to start execution with the pop_exec).
148           */
149          try_insert_jmp(ctx, blk, first_, first_->target, true, 2);
150       } else if (first_ &&
151                  (first_->op == AGX_OPCODE_BREAK_IF_ICMP ||
152                   first_->op == AGX_OPCODE_BREAK_IF_FCMP) &&
153                  first_->nest == 1) {
154          /* The target of the break is the block immediately after the end of
155           * the loop, so jump to the end of the previous block to get the
156           * appropriate pop_exec.
157           *
158           * Also, note we only do this for nest=1 to ensure we don't insert
159           * jumps inside if-statements inside breaks. We can't insert a
160           * jmp_exec_none inside the if because it would break out of the loop
161           * for threads that are still running the loop but merely predicated
162           * out due to the if-condition. This is similarly why we don't bother
163           * handling unconditional break.
164           *
165           * TODO: This is not optimal, but fixing this would require
166           * considerably more CFG gymnastics.
167           */
168          agx_block *target = agx_prev_block(first_->target);
169          try_insert_jmp(ctx, blk, first_, target, true, 10);
170       }
171 
172       /* Handle end of block instructions */
173       agx_foreach_instr_in_block_rev(blk, I) {
174          if (!instr_after_logical_end(I))
175             break;
176 
177          if (I->op == AGX_OPCODE_IF_ICMP || I->op == AGX_OPCODE_IF_FCMP) {
178             try_insert_jmp(ctx, blk, I, I->target, false, 2);
179             break;
180          }
181       }
182    }
183 }
184