/*
 * Copyright (C) 2019-2021 Collabora, Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#include "compiler.h"

/* Bifrost texture operations have a `skip` bit, instructing helper invocations
 * to skip execution. Each clause has a `terminate_discarded_threads` bit,
 * which will terminate helper invocations.
 *
 * The terminate bit should be set on the last clause requiring helper
 * invocations. Without control flow, that's the last source-order instruction;
 * with control flow, there may be multiple such instructions (with ifs) or no
 * such instruction (with loops).
 *
 * The skip bit should be set unless the value of this instruction is required
 * by a future instruction requiring helper invocations. Consider:
 *
 *      0 = texture ...
 *      1 = fmul 0, #10
 *      2 = dfdx 1
 *      store 2
 *
 * Since the derivative calculation 2 requires helper invocations, the value 1
 * must be calculated by helper invocations, and since it depends on 0, 0 must
 * be calculated by helpers. Hence the texture op does NOT have the skip bit
 * set, and the clause containing the derivative has the terminate bit set.
 *
 * The terminate bit is calculated by a dataflow analysis determining which
 * blocks require helper invocations. A block requires invocations in if any
 * of its instructions use helper invocations, or if any of its successors
 * requires invocations in; the requirement propagates to predecessors. With
 * that analysis, the terminate bit is set on the last instruction using
 * invocations within any block that does *not* require invocations out.
 *
 * Likewise, calculating the execute bit requires backward dataflow analysis
 * with union as the join operation and the generating set being the union of
 * sources of instructions writing executed values. The skip bit is the inverse
 * of the execute bit.
 */

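/* As an illustration of the block-level analysis, consider a hypothetical
 * diamond CFG (not taken from any particular shader):
 *
 *      A -> B -> D
 *      A -> C -> D
 *
 * where only B samples a texture with computed LOD. B requires helper
 * invocations directly, so the analysis also marks its predecessor A: helpers
 * must not be terminated before control reaches B. Neither C nor D requires
 * helpers, so no successor of B does, and helper threads may be terminated
 * within B instead of surviving to the end of the shader. */

/* Does this opcode have a skip bit, i.e. is it one of the Bifrost texture
 * operations described above? */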
static bool
bi_has_skip_bit(enum bi_opcode op)
{
        switch (op) {
        case BI_OPCODE_TEXC:
        case BI_OPCODE_TEXS_2D_F16:
        case BI_OPCODE_TEXS_2D_F32:
        case BI_OPCODE_TEXS_CUBE_F16:
        case BI_OPCODE_TEXS_CUBE_F32:
        case BI_OPCODE_VAR_TEX_F16:
        case BI_OPCODE_VAR_TEX_F32:
                return true;
        default:
                return false;
        }
}

/* Does a given instruction require helper threads to be active (because it
 * reads from other subgroup lanes)? This only applies to fragment shaders.
 * Other shader stages do not have a notion of helper threads. */

static bool
bi_instr_uses_helpers(bi_instr *I)
{
        switch (I->op) {
        case BI_OPCODE_TEXC:
        case BI_OPCODE_TEXS_2D_F16:
        case BI_OPCODE_TEXS_2D_F32:
        case BI_OPCODE_TEXS_CUBE_F16:
        case BI_OPCODE_TEXS_CUBE_F32:
        case BI_OPCODE_VAR_TEX_F16:
        case BI_OPCODE_VAR_TEX_F32:
                return !I->lod_mode; /* set for zero, clear for computed */
        case BI_OPCODE_CLPER_I32:
        case BI_OPCODE_CLPER_V6_I32:
                /* Fragment shaders require helpers to implement derivatives.
                 * Other shader stages don't have helpers at all */
                return true;
        default:
                return false;
        }
}

/* Does a block use helpers directly */
static bool
bi_block_uses_helpers(bi_block *block)
{
        bi_foreach_instr_in_block(block, I) {
                if (bi_instr_uses_helpers(I))
                        return true;
        }

        return false;
}

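/* May helper invocations be terminated at the end of this block? They may,
 * unless some successor still requires helpers (pass_flags bit 0, set by the
 * analysis below). */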
static bool
bi_block_terminates_helpers(bi_block *block)
{
        /* Can't terminate if a successor needs helpers */
        bi_foreach_successor(block, succ) {
                if (succ->pass_flags & 1)
                        return false;
        }

        /* Otherwise we terminate */
        return true;
}

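/* Compute the terminate_discarded_threads (td) bit for each clause, using the
 * block-level analysis described at the top of this file. */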
void
bi_analyze_helper_terminate(bi_context *ctx)
{
        /* Other shader stages do not have a notion of helper threads, so we
         * can skip the analysis. Don't run for blend shaders, either, since
         * they run in the context of another shader that we don't see. */
        if (ctx->stage != MESA_SHADER_FRAGMENT || ctx->inputs->is_blend)
                return;

        /* Set blocks as directly requiring helpers, and if they do, add them
         * to the worklist to propagate to their predecessors */

        struct set *worklist = _mesa_set_create(NULL,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

        struct set *visited = _mesa_set_create(NULL,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

        bi_foreach_block(ctx, block) {
                block->pass_flags = bi_block_uses_helpers(block) ? 1 : 0;

                if (block->pass_flags & 1)
                        _mesa_set_add(worklist, block);
        }

        /* Next, propagate back. Since there are a finite number of blocks, the
         * worklist (a subset of all the blocks) is finite. Since a block can
         * only be added to the worklist if it is not on the visited list and
         * the visited list - also a subset of the blocks - grows every
         * iteration, the algorithm must terminate. */

        struct set_entry *cur;

        while((cur = _mesa_set_next_entry(worklist, NULL)) != NULL) {
                /* Pop off a block requiring helpers */
                bi_block *blk = (struct bi_block *) cur->key;
                _mesa_set_remove(worklist, cur);

                /* Its predecessors also require helpers */
                bi_foreach_predecessor(blk, pred) {
                        if (!_mesa_set_search(visited, pred)) {
                                pred->pass_flags |= 1;
                                _mesa_set_add(worklist, pred);
                        }
                }

                _mesa_set_add(visited, blk);
        }

        _mesa_set_destroy(visited, NULL);
        _mesa_set_destroy(worklist, NULL);

        /* Finally, mark clauses requiring helpers */
        bi_foreach_block(ctx, block) {
                /* At the end, there are helpers iff we don't terminate */
                bool helpers = !bi_block_terminates_helpers(block);

                bi_foreach_clause_in_block_rev(block, clause) {
                        bi_foreach_instr_in_clause_rev(block, clause, I) {
                                helpers |= bi_instr_uses_helpers(I);
                        }

                        clause->td = !helpers;
                }
        }
}

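/* One backwards step of the per-block analysis: if an instruction writes a
 * value that helper invocations require, then its sources are required by
 * helper invocations as well. Returns whether the dependency set grew, so the
 * caller knows when a fixed point has been reached. */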
static bool
bi_helper_block_update(BITSET_WORD *deps, bi_block *block)
{
        bool progress = false;

        bi_foreach_instr_in_block_rev(block, I) {
                /* If our destination is required by helper invocation... */
                if (I->dest[0].type != BI_INDEX_NORMAL)
                        continue;

                if (!BITSET_TEST(deps, bi_get_node(I->dest[0])))
                        continue;

                /* ...so are our sources */
                bi_foreach_src(I, s) {
                        if (I->src[s].type == BI_INDEX_NORMAL) {
                                unsigned node = bi_get_node(I->src[s]);
                                progress |= !BITSET_TEST(deps, node);
                                BITSET_SET(deps, node);
                        }
                }
        }

        return progress;
}

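/* Compute the skip bits for texture operations: gather every value that must
 * be computed by helper invocations (the execute set), then let helpers skip
 * any texture operation whose result is not in that set. */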
void
bi_analyze_helper_requirements(bi_context *ctx)
{
        unsigned temp_count = bi_max_temp(ctx);
        BITSET_WORD *deps = calloc(sizeof(BITSET_WORD), BITSET_WORDS(temp_count));

        /* Initialize with the sources of instructions consuming
         * derivatives */

        bi_foreach_instr_global(ctx, I) {
                if (I->dest[0].type != BI_INDEX_NORMAL) continue;
                if (!bi_instr_uses_helpers(I)) continue;

                bi_foreach_src(I, s) {
                        if (I->src[s].type == BI_INDEX_NORMAL)
                                BITSET_SET(deps, bi_get_node(I->src[s]));
                }
        }

        /* Propagate that up */

        struct set *work_list = _mesa_set_create(NULL,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

        struct set *visited = _mesa_set_create(NULL,
                        _mesa_hash_pointer,
                        _mesa_key_pointer_equal);

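        /* Seed the walk at the exit block and iterate to a fixed point,
         * revisiting predecessors whenever a block discovers new
         * dependencies. */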
        struct set_entry *cur = _mesa_set_add(work_list, pan_exit_block(&ctx->blocks));

        do {
                bi_block *blk = (struct bi_block *) cur->key;
                _mesa_set_remove(work_list, cur);

                bool progress = bi_helper_block_update(deps, blk);

                if (progress || !_mesa_set_search(visited, blk)) {
                        bi_foreach_predecessor(blk, pred)
                                _mesa_set_add(work_list, pred);
                }

                _mesa_set_add(visited, blk);
        } while((cur = _mesa_set_next_entry(work_list, NULL)) != NULL);

        _mesa_set_destroy(visited, NULL);
        _mesa_set_destroy(work_list, NULL);

        /* Set the skip bits, the inverse of the execute set computed above */

        bi_foreach_instr_global(ctx, I) {
                if (!bi_has_skip_bit(I->op)) continue;
                if (I->dest[0].type != BI_INDEX_NORMAL) continue;

                I->skip = !BITSET_TEST(deps, bi_get_node(I->dest[0]));
        }

        free(deps);
}