• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2022 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 /* Bottom-up local scheduler to reduce register pressure */
28 
29 #include "util/dag.h"
30 #include "compiler.h"
31 
/* State threaded through scheduling of one block */
struct sched_ctx {
   /* Dependency graph for the block being scheduled */
   struct dag *dag;

   /* Live set, updated bottom-up as instructions are (re)scheduled */
   BITSET_WORD *live;
};
39 
/* One DAG node per instruction */
struct sched_node {
   struct dag_node dag;

   /* Instruction this node represents */
   bi_instr *instr;
};
46 
47 static void
add_dep(struct sched_node * a,struct sched_node * b)48 add_dep(struct sched_node *a, struct sched_node *b)
49 {
50    if (a && b)
51       dag_add_edge(&a->dag, &b->dag, 0);
52 }
53 
/*
 * Build the dependency DAG for a block: one node per instruction, with
 * edges encoding everything that must stay ordered. SSA gives us
 * read-after-write edges; the remaining edges serialize memory access,
 * coverage updates, and preloads. Branches at the end of the block are
 * deliberately excluded so they stay in place.
 */
static struct dag *
create_dag(bi_context *ctx, bi_block *block, void *memctx)
{
   /* NOTE(review): nodes below are allocated on memctx, but the DAG itself
    * is ralloc'd against ctx — confirm this split is intentional (the DAG
    * then lives until ctx is freed, not until memctx is). */
   struct dag *dag = dag_create(ctx);

   /* Most recent writer of each SSA value, indexed by value, for
    * read-after-write edges. NOTE(review): allocation is unchecked;
    * a failure here dereferences NULL below. */
   struct sched_node **last_write =
      calloc(ctx->ssa_alloc, sizeof(struct sched_node *));

   /* Last instruction updating coverage (blend/tile/ATEST/discard) */
   struct sched_node *coverage = NULL;

   /* Last preload (PHI, or MOV from a register); preloads are kept at the
    * top of the block by making every later instruction depend on them */
   struct sched_node *preload = NULL;

   /* Last memory load, to serialize stores against */
   struct sched_node *memory_load = NULL;

   /* Last memory store, to serialize loads and stores against */
   struct sched_node *memory_store = NULL;

   bi_foreach_instr_in_block(block, I) {
      /* Leave branches at the end */
      if (I->op == BI_OPCODE_JUMP || bi_opcode_props[I->op].branch)
         break;

      assert(I->branch_target == NULL);

      struct sched_node *node = rzalloc(memctx, struct sched_node);
      node->instr = I;
      dag_init_node(dag, &node->dag);

      /* Reads depend on writes, no other hazards in SSA */
      bi_foreach_ssa_src(I, s)
         add_dep(node, last_write[I->src[s].value]);

      bi_foreach_dest(I, d)
         last_write[I->dest[d].value] = node;

      /* Everything stays after the last preload seen so far */
      add_dep(node, preload);

      switch (bi_opcode_props[I->op].message) {
      case BIFROST_MESSAGE_LOAD:
         /* Regular memory loads needs to be serialized against
          * other memory access. However, UBO memory is read-only
          * so it can be moved around freely.
          */
         if (I->seg != BI_SEG_UBO) {
            add_dep(node, memory_store);
            memory_load = node;
         }

         break;

      case BIFROST_MESSAGE_ATTRIBUTE:
         /* Regular attribute loads can be reordered, but
          * writeable attributes can't be. Our one use of
          * writeable attributes are images.
          */
         if ((I->op == BI_OPCODE_LD_TEX) || (I->op == BI_OPCODE_LD_TEX_IMM) ||
             (I->op == BI_OPCODE_LD_ATTR_TEX)) {
            add_dep(node, memory_store);
            memory_load = node;
         }

         break;

      case BIFROST_MESSAGE_STORE:
         /* Stores are ordered against both prior loads and prior stores */
         assert(I->seg != BI_SEG_UBO);
         add_dep(node, memory_load);
         add_dep(node, memory_store);
         memory_store = node;
         break;

      case BIFROST_MESSAGE_ATOMIC:
      case BIFROST_MESSAGE_BARRIER:
         /* Atomics/barriers act as both a load and a store */
         add_dep(node, memory_load);
         add_dep(node, memory_store);
         memory_load = node;
         memory_store = node;
         break;

      case BIFROST_MESSAGE_BLEND:
      case BIFROST_MESSAGE_Z_STENCIL:
      case BIFROST_MESSAGE_TILE:
         /* Render-target messages are ordered by the coverage chain */
         add_dep(node, coverage);
         coverage = node;
         break;

      case BIFROST_MESSAGE_ATEST:
         /* ATEST signals the end of shader side effects */
         add_dep(node, memory_store);
         memory_store = node;

         /* ATEST also updates coverage */
         add_dep(node, coverage);
         coverage = node;
         break;
      default:
         break;
      }

      if (I->op == BI_OPCODE_DISCARD_F32) {
         /* Serialize against ATEST */
         add_dep(node, coverage);
         coverage = node;
      }
      if (I->op == BI_OPCODE_DISCARD_F32 ||
          I->op == BI_OPCODE_MEMORY_BARRIER) {
         /* Serialize against memory operations and barriers */
         add_dep(node, memory_load);
         add_dep(node, memory_store);
         memory_load = node;
         memory_store = node;
      }
      if ((I->op == BI_OPCODE_PHI) ||
          (I->op == BI_OPCODE_MOV_I32 &&
           I->src[0].type == BI_INDEX_REGISTER)) {
         /* PHIs and register moves are preloads: keep them first */
         preload = node;
      }
   }

   free(last_write);

   return dag;
}
175 
176 /*
177  * Calculate the change in register pressure from scheduling a given
178  * instruction. Equivalently, calculate the difference in the number of live
179  * registers before and after the instruction, given the live set after the
180  * instruction. This calculation follows immediately from the dataflow
181  * definition of liveness:
182  *
183  *      live_in = (live_out - KILL) + GEN
184  */
185 static signed
calculate_pressure_delta(bi_instr * I,BITSET_WORD * live)186 calculate_pressure_delta(bi_instr *I, BITSET_WORD *live)
187 {
188    signed delta = 0;
189 
190    /* Destinations must be unique */
191    bi_foreach_dest(I, d) {
192       if (BITSET_TEST(live, I->dest[d].value))
193          delta -= bi_count_write_registers(I, d);
194    }
195 
196    bi_foreach_ssa_src(I, src) {
197       /* Filter duplicates */
198       bool dupe = false;
199 
200       for (unsigned i = 0; i < src; ++i) {
201          if (bi_is_equiv(I->src[i], I->src[src])) {
202             dupe = true;
203             break;
204          }
205       }
206 
207       if (!dupe && !BITSET_TEST(live, I->src[src].value))
208          delta += bi_count_read_registers(I, src);
209    }
210 
211    return delta;
212 }
213 
214 /*
215  * Choose the next instruction, bottom-up. For now we use a simple greedy
216  * heuristic: choose the instruction that has the best effect on liveness.
217  */
218 static struct sched_node *
choose_instr(struct sched_ctx * s)219 choose_instr(struct sched_ctx *s)
220 {
221    int32_t min_delta = INT32_MAX;
222    struct sched_node *best = NULL;
223 
224    list_for_each_entry(struct sched_node, n, &s->dag->heads, dag.link) {
225       int32_t delta = calculate_pressure_delta(n->instr, s->live);
226 
227       if (delta < min_delta) {
228          best = n;
229          min_delta = delta;
230       }
231    }
232 
233    return best;
234 }
235 
236 static void
pressure_schedule_block(bi_context * ctx,bi_block * block,struct sched_ctx * s)237 pressure_schedule_block(bi_context *ctx, bi_block *block, struct sched_ctx *s)
238 {
239    /* off by a constant, that's ok */
240    signed pressure = 0;
241    signed orig_max_pressure = 0;
242    unsigned nr_ins = 0;
243 
244    memcpy(s->live, block->ssa_live_out,
245           BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
246 
247    bi_foreach_instr_in_block_rev(block, I) {
248       pressure += calculate_pressure_delta(I, s->live);
249       orig_max_pressure = MAX2(pressure, orig_max_pressure);
250       bi_liveness_ins_update_ssa(s->live, I);
251       nr_ins++;
252    }
253 
254    memcpy(s->live, block->ssa_live_out,
255           BITSET_WORDS(ctx->ssa_alloc) * sizeof(BITSET_WORD));
256 
257    /* off by a constant, that's ok */
258    signed max_pressure = 0;
259    pressure = 0;
260 
261    struct sched_node **schedule = calloc(nr_ins, sizeof(struct sched_node *));
262    nr_ins = 0;
263 
264    while (!list_is_empty(&s->dag->heads)) {
265       struct sched_node *node = choose_instr(s);
266       pressure += calculate_pressure_delta(node->instr, s->live);
267       max_pressure = MAX2(pressure, max_pressure);
268       dag_prune_head(s->dag, &node->dag);
269 
270       schedule[nr_ins++] = node;
271       bi_liveness_ins_update_ssa(s->live, node->instr);
272    }
273 
274    /* Bail if it looks like it's worse */
275    if (max_pressure >= orig_max_pressure) {
276       free(schedule);
277       return;
278    }
279 
280    /* Apply the schedule */
281    for (unsigned i = 0; i < nr_ins; ++i) {
282       bi_remove_instruction(schedule[i]->instr);
283       list_add(&schedule[i]->instr->link, &block->instructions);
284    }
285 
286    free(schedule);
287 }
288 
289 void
bi_pressure_schedule(bi_context * ctx)290 bi_pressure_schedule(bi_context *ctx)
291 {
292    bi_compute_liveness_ssa(ctx);
293    void *memctx = ralloc_context(ctx);
294    BITSET_WORD *live =
295       ralloc_array(memctx, BITSET_WORD, BITSET_WORDS(ctx->ssa_alloc));
296 
297    bi_foreach_block(ctx, block) {
298       struct sched_ctx sctx = {.dag = create_dag(ctx, block, memctx),
299                                .live = live};
300 
301       pressure_schedule_block(ctx, block, &sctx);
302    }
303 
304    ralloc_free(memctx);
305 }
306