/* -*- mode: C; c-file-style: "k&r"; tab-width: 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */


#include "util/u_math.h"

#include "ir3.h"

/*
 * Instruction Scheduling:
 *
 * A recursive, depth-based scheduling algorithm.  Recursively find an
 * eligible instruction to schedule from the deepest instruction (recursing
 * through its unscheduled src instructions).  Normally this would result
 * in a lot of re-traversal of the same instructions, so we cache results
 * in instr->data (and clear cached results that would no longer be valid
 * after scheduling an instruction).
 *
 * There are a few special cases that need to be handled, since sched
 * is currently independent of register allocation.  Uses of the address
 * register (a0.x) or predicate register (p0.x) must be serialized.  I.e.
 * if you have two pairs of instructions that write and then read the same
 * special register, those pairs cannot be interleaved.  To solve this,
 * when we are in such a scheduling "critical section", and we encounter
 * a conflicting write to a special register, we try to schedule any
 * remaining instructions that use that value first.
 */
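
/* For example (a rough sketch, not literal ir3 assembly), for a0.x:
 *
 *    (write a0.x) <- A
 *    (uses of a0.x == A) ...
 *    (write a0.x) <- B      ; may not be scheduled until every remaining
 *    (uses of a0.x == B)    ;  user of A has been scheduled
 *
 * i.e. the two write/use pairs stay serialized rather than interleaved.
 */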

struct ir3_sched_ctx {
    struct ir3_block *block;           /* the current block */
    struct list_head depth_list;       /* depth sorted unscheduled instrs */
    struct ir3_instruction *scheduled; /* last scheduled instr XXX remove */
    struct ir3_instruction *addr;      /* current a0.x user, if any */
    struct ir3_instruction *pred;      /* current p0.x user, if any */
    bool error;
};

static bool is_sfu_or_mem(struct ir3_instruction *instr)
{
    return is_sfu(instr) || is_mem(instr);
}

#define NULL_INSTR ((void *)~0)

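/* Invalidate cached scheduling candidates (stored in instr->data by
 * find_instr_recursive()).  A NULL 'instr' flushes the whole cache,
 * otherwise only entries pointing at 'instr' (or at the NULL_INSTR
 * "nothing found" sentinel) are cleared.
 */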
static void
clear_cache(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
    list_for_each_entry (struct ir3_instruction, instr2, &ctx->depth_list, node) {
        if ((instr2->data == instr) || (instr2->data == NULL_INSTR) || !instr)
            instr2->data = NULL;
    }
}

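/* Emit 'instr' into the block and update scheduler state: separate
 * back-to-back sfu/mem instructions with a nop (see note below), record
 * it as the live a0.x/p0.x writer where applicable, mark it scheduled,
 * and invalidate the candidate cache as needed.
 */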
static void
schedule(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr)
{
    debug_assert(ctx->block == instr->block);

    /* maybe there is a better way to handle this than just stuffing
     * a nop.. ideally we'd know about this constraint in the
     * scheduling and depth calculation..
     */
    if (ctx->scheduled && is_sfu_or_mem(ctx->scheduled) && is_sfu_or_mem(instr))
        ir3_NOP(ctx->block);

    /* remove from depth list:
     */
    list_delinit(&instr->node);

    if (writes_addr(instr)) {
        debug_assert(ctx->addr == NULL);
        ctx->addr = instr;
    }

    if (writes_pred(instr)) {
        debug_assert(ctx->pred == NULL);
        ctx->pred = instr;
    }

    instr->flags |= IR3_INSTR_MARK;

    list_addtail(&instr->node, &instr->block->instr_list);
    ctx->scheduled = instr;

    if (writes_addr(instr) || writes_pred(instr) || is_input(instr)) {
        clear_cache(ctx, NULL);
    } else {
        /* invalidate only the necessary entries.. */
        clear_cache(ctx, instr);
    }
}

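/* Return the deepest (largest ->depth) instruction from the 'srcs' array
 * and clear its slot, so that successive calls walk the srcs from deepest
 * to shallowest.  Returns NULL once all slots have been consumed.
 */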
static struct ir3_instruction *
deepest(struct ir3_instruction **srcs, unsigned nsrcs)
{
    struct ir3_instruction *d = NULL;
    unsigned i = 0, id = 0;

    while ((i < nsrcs) && !(d = srcs[id = i]))
        i++;

    if (!d)
        return NULL;

    for (; i < nsrcs; i++)
        if (srcs[i] && (srcs[i]->depth > d->depth))
            d = srcs[id = i];

    srcs[id] = NULL;

    return d;
}

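/* Count how many delay-consuming (alu/flow) instructions have already been
 * scheduled since 'instr', capped at 'maxd'.  Used to subtract cycles that
 * are already covered from the required delay.
 */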
static unsigned
distance(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr,
        unsigned maxd)
{
    struct list_head *instr_list = &ctx->block->instr_list;
    unsigned d = 0;

    list_for_each_entry_rev (struct ir3_instruction, n, instr_list, node) {
        if ((n == instr) || (d >= maxd))
            break;
        if (is_alu(n) || is_flow(n))
            d++;
    }

    return d;
}

/* calculate delay for specified src: */
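/* (When 'soft' is true the result is only used to rank candidates in
 * find_eligible_instr(); sfu producers get a nominal cost of 4 there,
 * presumably since their results are synchronized via a flag rather than
 * fixed delay slots, while the hard delays come from ir3_delayslots().)
 */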
static unsigned
delay_calc_srcn(struct ir3_sched_ctx *ctx,
        struct ir3_instruction *assigner,
        struct ir3_instruction *consumer,
        unsigned srcn, bool soft)
{
    unsigned delay = 0;

    if (is_meta(assigner)) {
        struct ir3_instruction *src;
        foreach_ssa_src(src, assigner) {
            unsigned d;
            if (src->block != assigner->block)
                break;
            d = delay_calc_srcn(ctx, src, consumer, srcn, soft);
            delay = MAX2(delay, d);
        }
    } else {
        if (soft) {
            if (is_sfu(assigner)) {
                delay = 4;
            } else {
                delay = ir3_delayslots(assigner, consumer, srcn);
            }
        } else {
            delay = ir3_delayslots(assigner, consumer, srcn);
        }
        delay -= distance(ctx, assigner, delay);
    }

    return delay;
}

/* calculate delay for instruction (maximum of delay for all srcs): */
static unsigned
delay_calc(struct ir3_sched_ctx *ctx, struct ir3_instruction *instr, bool soft)
{
    unsigned delay = 0;
    struct ir3_instruction *src;

    foreach_ssa_src_n(src, i, instr) {
        unsigned d;
        /* for array writes, no need to delay on previous write: */
        if (i == 0)
            continue;
        if (src->block != instr->block)
            continue;
        d = delay_calc_srcn(ctx, src, instr, i, soft);
        delay = MAX2(delay, d);
    }

    return delay;
}

struct ir3_sched_notes {
    /* there is at least one kill which could be scheduled, except
     * for unscheduled bary.f's:
     */
    bool blocked_kill;
    /* there is at least one instruction that could be scheduled,
     * except for conflicting address/predicate register usage:
     */
    bool addr_conflict, pred_conflict;
};

static bool is_scheduled(struct ir3_instruction *instr)
{
    return !!(instr->flags & IR3_INSTR_MARK);
}

/* could an instruction be scheduled if the specified ssa src was scheduled? */
static bool
could_sched(struct ir3_instruction *instr, struct ir3_instruction *src)
{
    struct ir3_instruction *other_src;
    foreach_ssa_src(other_src, instr) {
        /* if dependency not scheduled, we aren't ready yet: */
        if ((src != other_src) && !is_scheduled(other_src)) {
            return false;
        }
    }
    return true;
}

/* Check if instruction is ok to schedule.  Make sure it is not blocked
 * by use of addr/predicate register, etc.
 */
static bool
check_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
        struct ir3_instruction *instr)
{
    /* For instructions that write address register we need to
     * make sure there is at least one instruction that uses the
     * addr value which is otherwise ready.
     *
     * TODO if any instructions use pred register and have other
     * src args, we would need to do the same for writes_pred()..
     */
    if (writes_addr(instr)) {
        struct ir3 *ir = instr->block->shader;
        bool ready = false;
        for (unsigned i = 0; (i < ir->indirects_count) && !ready; i++) {
            struct ir3_instruction *indirect = ir->indirects[i];
            if (!indirect)
                continue;
            if (indirect->address != instr)
                continue;
            ready = could_sched(indirect, instr);
        }

        /* nothing could be scheduled, so keep looking: */
        if (!ready)
            return false;
    }

    /* if this is a write to address/predicate register, and that
     * register is currently in use, we need to defer until it is
     * free:
     */
    if (writes_addr(instr) && ctx->addr) {
        debug_assert(ctx->addr != instr);
        notes->addr_conflict = true;
        return false;
    }

    if (writes_pred(instr) && ctx->pred) {
        debug_assert(ctx->pred != instr);
        notes->pred_conflict = true;
        return false;
    }

    /* if the instruction is a kill, we need to ensure *every*
     * bary.f is scheduled.  The hw seems unhappy if the thread
     * gets killed before the end-input (ei) flag is hit.
     *
     * We could do this by adding each bary.f instruction as
     * virtual ssa src for the kill instruction.  But we have
     * fixed length instr->regs[].
     *
     * TODO this wouldn't be quite right if we had multiple
     * basic blocks, if any block was conditional.  We'd need
     * to schedule the bary.f's outside of any block which
     * was conditional that contained a kill.. I think..
     */
    if (is_kill(instr)) {
        struct ir3 *ir = instr->block->shader;

        for (unsigned i = 0; i < ir->baryfs_count; i++) {
            struct ir3_instruction *baryf = ir->baryfs[i];
            if (baryf->flags & IR3_INSTR_UNUSED)
                continue;
            if (!is_scheduled(baryf)) {
                notes->blocked_kill = true;
                return false;
            }
        }
    }

    return true;
}

/* Find the best instruction to schedule from the specified instruction or,
 * recursively, its ssa sources.
 */
static struct ir3_instruction *
find_instr_recursive(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
        struct ir3_instruction *instr)
{
    struct ir3_instruction *srcs[__ssa_src_cnt(instr)];
    struct ir3_instruction *src;
    unsigned nsrcs = 0;

    if (is_scheduled(instr))
        return NULL;

    /* use instr->data to cache the results of recursing up the
     * instr src's.  Otherwise the recursive algo can scale quite
     * badly w/ shader size.  But this takes some care to clear
     * the cache appropriately when instructions are scheduled.
     */
    if (instr->data) {
        if (instr->data == NULL_INSTR)
            return NULL;
        return instr->data;
    }

    /* find unscheduled srcs: */
    foreach_ssa_src(src, instr) {
        if (!is_scheduled(src)) {
            debug_assert(nsrcs < ARRAY_SIZE(srcs));
            srcs[nsrcs++] = src;
        }
    }

    /* if all our src's are already scheduled: */
    if (nsrcs == 0) {
        if (check_instr(ctx, notes, instr)) {
            instr->data = instr;
            return instr;
        }
        return NULL;
    }

    while ((src = deepest(srcs, nsrcs))) {
        struct ir3_instruction *candidate;

        candidate = find_instr_recursive(ctx, notes, src);
        if (!candidate)
            continue;

        if (check_instr(ctx, notes, candidate)) {
            instr->data = candidate;
            return candidate;
        }
    }

    instr->data = NULL_INSTR;
    return NULL;
}

/* find instruction to schedule: */
static struct ir3_instruction *
find_eligible_instr(struct ir3_sched_ctx *ctx, struct ir3_sched_notes *notes,
        bool soft)
{
    struct ir3_instruction *best_instr = NULL;
    unsigned min_delay = ~0;

    /* TODO we'd really rather use the list/array of block outputs.  But we
     * don't have such a thing.  Recursing *every* instruction in the list
     * will result in a lot of repeated traversal, since instructions will
     * get traversed both when they appear as ssa src to a later instruction
     * as well as where they appear in the depth_list.
     */
    list_for_each_entry_rev (struct ir3_instruction, instr, &ctx->depth_list, node) {
        struct ir3_instruction *candidate;
        unsigned delay;

        candidate = find_instr_recursive(ctx, notes, instr);
        if (!candidate)
            continue;

        delay = delay_calc(ctx, candidate, soft);
        if (delay < min_delay) {
            best_instr = candidate;
            min_delay = delay;
        }

        if (min_delay == 0)
            break;
    }

    return best_instr;
}

/* "spill" the address register by remapping any unscheduled
 * instructions which depend on the current address register
 * to a clone of the instruction which wrote the address reg.
 */
static struct ir3_instruction *
split_addr(struct ir3_sched_ctx *ctx)
{
    struct ir3 *ir;
    struct ir3_instruction *new_addr = NULL;
    unsigned i;

    debug_assert(ctx->addr);

    ir = ctx->addr->block->shader;

    for (i = 0; i < ir->indirects_count; i++) {
        struct ir3_instruction *indirect = ir->indirects[i];

        if (!indirect)
            continue;

        /* skip instructions already scheduled: */
        if (is_scheduled(indirect))
            continue;

        /* remap remaining instructions using current addr
         * to new addr:
         */
        if (indirect->address == ctx->addr) {
            if (!new_addr) {
                new_addr = ir3_instr_clone(ctx->addr);
                /* original addr is scheduled, but new one isn't: */
                new_addr->flags &= ~IR3_INSTR_MARK;
            }
            ir3_instr_set_address(indirect, new_addr);
        }
    }

    /* all remaining indirects remapped to new addr: */
    ctx->addr = NULL;

    return new_addr;
}

/* "spill" the predicate register by remapping any unscheduled
 * instructions which depend on the current predicate register
 * to a clone of the instruction which wrote the predicate reg.
 */
static struct ir3_instruction *
split_pred(struct ir3_sched_ctx *ctx)
{
    struct ir3 *ir;
    struct ir3_instruction *new_pred = NULL;
    unsigned i;

    debug_assert(ctx->pred);

    ir = ctx->pred->block->shader;

    for (i = 0; i < ir->predicates_count; i++) {
        struct ir3_instruction *predicated = ir->predicates[i];

        /* skip instructions already scheduled: */
        if (is_scheduled(predicated))
            continue;

        /* remap remaining instructions using current pred
         * to new pred:
         *
         * TODO is there ever a case when pred isn't first
         * (and only) src?
         */
        if (ssa(predicated->regs[1]) == ctx->pred) {
            if (!new_pred) {
                new_pred = ir3_instr_clone(ctx->pred);
                /* original pred is scheduled, but new one isn't: */
                new_pred->flags &= ~IR3_INSTR_MARK;
            }
            predicated->regs[1]->instr = new_pred;
        }
    }

    /* all remaining predicated remapped to new pred: */
    ctx->pred = NULL;

    return new_pred;
}

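/* Schedule a single block: move its instructions onto a depth sorted
 * list, then repeatedly emit the eligible instruction with the least
 * remaining delay, padding with nop's when the delay cannot be hidden,
 * and splitting addr/pred writers when scheduling would otherwise
 * deadlock.  Finally append the branch/jump to the successor block(s).
 */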
static void
sched_block(struct ir3_sched_ctx *ctx, struct ir3_block *block)
{
    struct list_head unscheduled_list;

    ctx->block = block;

    /* addr/pred writes are per-block: */
    ctx->addr = NULL;
    ctx->pred = NULL;

    /* move all instructions to the unscheduled list, and
     * empty the block's instruction list (to which we will
     * be inserting).
     */
    list_replace(&block->instr_list, &unscheduled_list);
    list_inithead(&block->instr_list);
    list_inithead(&ctx->depth_list);

    /* first a pre-pass to schedule all meta:input/phi instructions
     * (which need to appear first so that RA knows the register is
     * occupied), and move remaining to depth sorted list:
     */
    list_for_each_entry_safe (struct ir3_instruction, instr, &unscheduled_list, node) {
        if ((instr->opc == OPC_META_INPUT) || (instr->opc == OPC_META_PHI)) {
            schedule(ctx, instr);
        } else {
            ir3_insert_by_depth(instr, &ctx->depth_list);
        }
    }

    while (!list_empty(&ctx->depth_list)) {
        struct ir3_sched_notes notes = {0};
        struct ir3_instruction *instr;

        instr = find_eligible_instr(ctx, &notes, true);
        if (!instr)
            instr = find_eligible_instr(ctx, &notes, false);

        if (instr) {
            unsigned delay = delay_calc(ctx, instr, false);

            /* and if we run out of instructions that can be scheduled,
             * then it is time for nop's:
             */
            debug_assert(delay <= 6);
            while (delay > 0) {
                ir3_NOP(block);
                delay--;
            }

            schedule(ctx, instr);
        } else {
            struct ir3_instruction *new_instr = NULL;

            /* nothing available to schedule.. if we are blocked on
             * address/predicate register conflict, then break the
             * deadlock by cloning the instruction that wrote that
             * reg:
             */
            if (notes.addr_conflict) {
                new_instr = split_addr(ctx);
            } else if (notes.pred_conflict) {
                new_instr = split_pred(ctx);
            } else {
                debug_assert(0);
                ctx->error = true;
                return;
            }

            if (new_instr) {
                /* clearing current addr/pred can change what is
                 * available to schedule, so clear cache..
                 */
                clear_cache(ctx, NULL);

                ir3_insert_by_depth(new_instr, &ctx->depth_list);
                /* the original instr that wrote addr/pred may have
                 * originated from a different block:
                 */
                new_instr->block = block;
            }
        }
    }

    /* And lastly, insert branch/jump instructions to take us to
     * the next block.  Later we'll strip back out the branches
     * that simply jump to next instruction.
     */
    if (block->successors[1]) {
        /* if/else, conditional branches to "then" or "else": */
        struct ir3_instruction *br;
        unsigned delay = 6;

        debug_assert(ctx->pred);
        debug_assert(block->condition);

        delay -= distance(ctx, ctx->pred, delay);

        while (delay > 0) {
            ir3_NOP(block);
            delay--;
        }

        /* create "else" branch first (since "then" block should
         * frequently/always end up being a fall-thru):
         */
        br = ir3_BR(block);
        br->cat0.inv = true;
        br->cat0.target = block->successors[1];

        /* NOTE: we have to hard code delay of 6 above, since
         * we want to insert the nop's before constructing the
         * branch.  Throw in an assert so we notice if this
         * ever breaks on future generation:
         */
        debug_assert(ir3_delayslots(ctx->pred, br, 0) == 6);

        br = ir3_BR(block);
        br->cat0.target = block->successors[0];

    } else if (block->successors[0]) {
        /* otherwise unconditional jump to next block: */
        struct ir3_instruction *jmp;

        jmp = ir3_JUMP(block);
        jmp->cat0.target = block->successors[0];
    }

    /* NOTE: if we kept track of the predecessors, we could do a better
     * job w/ (jp) flags.. every node w/ > 1 predecessor is a join point.
     * Note that as we eliminate blocks which contain only an unconditional
     * jump we probably need to propagate (jp) flag..
     */
}

/* this is needed to ensure later RA stage succeeds: */
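/* (For each phi src, a mov is inserted in the block where that value is
 * produced, reusing an existing mov if CP left duplicate srcs.  The mov's
 * dst is flagged IR3_REG_PHI_SRC and pointed back at the phi, presumably
 * so that RA can associate the copies with the phi.)
 */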
static void
sched_insert_parallel_copies(struct ir3_block *block)
{
    list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
        if (instr->opc == OPC_META_PHI) {
            struct ir3_register *reg, *reg2;
            foreach_src(reg, instr) {
                struct ir3_instruction *src = reg->instr;
                struct ir3_instruction *mov = NULL;

                /* after CP we could end up w/ duplicate phi srcs: */
                foreach_src(reg2, instr) {
                    if (reg == reg2)
                        break;
                    /* reg2 comes before reg, so its mov was already inserted: */
                    else if (reg2->instr->regs[1]->instr == src) {
                        mov = reg2->instr;
                        break;
                    }
                }

                if (!mov) {
                    mov = ir3_MOV(src->block, src, TYPE_U32);
                    mov->regs[0]->flags |= IR3_REG_PHI_SRC;
                    mov->regs[0]->instr = instr;
                }

                reg->instr = mov;
            }
        }
    }
}

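/* Scheduling entry point: insert parallel copies for phi srcs, clear the
 * scheduling marks, and schedule each block.  Returns 0 on success, or
 * -1 if the scheduler got stuck (ctx.error).
 */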
int ir3_sched(struct ir3 *ir)
{
    struct ir3_sched_ctx ctx = {0};
    list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
        sched_insert_parallel_copies(block);
    }
    ir3_clear_mark(ir);
    list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
        sched_block(&ctx, block);
    }
    if (ctx.error)
        return -1;
    return 0;
}

/* does instruction 'prior' need to be scheduled before 'instr'? */
static bool
depends_on(struct ir3_instruction *instr, struct ir3_instruction *prior)
{
    /* TODO for dependencies that are related to a specific object, ie
     * a specific SSBO/image/array, we could relax this constraint to
     * make accesses to unrelated objects not depend on each other (at
     * least as long as not declared coherent)
     */
    if (((instr->barrier_class & IR3_BARRIER_EVERYTHING) && prior->barrier_class) ||
            ((prior->barrier_class & IR3_BARRIER_EVERYTHING) && instr->barrier_class))
        return true;
    return !!(instr->barrier_class & prior->barrier_conflict);
}

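/* Walk backwards and forwards from 'instr' (which has a non-zero
 * barrier_class), adding false dependencies against any non-meta
 * instruction it must stay ordered with.  The walk in each direction
 * stops at the first instruction with the same barrier_class, since
 * that instruction carries the ordering for anything beyond it.
 */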
static void
add_barrier_deps(struct ir3_block *block, struct ir3_instruction *instr)
{
    struct list_head *prev = instr->node.prev;
    struct list_head *next = instr->node.next;

    /* add dependencies on previous instructions that must be scheduled
     * prior to the current instruction:
     */
    while (prev != &block->instr_list) {
        struct ir3_instruction *pi =
            LIST_ENTRY(struct ir3_instruction, prev, node);

        prev = prev->prev;

        if (is_meta(pi))
            continue;

        if (instr->barrier_class == pi->barrier_class) {
            ir3_instr_add_dep(instr, pi);
            break;
        }

        if (depends_on(instr, pi))
            ir3_instr_add_dep(instr, pi);
    }

    /* add dependencies on this instruction to following instructions
     * that must be scheduled after the current instruction:
     */
    while (next != &block->instr_list) {
        struct ir3_instruction *ni =
            LIST_ENTRY(struct ir3_instruction, next, node);

        next = next->next;

        if (is_meta(ni))
            continue;

        if (instr->barrier_class == ni->barrier_class) {
            ir3_instr_add_dep(ni, instr);
            break;
        }

        if (depends_on(ni, instr))
            ir3_instr_add_dep(ni, instr);
    }
}

/* before scheduling a block, we need to add any necessary false-dependencies
 * to ensure that:
 *
 *  (1) barriers are scheduled in the right order wrt instructions related
 *      to the barrier
 *
 *  (2) reads that come before a write actually get scheduled before the
 *      write
 */
static void
calculate_deps(struct ir3_block *block)
{
    list_for_each_entry (struct ir3_instruction, instr, &block->instr_list, node) {
        if (instr->barrier_class) {
            add_barrier_deps(block, instr);
        }
    }
}

void
ir3_sched_add_deps(struct ir3 *ir)
{
    list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
        calculate_deps(block);
    }
}