/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#ifndef __BIFROST_COMPILER_H
#define __BIFROST_COMPILER_H

#include "bifrost.h"
#include "bi_opcodes.h"
#include "compiler/nir/nir.h"
#include "panfrost/util/pan_ir.h"
#include "util/u_math.h"
#include "util/half_float.h"
#include "util/u_worklist.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
 * To express widen, use the corresponding replicated form, i.e. H01 = identity
 * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also
 * use the replicated form (interpretation is governed by the opcode). For
 * 8-bit lanes with two channels, use the replicated forms (TODO: what about
 * others?). For 8-bit lanes with four channels, use the matching form (TODO:
 * what about others?).
 */

enum bi_swizzle {
        /* 16-bit swizzle ordering deliberate for fast compute */
        BI_SWIZZLE_H00 = 0, /* = B0101 */
        BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */
        BI_SWIZZLE_H10 = 2, /* = B2301 */
        BI_SWIZZLE_H11 = 3, /* = B2323 */

        /* replication order should be maintained for fast compute */
        BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */
        BI_SWIZZLE_B1111 = 5,
        BI_SWIZZLE_B2222 = 6,
        BI_SWIZZLE_B3333 = 7,

        /* totally special for explicit pattern matching */
        BI_SWIZZLE_B0011 = 8, /* +SWZ.v4i8 */
        BI_SWIZZLE_B2233 = 9, /* +SWZ.v4i8 */
        BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */
        BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */

        BI_SWIZZLE_B0022 = 12, /* for b02 lanes */
};

/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
 * folding and Valhall constant optimization. */

static inline uint32_t
bi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
{
   const uint16_t *h = (const uint16_t *) &value;
   const uint8_t  *b = (const uint8_t *) &value;

#define H(h0, h1) (h[h0] | (h[h1] << 16))
#define B(b0, b1, b2, b3) (b[b0] | (b[b1] << 8) | (b[b2] << 16) | (b[b3] << 24))

   switch (swz) {
   case BI_SWIZZLE_H00: return H(0, 0);
   case BI_SWIZZLE_H01: return H(0, 1);
   case BI_SWIZZLE_H10: return H(1, 0);
   case BI_SWIZZLE_H11: return H(1, 1);
   case BI_SWIZZLE_B0000: return B(0, 0, 0, 0);
   case BI_SWIZZLE_B1111: return B(1, 1, 1, 1);
   case BI_SWIZZLE_B2222: return B(2, 2, 2, 2);
   case BI_SWIZZLE_B3333: return B(3, 3, 3, 3);
   case BI_SWIZZLE_B0011: return B(0, 0, 1, 1);
   case BI_SWIZZLE_B2233: return B(2, 2, 3, 3);
   case BI_SWIZZLE_B1032: return B(1, 0, 3, 2);
   case BI_SWIZZLE_B3210: return B(3, 2, 1, 0);
   case BI_SWIZZLE_B0022: return B(0, 0, 2, 2);
   }

#undef H
#undef B

   unreachable("Invalid swizzle");
}
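
/* Worked example (illustrative, not from the original source): on a
 * little-endian host, the constant 0x44332211 has bytes b0=0x11 .. b3=0x44
 * and halves h0=0x2211, h1=0x4433, so:
 *
 *    bi_apply_swizzle(0x44332211, BI_SWIZZLE_H10)   == 0x22114433
 *    bi_apply_swizzle(0x44332211, BI_SWIZZLE_B3210) == 0x11223344
 *    bi_apply_swizzle(0x44332211, BI_SWIZZLE_B1111) == 0x22222222
 */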

enum bi_index_type {
        BI_INDEX_NULL = 0,
        BI_INDEX_NORMAL = 1,
        BI_INDEX_REGISTER = 2,
        BI_INDEX_CONSTANT = 3,
        BI_INDEX_PASS = 4,
        BI_INDEX_FAU = 5
};

typedef struct {
        uint32_t value;

        /* modifiers, should only be set if applicable for a given instruction.
         * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where
         * applicable, neg plays the role of not */
        bool abs : 1;
        bool neg : 1;

        /* The last use of a value, should be purged from the register cache.
         * Set by liveness analysis. */
        bool discard : 1;

        /* For a source, the swizzle. For a destination, acts a bit like a
         * write mask. Identity for the full 32-bit, H00 for only caring about
         * the lower half, other values unused. */
        enum bi_swizzle swizzle : 4;
        uint32_t offset : 3;
        bool reg : 1;
        enum bi_index_type type : 3;

        /* Must be zeroed so we can hash the whole 64-bits at a time */
        unsigned padding : (32 - 14);
} bi_index;
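
/* A minimal sketch (illustrative, not part of the API): because bi_index is
 * exactly 64 bits and the padding is kept zeroed, it can be hashed or
 * compared as raw memory, e.g.:
 *
 *    static bool bi_index_identical(bi_index a, bi_index b)
 *    {
 *            return memcmp(&a, &b, sizeof(bi_index)) == 0;
 *    }
 *
 * Unlike bi_is_equiv below, this also distinguishes swizzles and modifiers. */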

static inline bi_index
bi_get_index(unsigned value, bool is_reg, unsigned offset)
{
        return (bi_index) {
                .value = value,
                .swizzle = BI_SWIZZLE_H01,
                .offset = offset,
                .reg = is_reg,
                .type = BI_INDEX_NORMAL,
        };
}

static inline bi_index
bi_register(unsigned reg)
{
        assert(reg < 64);

        return (bi_index) {
                .value = reg,
                .swizzle = BI_SWIZZLE_H01,
                .type = BI_INDEX_REGISTER,
        };
}

static inline bi_index
bi_imm_u32(uint32_t imm)
{
        return (bi_index) {
                .value = imm,
                .swizzle = BI_SWIZZLE_H01,
                .type = BI_INDEX_CONSTANT,
        };
}

static inline bi_index
bi_imm_f32(float imm)
{
        return bi_imm_u32(fui(imm));
}

static inline bi_index
bi_null()
{
        return (bi_index) { .type = BI_INDEX_NULL };
}

static inline bi_index
bi_zero()
{
        return bi_imm_u32(0);
}

static inline bi_index
bi_passthrough(enum bifrost_packed_src value)
{
        return (bi_index) {
                .value = value,
                .swizzle = BI_SWIZZLE_H01,
                .type = BI_INDEX_PASS,
        };
}

/* Helps construct swizzles */
static inline bi_index
bi_swz_16(bi_index idx, bool x, bool y)
{
        assert(idx.swizzle == BI_SWIZZLE_H01);
        idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_H00 | (x << 1) | y);
        return idx;
}

static inline bi_index
bi_half(bi_index idx, bool upper)
{
        return bi_swz_16(idx, upper, upper);
}

static inline bi_index
bi_byte(bi_index idx, unsigned lane)
{
        assert(idx.swizzle == BI_SWIZZLE_H01);
        assert(lane < 4);
        idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0000 + lane);
        return idx;
}

static inline bi_index
bi_abs(bi_index idx)
{
        idx.abs = true;
        return idx;
}

static inline bi_index
bi_neg(bi_index idx)
{
        idx.neg ^= true;
        return idx;
}

static inline bi_index
bi_discard(bi_index idx)
{
        idx.discard = true;
        return idx;
}
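
/* Example (illustrative): the helpers compose, so the negated upper half of a
 * 32-bit value x, written -x.h1 in ISA notation, is built as
 *
 *    bi_index src = bi_neg(bi_half(x, true));
 *
 * bi_half(x, true) selects swizzle H11 (the replicated upper-half form, per
 * the convention above) and bi_neg then toggles the neg modifier. */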

/* Additive identity in IEEE 754 arithmetic */
static inline bi_index
bi_negzero()
{
        return bi_neg(bi_zero());
}

/* Replaces an index, preserving any modifiers */

static inline bi_index
bi_replace_index(bi_index old, bi_index replacement)
{
        replacement.abs = old.abs;
        replacement.neg = old.neg;
        replacement.swizzle = old.swizzle;
        replacement.discard = false; /* needs liveness analysis to set */
        return replacement;
}

/* Remove any modifiers. This has the property:
 *
 *     replace_index(x, strip_index(x)) = x
 *
 * This ensures it is suitable to use when lowering sources to moves */

static inline bi_index
bi_strip_index(bi_index index)
{
        index.abs = index.neg = false;
        index.swizzle = BI_SWIZZLE_H01;
        return index;
}

/* For bitwise instructions */
#define bi_not(x) bi_neg(x)

static inline bi_index
bi_imm_u8(uint8_t imm)
{
        return bi_byte(bi_imm_u32(imm), 0);
}

static inline bi_index
bi_imm_u16(uint16_t imm)
{
        return bi_half(bi_imm_u32(imm), false);
}

static inline bi_index
bi_imm_uintN(uint32_t imm, unsigned sz)
{
        assert(sz == 8 || sz == 16 || sz == 32);
        return (sz == 8) ? bi_imm_u8(imm) :
                (sz == 16) ? bi_imm_u16(imm) :
                bi_imm_u32(imm);
}

static inline bi_index
bi_imm_f16(float imm)
{
        return bi_imm_u16(_mesa_float_to_half(imm));
}

static inline bool
bi_is_null(bi_index idx)
{
        return idx.type == BI_INDEX_NULL;
}

static inline bool
bi_is_ssa(bi_index idx)
{
        return idx.type == BI_INDEX_NORMAL && !idx.reg;
}

/* Compares equivalence as references. Does not compare offsets, swizzles, or
 * modifiers. In other words, this forms bi_index equivalence classes by
 * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */

static inline bool
bi_is_equiv(bi_index left, bi_index right)
{
        return (left.type == right.type) &&
                (left.reg == right.reg) &&
                (left.value == right.value);
}

/* A stronger equivalence relation that requires the indices access the
 * same offset, useful for RA/scheduling to see what registers will
 * correspond to */

static inline bool
bi_is_word_equiv(bi_index left, bi_index right)
{
        return bi_is_equiv(left, right) && left.offset == right.offset;
}

/* An even stronger equivalence that checks if indices correspond to the
 * right value when evaluated
 */
static inline bool
bi_is_value_equiv(bi_index left, bi_index right)
{
        if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) {
                return (bi_apply_swizzle(left.value, left.swizzle) ==
                        bi_apply_swizzle(right.value, right.swizzle)) &&
                       (left.abs == right.abs) &&
                       (left.neg == right.neg);
        } else {
                return (left.value == right.value) &&
                       (left.abs == right.abs) &&
                       (left.neg == right.neg) &&
                       (left.swizzle == right.swizzle) &&
                       (left.offset == right.offset) &&
                       (left.reg == right.reg) &&
                       (left.type == right.type);
        }
}
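
/* Example (illustrative): for constants, value equivalence compares the
 * swizzled result rather than the raw bits, so bi_imm_u16(0x1234) (value
 * 0x1234, swizzle H00) is value-equivalent to bi_imm_u32(0x12341234): both
 * evaluate to 0x12341234. The two are not bi_is_equiv, since their raw
 * values differ. */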

#define BI_MAX_VEC 8
#define BI_MAX_DESTS 4
#define BI_MAX_SRCS 6

typedef struct {
        /* Must be first */
        struct list_head link;

        enum bi_opcode op;
        uint8_t nr_srcs;
        uint8_t nr_dests;

        /* Data flow */
        bi_index dest[BI_MAX_DESTS];
        bi_index src[BI_MAX_SRCS];

        /* For a branch */
        struct bi_block *branch_target;

        /* These don't fit neatly with anything else.. */
        enum bi_register_format register_format;
        enum bi_vecsize vecsize;

        /* Flow control associated with a Valhall instruction */
        uint8_t flow;

        /* Slot associated with a message-passing instruction */
        uint8_t slot;

        /* Can we spill the value written here? Used to prevent
         * useless double fills */
        bool no_spill;

        /* On Bifrost: A value of bi_table to override the table, inducing a
         * DTSEL_IMM pair if nonzero.
         *
         * On Valhall: the table index to use for resource instructions.
         *
         * These two interpretations are equivalent if you squint a bit.
         */
        unsigned table;

        /* Everything after this MUST NOT be accessed directly, since
         * interpretation depends on opcodes */

        /* Destination modifiers */
        union {
                enum bi_clamp clamp;
                bool saturate;
                bool not_result;
                unsigned dest_mod;
        };

        /* Immediates. All seen alone in an instruction, except for varying/texture
         * which are specified jointly for VARTEX */
        union {
                uint32_t shift;
                uint32_t fill;
                uint32_t index;
                uint32_t attribute_index;

                struct {
                        uint32_t varying_index;
                        uint32_t sampler_index;
                        uint32_t texture_index;
                };

                /* TEXC, ATOM_CX: # of staging registers used */
                struct {
                        uint32_t sr_count;
                        uint32_t sr_count_2;

                        union {
                                /* Atomics effectively require all three */
                                int32_t byte_offset;

                                /* BLEND requires all three */
                                int32_t branch_offset;
                        };
                };
        };

        /* Modifiers specific to particular instructions are thrown in a union */
        union {
                enum bi_adj adj; /* FEXP_TABLE.u4 */
                enum bi_atom_opc atom_opc; /* atomics */
                enum bi_func func; /* FPOW_SC_DET */
                enum bi_function function; /* LD_VAR_FLAT */
                enum bi_mux mux; /* MUX */
                enum bi_sem sem; /* FMAX, FMIN */
                enum bi_source source; /* LD_GCLK */
                bool scale; /* VN_ASST2, FSINCOS_OFFSET */
                bool offset; /* FSIN_TABLE, FOCS_TABLE */
                bool mask; /* CLZ */
                bool threads; /* IMULD, IMOV_FMA */
                bool combine; /* BRANCHC */
                bool format; /* LEA_TEX */

                struct {
                        enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */
                        enum bi_round round; /* FMA, converts, FADD, _RSCALE, etc */
                        bool ftz; /* Flush-to-zero for F16_TO_F32 */
                };

                struct {
                        enum bi_result_type result_type; /* FCMP, ICMP */
                        enum bi_cmpf cmpf; /* CSEL, FCMP, ICMP, BRANCH */
                };

                struct {
                        enum bi_stack_mode stack_mode; /* JUMP_EX */
                        bool test_mode;
                };

                struct {
                        enum bi_seg seg; /* LOAD, STORE, SEG_ADD, SEG_SUB */
                        bool preserve_null; /* SEG_ADD, SEG_SUB */
                        enum bi_extend extend; /* LOAD, IMUL */
                };

                struct {
                        enum bi_sample sample; /* VAR_TEX, LD_VAR */
                        enum bi_update update; /* VAR_TEX, LD_VAR */
                        enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */
                        bool skip; /* VAR_TEX, TEXS, TEXC */
                        bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */
                        enum bi_source_format source_format; /* LD_VAR_BUF */

                        /* Used for valhall texturing */
                        bool shadow;
                        bool texel_offset;
                        bool array_enable;
                        bool integer_coordinates;
                        enum bi_fetch_component fetch_component;
                        enum bi_va_lod_mode va_lod_mode;
                        enum bi_dimension dimension;
                        enum bi_write_mask write_mask;
                };

                /* Maximum size, for hashing */
                unsigned flags[14];

                struct {
                        enum bi_subgroup subgroup; /* WMASK, CLPER */
                        enum bi_inactive_result inactive_result; /* CLPER */
                        enum bi_lane_op lane_op; /* CLPER */
                };

                struct {
                        bool z; /* ZS_EMIT */
                        bool stencil; /* ZS_EMIT */
                };

                struct {
                        bool h; /* VN_ASST1.f16 */
                        bool l; /* VN_ASST1.f16 */
                };

                struct {
                        bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */
                        bool result_word;
                        bool arithmetic; /* ARSHIFT_OR */
                };

                struct {
                        bool sqrt; /* FREXPM */
                        bool log; /* FREXPM */
                };

                struct {
                        enum bi_mode mode; /* FLOG_TABLE */
                        enum bi_precision precision; /* FLOG_TABLE */
                        bool divzero; /* FRSQ_APPROX, FRSQ */
                };
        };
} bi_instr;

static inline bool
bi_is_staging_src(const bi_instr *I, unsigned s)
{
        return (s == 0 || s == 4) && bi_opcode_props[I->op].sr_read;
}

/* Represents the assignment of slots for a given bi_tuple */

typedef struct {
        /* Register to assign to each slot */
        unsigned slot[4];

        /* Read slots can be disabled */
        bool enabled[2];

        /* Configuration for slots 2/3 */
        struct bifrost_reg_ctrl_23 slot23;

        /* Fast-Access-Uniform RAM index */
        uint8_t fau_idx;

        /* Whether writes are actually for the last instruction */
        bool first_instruction;
} bi_registers;

/* A bi_tuple contains two paired instruction pointers. If a slot is unfilled,
 * leave it NULL; the emitter will fill in a nop. Instructions reference
 * registers via slots which are assigned per tuple.
 */

typedef struct {
        uint8_t fau_idx;
        bi_registers regs;
        bi_instr *fma;
        bi_instr *add;
} bi_tuple;

struct bi_block;

typedef struct {
        struct list_head link;

        /* Link back up for branch calculations */
        struct bi_block *block;

        /* Architectural limit of 8 tuples/clause */
        unsigned tuple_count;
        bi_tuple tuples[8];

        /* For scoreboarding -- the clause ID (this is not globally unique!)
         * and its dependencies in terms of other clauses, computed during
         * scheduling and used when emitting code. Dependencies expressed as a
         * bitfield matching the hardware, except shifted by a clause (the
         * shift back to the ISA's off-by-one encoding is worked out when
         * emitting clauses) */
        unsigned scoreboard_id;
        uint8_t dependencies;

        /* See ISA header for description */
        enum bifrost_flow flow_control;

        /* Can we prefetch the next clause? Usually it makes sense, except for
         * clauses ending in unconditional branches */
        bool next_clause_prefetch;

        /* Assigned data register */
        unsigned staging_register;

        /* Corresponds to the usual bit but shifted by a clause */
        bool staging_barrier;

        /* Constants read by this clause. ISA limit. Must satisfy:
         *
         *      constant_count + tuple_count <= 13
         *
         * Also implicitly constant_count <= tuple_count since a tuple only
         * reads a single constant.
         */
        uint64_t constants[8];
        unsigned constant_count;

        /* Index of a constant to be PC-relative */
        unsigned pcrel_idx;

        /* Branches encode a constant offset relative to the program counter
         * with some magic flags. By convention, if there is a branch, its
         * constant will be last. Set this flag to indicate this is required.
         */
        bool branch_constant;

        /* Unique in a clause */
        enum bifrost_message_type message_type;
        bi_instr *message;

        /* Discard helper threads */
        bool td;

        /* Should flush-to-zero mode be enabled for this clause? */
        bool ftz;
} bi_clause;

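/* Quick validation sketch (illustrative, not part of the compiler): the
 * limits stated above can be checked per clause as
 *
 *    assert(clause->constant_count + clause->tuple_count <= 13);
 *    assert(clause->tuple_count >= 1 && clause->tuple_count <= 8);
 */
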
#define BI_NUM_SLOTS 8

/* A model for the state of the scoreboard */
struct bi_scoreboard_state {
        /** Bitmap of registers read/written by a slot */
        uint64_t read[BI_NUM_SLOTS];
        uint64_t write[BI_NUM_SLOTS];

        /* Nonregister dependencies present per slot */
        uint8_t varying : BI_NUM_SLOTS;
        uint8_t memory : BI_NUM_SLOTS;
};

typedef struct bi_block {
        /* Link to next block. Must be first for mir_get_block */
        struct list_head link;

        /* List of instructions emitted for the current block */
        struct list_head instructions;

        /* Index of the block in source order */
        unsigned index;

        /* Control flow graph */
        struct bi_block *successors[2];
        struct util_dynarray predecessors;
        bool unconditional_jumps;

        /* Per 32-bit word live masks for the block indexed by node */
        uint8_t *live_in;
        uint8_t *live_out;

        /* If true, uses clauses; if false, uses instructions */
        bool scheduled;
        struct list_head clauses; /* list of bi_clause */

        /* Post-RA liveness */
        uint64_t reg_live_in, reg_live_out;

        /* Scoreboard state at the start/end of block */
        struct bi_scoreboard_state scoreboard_in, scoreboard_out;

        /* On Valhall, indicates we need a terminal NOP to implement jumps to
         * the end of the shader.
         */
        bool needs_nop;

        /* Flags available for pass-internal use */
        uint8_t pass_flags;
} bi_block;

static inline unsigned
bi_num_predecessors(bi_block *block)
{
        return util_dynarray_num_elements(&block->predecessors, bi_block *);
}

static inline bi_block *
bi_start_block(struct list_head *blocks)
{
        bi_block *first = list_first_entry(blocks, bi_block, link);
        assert(bi_num_predecessors(first) == 0);
        return first;
}

static inline bi_block *
bi_exit_block(struct list_head *blocks)
{
        bi_block *last = list_last_entry(blocks, bi_block, link);
        assert(!last->successors[0] && !last->successors[1]);
        return last;
}

static inline void
bi_block_add_successor(bi_block *block, bi_block *successor)
{
        assert(block != NULL && successor != NULL);

        /* Cull impossible edges */
        if (block->unconditional_jumps)
                return;

        for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
                if (block->successors[i]) {
                       if (block->successors[i] == successor)
                               return;
                       else
                               continue;
                }

                block->successors[i] = successor;
                util_dynarray_append(&successor->predecessors, bi_block *, block);
                return;
        }

        unreachable("Too many successors");
}

/* Subset of pan_shader_info needed per-variant, in order to support IDVS */
struct bi_shader_info {
        struct panfrost_ubo_push *push;
        struct bifrost_shader_info *bifrost;
        struct panfrost_sysvals *sysvals;
        unsigned tls_size;
        unsigned work_reg_count;
        unsigned push_offset;
};

/* State of index-driven vertex shading for current shader */
enum bi_idvs_mode {
        /* IDVS not in use */
        BI_IDVS_NONE = 0,

        /* IDVS in use. Compiling a position shader */
        BI_IDVS_POSITION = 1,

        /* IDVS in use. Compiling a varying shader */
        BI_IDVS_VARYING = 2,
};

typedef struct {
       const struct panfrost_compile_inputs *inputs;
       nir_shader *nir;
       struct bi_shader_info info;
       gl_shader_stage stage;
       struct list_head blocks; /* list of bi_block */
       struct hash_table_u64 *sysval_to_id;
       uint32_t quirks;
       unsigned arch;
       enum bi_idvs_mode idvs;
       unsigned num_blocks;

       /* In any graphics shader, whether the "IDVS with memory
        * allocation" flow is used. This affects how varyings are loaded and
        * stored. Ignore for compute.
        */
       bool malloc_idvs;

       /* During NIR->BIR */
       bi_block *current_block;
       bi_block *after_block;
       bi_block *break_block;
       bi_block *continue_block;
       bool emitted_atest;

       /* During NIR->BIR, the coverage bitmap. If this is NULL, the default
        * coverage bitmap should be sourced from preloaded register r60. This
        * is written by ATEST and ZS_EMIT
        */
       bi_index coverage;

       /* During NIR->BIR, table of preloaded registers, or NULL if never
        * preloaded.
        */
       bi_index preloaded[64];

       /* For creating temporaries */
       unsigned ssa_alloc;
       unsigned reg_alloc;

       /* Mask of UBOs that need to be uploaded */
       uint32_t ubo_mask;

       /* During instruction selection, map from vector bi_index to its scalar
        * components, populated by a split.
        */
       struct hash_table_u64 *allocated_vec;

       /* Stats for shader-db */
       unsigned instruction_count;
       unsigned loop_count;
       unsigned spills;
       unsigned fills;
} bi_context;

static inline void
bi_remove_instruction(bi_instr *ins)
{
        list_del(&ins->link);
}

enum bir_fau {
        BIR_FAU_ZERO = 0,
        BIR_FAU_LANE_ID = 1,
        BIR_FAU_WARP_ID = 2,
        BIR_FAU_CORE_ID = 3,
        BIR_FAU_FB_EXTENT = 4,
        BIR_FAU_ATEST_PARAM = 5,
        BIR_FAU_SAMPLE_POS_ARRAY = 6,
        BIR_FAU_BLEND_0 = 8,
        /* blend descs 1 - 7 */
        BIR_FAU_TYPE_MASK = 15,

        /* Valhall only */
        BIR_FAU_TLS_PTR = 16,
        BIR_FAU_WLS_PTR = 17,
        BIR_FAU_PROGRAM_COUNTER = 18,

        BIR_FAU_UNIFORM = (1 << 7),
        /* Look up table on Valhall */
        BIR_FAU_IMMEDIATE = (1 << 8),
};

static inline bi_index
bi_fau(enum bir_fau value, bool hi)
{
        return (bi_index) {
                .value = value,
                .swizzle = BI_SWIZZLE_H01,
                .offset = hi ? 1u : 0u,
                .type = BI_INDEX_FAU,
        };
}

/*
 * Builder for Valhall LUT entries. Generally, constants are modeled with
 * BI_INDEX_CONSTANT in the intermediate representation. This helper is only
 * necessary when lowering constants and for passes that run afterwards.
 */
static inline bi_index
va_lut(unsigned index)
{
        return bi_fau((enum bir_fau) (BIR_FAU_IMMEDIATE | (index >> 1)),
                      index & 1);
}
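
/* Worked example (illustrative): each FAU slot holds two 32-bit words, so
 * LUT entry 5 lives in the high half of immediate slot 2:
 *
 *    va_lut(5) == bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | 2), true)
 */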

/*
 * va_zero_lut is like bi_zero but only works on Valhall. It is intended for
 * use by late passes that run after constants are lowered, specifically
 * register allocation. bi_zero() is preferred where possible.
 */
static inline bi_index
va_zero_lut()
{
        return va_lut(0);
}

static inline unsigned
bi_max_temp(bi_context *ctx)
{
        return (MAX2(ctx->reg_alloc, ctx->ssa_alloc) + 2) << 1;
}

static inline bi_index
bi_temp(bi_context *ctx)
{
        return bi_get_index(ctx->ssa_alloc++, false, 0);
}

static inline bi_index
bi_temp_reg(bi_context *ctx)
{
        return bi_get_index(ctx->reg_alloc++, true, 0);
}

/* Inline constants automatically, will be lowered out by bi_lower_fau where a
 * constant is not allowed. load_const_to_scalar guarantees that this makes
 * sense */

static inline bi_index
bi_src_index(nir_src *src)
{
        if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32)
                return bi_imm_u32(nir_src_as_uint(*src));
        else if (src->is_ssa)
                return bi_get_index(src->ssa->index, false, 0);
        else {
                assert(!src->reg.indirect);
                return bi_get_index(src->reg.reg->index, true, 0);
        }
}

static inline bi_index
bi_dest_index(nir_dest *dst)
{
        if (dst->is_ssa)
                return bi_get_index(dst->ssa.index, false, 0);
        else {
                assert(!dst->reg.indirect);
                return bi_get_index(dst->reg.reg->index, true, 0);
        }
}

static inline unsigned
bi_get_node(bi_index index)
{
        if (bi_is_null(index) || index.type != BI_INDEX_NORMAL)
                return ~0;
        else
                return (index.value << 1) | index.reg;
}

static inline bi_index
bi_node_to_index(unsigned node, unsigned node_count)
{
        assert(node < node_count);
        assert(node_count < ~0u);

        return bi_get_index(node >> 1, node & PAN_IS_REG, 0);
}
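
/* Illustrative round trip: the low bit of a node encodes the register flag,
 * so for SSA value 5,
 *
 *    bi_get_node(bi_get_index(5, false, 0)) == 10
 *
 * and bi_node_to_index(10, node_count) recovers the index (value 5, reg
 * false), assuming PAN_IS_REG is the low bit as used here. */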

/* Iterators for Bifrost IR */

#define bi_foreach_block(ctx, v) \
        list_for_each_entry(bi_block, v, &ctx->blocks, link)

#define bi_foreach_block_rev(ctx, v) \
        list_for_each_entry_rev(bi_block, v, &ctx->blocks, link)

#define bi_foreach_block_from(ctx, from, v) \
        list_for_each_entry_from(bi_block, v, from, &ctx->blocks, link)

#define bi_foreach_block_from_rev(ctx, from, v) \
        list_for_each_entry_from_rev(bi_block, v, from, &ctx->blocks, link)

#define bi_foreach_instr_in_block(block, v) \
        list_for_each_entry(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_rev(block, v) \
        list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe(block, v) \
        list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe_rev(block, v) \
        list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from(block, v, from) \
        list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from_rev(block, v, from) \
        list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_clause_in_block(block, v) \
        list_for_each_entry(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_rev(block, v) \
        list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_safe(block, v) \
        list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from(block, v, from) \
        list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from_rev(block, v, from) \
        list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_instr_global(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block(v_block, v)

#define bi_foreach_instr_global_rev(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_rev(v_block, v)

#define bi_foreach_instr_global_safe(ctx, v) \
        bi_foreach_block(ctx, v_block) \
                bi_foreach_instr_in_block_safe(v_block, v)

#define bi_foreach_instr_global_rev_safe(ctx, v) \
        bi_foreach_block_rev(ctx, v_block) \
                bi_foreach_instr_in_block_safe_rev(v_block, v)

#define bi_foreach_instr_in_tuple(tuple, v) \
        for (bi_instr *v = (tuple)->fma ?: (tuple)->add; \
                        v != NULL; \
                        v = (v == (tuple)->add) ? NULL : (tuple)->add)

#define bi_foreach_successor(blk, v) \
        bi_block *v; \
        bi_block **_v; \
        for (_v = &blk->successors[0], \
                v = *_v; \
                v != NULL && _v < &blk->successors[2]; \
                _v++, v = *_v) \

#define bi_foreach_predecessor(blk, v) \
        util_dynarray_foreach(&(blk)->predecessors, bi_block *, v)

#define bi_foreach_src(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE(ins->src); ++v)

#define bi_foreach_dest(ins, v) \
        for (unsigned v = 0; v < ARRAY_SIZE(ins->dest); ++v)

#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s) \
        bi_foreach_instr_in_tuple(tuple, ins) \
                bi_foreach_src(ins, s)

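/* Usage sketch (illustrative): counting the uses of an index across a shader
 * with the iterators above. bi_is_equiv compares as references, so swizzles
 * and modifiers on the use are ignored.
 *
 *    unsigned uses = 0;
 *
 *    bi_foreach_instr_global(ctx, I) {
 *            bi_foreach_src(I, s) {
 *                    if (bi_is_equiv(I->src[s], needle))
 *                            uses++;
 *            }
 *    }
 */
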
static inline bi_instr *
bi_prev_op(bi_instr *ins)
{
        return list_last_entry(&(ins->link), bi_instr, link);
}

static inline bi_instr *
bi_next_op(bi_instr *ins)
{
        return list_first_entry(&(ins->link), bi_instr, link);
}

static inline bi_block *
bi_next_block(bi_block *block)
{
        return list_first_entry(&(block->link), bi_block, link);
}

static inline bi_block *
bi_entry_block(bi_context *ctx)
{
        return list_first_entry(&ctx->blocks, bi_block, link);
}

/* BIR manipulation */

bool bi_has_arg(const bi_instr *ins, bi_index arg);
unsigned bi_count_read_registers(const bi_instr *ins, unsigned src);
unsigned bi_count_write_registers(const bi_instr *ins, unsigned dest);
bool bi_is_regfmt_16(enum bi_register_format fmt);
unsigned bi_writemask(const bi_instr *ins, unsigned dest);
bi_clause * bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause);
bool bi_side_effects(const bi_instr *I);
bool bi_reconverge_branches(bi_block *block);

bool bi_can_replace_with_csel(bi_instr *I);
void bi_replace_mux_with_csel(bi_instr *I, bool must_sign);

void bi_print_instr(const bi_instr *I, FILE *fp);
void bi_print_slots(bi_registers *regs, FILE *fp);
void bi_print_tuple(bi_tuple *tuple, FILE *fp);
void bi_print_clause(bi_clause *clause, FILE *fp);
void bi_print_block(bi_block *block, FILE *fp);
void bi_print_shader(bi_context *ctx, FILE *fp);

/* BIR passes */

bool bi_instr_uses_helpers(bi_instr *I);
bool bi_block_terminates_helpers(bi_block *block);
void bi_analyze_helper_terminate(bi_context *ctx);
void bi_mark_clauses_td(bi_context *ctx);

void bi_analyze_helper_requirements(bi_context *ctx);
void bi_opt_copy_prop(bi_context *ctx);
void bi_opt_cse(bi_context *ctx);
void bi_opt_mod_prop_forward(bi_context *ctx);
void bi_opt_mod_prop_backward(bi_context *ctx);
void bi_opt_dead_code_eliminate(bi_context *ctx);
void bi_opt_fuse_dual_texture(bi_context *ctx);
void bi_opt_dce_post_ra(bi_context *ctx);
void bi_opt_message_preload(bi_context *ctx);
void bi_opt_push_ubo(bi_context *ctx);
void bi_opt_reorder_push(bi_context *ctx);
void bi_lower_swizzle(bi_context *ctx);
void bi_lower_fau(bi_context *ctx);
void bi_assign_scoreboard(bi_context *ctx);
void bi_register_allocate(bi_context *ctx);
void va_optimize(bi_context *ctx);
void va_lower_split_64bit(bi_context *ctx);

void bi_lower_opt_instruction(bi_instr *I);

void bi_pressure_schedule(bi_context *ctx);
void bi_schedule(bi_context *ctx);
bool bi_can_fma(bi_instr *ins);
bool bi_can_add(bi_instr *ins);
bool bi_must_message(bi_instr *ins);
bool bi_reads_zero(bi_instr *ins);
bool bi_reads_temps(bi_instr *ins, unsigned src);
bool bi_reads_t(bi_instr *ins, unsigned src);

#ifndef NDEBUG
bool bi_validate_initialization(bi_context *ctx);
void bi_validate(bi_context *ctx, const char *after_str);
#else
static inline bool bi_validate_initialization(UNUSED bi_context *ctx) { return true; }
static inline void bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str) { return; }
#endif

uint32_t bi_fold_constant(bi_instr *I, bool *unsupported);
bool bi_opt_constant_fold(bi_context *ctx);

/* Liveness */

void bi_compute_liveness(bi_context *ctx);
void bi_liveness_ins_update(uint8_t *live, bi_instr *ins, unsigned max);

void bi_postra_liveness(bi_context *ctx);
uint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins);

/* Layout */

signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
bool bi_ec0_packed(unsigned tuple_count);

/* Check if there are no more instructions starting with a given block. This
 * needs to recurse in case a shader ends with multiple empty blocks */

static inline bool
bi_is_terminal_block(bi_block *block)
{
        return (block == NULL) ||
                (list_is_empty(&block->instructions) &&
                 bi_is_terminal_block(block->successors[0]) &&
                 bi_is_terminal_block(block->successors[1]));
}

/* Code emit */

/* Returns the size of the final clause */
unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission);
void bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission);

struct bi_packed_tuple {
        uint64_t lo;
        uint64_t hi;
};

uint8_t bi_pack_literal(enum bi_clause_subword literal);

uint8_t
bi_pack_upper(enum bi_clause_subword upper,
                struct bi_packed_tuple *tuples,
                ASSERTED unsigned tuple_count);
uint64_t
bi_pack_tuple_bits(enum bi_clause_subword idx,
                struct bi_packed_tuple *tuples,
                ASSERTED unsigned tuple_count,
                unsigned offset, unsigned nbits);

uint8_t
bi_pack_sync(enum bi_clause_subword t1,
             enum bi_clause_subword t2,
             enum bi_clause_subword t3,
             struct bi_packed_tuple *tuples,
             ASSERTED unsigned tuple_count,
             bool z);

void
bi_pack_format(struct util_dynarray *emission,
                unsigned index,
                struct bi_packed_tuple *tuples,
                ASSERTED unsigned tuple_count,
                uint64_t header, uint64_t ec0,
                unsigned m0, bool z);

unsigned bi_pack_fma(bi_instr *I,
                enum bifrost_packed_src src0,
                enum bifrost_packed_src src1,
                enum bifrost_packed_src src2,
                enum bifrost_packed_src src3);
unsigned bi_pack_add(bi_instr *I,
                enum bifrost_packed_src src0,
                enum bifrost_packed_src src1,
                enum bifrost_packed_src src2,
                enum bifrost_packed_src src3);

/* Like in NIR, for use with the builder */

enum bi_cursor_option {
    bi_cursor_after_block,
    bi_cursor_before_instr,
    bi_cursor_after_instr
};

typedef struct {
    enum bi_cursor_option option;

    union {
        bi_block *block;
        bi_instr *instr;
    };
} bi_cursor;

static inline bi_cursor
bi_after_block(bi_block *block)
{
    return (bi_cursor) {
        .option = bi_cursor_after_block,
        .block = block
    };
}

static inline bi_cursor
bi_before_instr(bi_instr *instr)
{
    return (bi_cursor) {
        .option = bi_cursor_before_instr,
        .instr = instr
    };
}

static inline bi_cursor
bi_after_instr(bi_instr *instr)
{
    return (bi_cursor) {
        .option = bi_cursor_after_instr,
        .instr = instr
    };
}

static inline bi_cursor
bi_before_nonempty_block(bi_block *block)
{
        bi_instr *I = list_first_entry(&block->instructions, bi_instr, link);
        assert(I != NULL);

        return bi_before_instr(I);
}

static inline bi_cursor
bi_before_block(bi_block *block)
{
        if (list_is_empty(&block->instructions))
                return bi_after_block(block);
        else
                return bi_before_nonempty_block(block);
}

/* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause,
 * in which case there must exist a nonempty penultimate tuple */

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_tuple(bi_tuple *tuple)
{
        bi_instr *instr = tuple->fma ?: tuple->add;
        assert(instr != NULL);
        return instr;
}

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_clause(bi_clause *clause)
{
        return bi_first_instr_in_tuple(&clause->tuples[0]);
}

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_last_instr_in_clause(bi_clause *clause)
{
        bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
        bi_instr *instr = tuple.add ?: tuple.fma;

        if (!instr) {
                assert(clause->tuple_count >= 2);
                tuple = clause->tuples[clause->tuple_count - 2];
                instr = tuple.add ?: tuple.fma;
        }

        assert(instr != NULL);
        return instr;
}

/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start
 * (end) of the clause and adding a condition for the clause boundary */

#define bi_foreach_instr_in_clause(block, clause, pos) \
   for (bi_instr *pos = list_entry(bi_first_instr_in_clause(clause), bi_instr, link); \
        (&pos->link != &(block)->instructions) \
                && (pos != bi_next_op(bi_last_instr_in_clause(clause))); \
        pos = list_entry(pos->link.next, bi_instr, link))

#define bi_foreach_instr_in_clause_rev(block, clause, pos) \
   for (bi_instr *pos = list_entry(bi_last_instr_in_clause(clause), bi_instr, link); \
        (&pos->link != &(block)->instructions) \
                && pos != bi_prev_op(bi_first_instr_in_clause(clause)); \
        pos = list_entry(pos->link.prev, bi_instr, link))

static inline bi_cursor
bi_before_clause(bi_clause *clause)
{
    return bi_before_instr(bi_first_instr_in_clause(clause));
}

static inline bi_cursor
bi_before_tuple(bi_tuple *tuple)
{
    return bi_before_instr(bi_first_instr_in_tuple(tuple));
}

static inline bi_cursor
bi_after_clause(bi_clause *clause)
{
    return bi_after_instr(bi_last_instr_in_clause(clause));
}

/* IR builder in terms of cursor infrastructure */

typedef struct {
    bi_context *shader;
    bi_cursor cursor;
} bi_builder;

static inline bi_builder
bi_init_builder(bi_context *ctx, bi_cursor cursor)
{
        return (bi_builder) {
                .shader = ctx,
                .cursor = cursor
        };
}

/* Insert an instruction at the cursor and move the cursor */

static inline void
bi_builder_insert(bi_cursor *cursor, bi_instr *I)
{
    switch (cursor->option) {
    case bi_cursor_after_instr:
        list_add(&I->link, &cursor->instr->link);
        cursor->instr = I;
        return;

    case bi_cursor_after_block:
        list_addtail(&I->link, &cursor->block->instructions);
        cursor->option = bi_cursor_after_instr;
        cursor->instr = I;
        return;

    case bi_cursor_before_instr:
        list_addtail(&I->link, &cursor->instr->link);
        cursor->option = bi_cursor_after_instr;
        cursor->instr = I;
        return;
    }

    unreachable("Invalid cursor option");
}
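
/* Usage sketch (illustrative): a pass that wants to insert code immediately
 * before an existing instruction I positions a builder there, then emits
 * through it. The per-opcode emitters (e.g. a hypothetical bi_mov_i32_to,
 * assumed here; they are generated elsewhere) each call bi_builder_insert on
 * b.cursor:
 *
 *    bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
 *    bi_mov_i32_to(&b, bi_temp(ctx), bi_zero());
 *
 * After each insertion the cursor advances past the new instruction, so
 * consecutive emissions stay in program order. */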

/* Read back power-efficient garbage, TODO maybe merge with null? */
static inline bi_index
bi_dontcare(bi_builder *b)
{
        if (b->shader->arch >= 9)
               return bi_zero();
        else
               return bi_passthrough(BIFROST_SRC_FAU_HI);
}

#define bi_worklist_init(ctx, w) u_worklist_init(w, ctx->num_blocks, ctx)
#define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define bi_worklist_peek_head(w) u_worklist_peek_head(w, bi_block, index)
#define bi_worklist_pop_head(w)  u_worklist_pop_head( w, bi_block, index)
#define bi_worklist_peek_tail(w) u_worklist_peek_tail(w, bi_block, index)
#define bi_worklist_pop_tail(w)  u_worklist_pop_tail( w, bi_block, index)

/* NIR passes */

bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);

#ifdef __cplusplus
} /* extern C */
#endif

#endif