• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #ifndef __BIFROST_COMPILER_H
28 #define __BIFROST_COMPILER_H
29 
30 #include "compiler/nir/nir.h"
31 #include "panfrost/util/pan_ir.h"
32 #include "util/half_float.h"
33 #include "util/u_math.h"
34 #include "util/u_worklist.h"
35 #include "bi_opcodes.h"
36 #include "bifrost.h"
37 
38 #ifdef __cplusplus
39 extern "C" {
40 #endif
41 
/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
 * To express widen, use the corresponding replicated form, i.e. H01 = identity
 * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also
 * use the replicated form (interpretation is governed by the opcode). For
 * 8-bit lanes with two channels, use the replicated forms
 * (TODO: what about others?). For 8-bit lanes with four channels, use the
 * matching form (TODO: what about others?).
 *
 * The numeric values are load-bearing: bi_swz_16() builds a 16-bit swizzle as
 * BI_SWIZZLE_H00 | (x << 1) | y, and bi_byte() computes BI_SWIZZLE_B0000 +
 * lane, so neither group may be renumbered or reordered.
 */

enum bi_swizzle {
   /* 16-bit swizzles. Value is (first half << 1) | second half. */
   BI_SWIZZLE_H00 = 0, /* = B0101 */
   BI_SWIZZLE_H01 = 1, /* = B0123 = W0 (identity) */
   BI_SWIZZLE_H10 = 2, /* = B2301 */
   BI_SWIZZLE_H11 = 3, /* = B2323 */

   /* Single byte replicated to all four lanes. Value is B0000 + byte. */
   BI_SWIZZLE_B0000 = 4,
   BI_SWIZZLE_B1111 = 5,
   BI_SWIZZLE_B2222 = 6,
   BI_SWIZZLE_B3333 = 7,

   /* Special cases, only used for explicit pattern matching */
   BI_SWIZZLE_B0011 = 8,  /* +SWZ.v4i8 */
   BI_SWIZZLE_B2233 = 9,  /* +SWZ.v4i8 */
   BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */
   BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */

   BI_SWIZZLE_B0022 = 12, /* for b02 lanes */
};
72 
73 /* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
74  * folding and Valhall constant optimization. */
75 
76 static inline uint32_t
bi_apply_swizzle(uint32_t value,enum bi_swizzle swz)77 bi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
78 {
79    const uint16_t *h = (const uint16_t *)&value;
80    const uint8_t *b = (const uint8_t *)&value;
81 
82 #define H(h0, h1) (h[h0] | ((uint32_t)h[h1] << 16))
83 #define B(b0, b1, b2, b3)                                                      \
84    (b[b0] | ((uint32_t)b[b1] << 8) | ((uint32_t)b[b2] << 16) |                 \
85     ((uint32_t)b[b3] << 24))
86 
87    switch (swz) {
88    case BI_SWIZZLE_H00:
89       return H(0, 0);
90    case BI_SWIZZLE_H01:
91       return H(0, 1);
92    case BI_SWIZZLE_H10:
93       return H(1, 0);
94    case BI_SWIZZLE_H11:
95       return H(1, 1);
96    case BI_SWIZZLE_B0000:
97       return B(0, 0, 0, 0);
98    case BI_SWIZZLE_B1111:
99       return B(1, 1, 1, 1);
100    case BI_SWIZZLE_B2222:
101       return B(2, 2, 2, 2);
102    case BI_SWIZZLE_B3333:
103       return B(3, 3, 3, 3);
104    case BI_SWIZZLE_B0011:
105       return B(0, 0, 1, 1);
106    case BI_SWIZZLE_B2233:
107       return B(2, 2, 3, 3);
108    case BI_SWIZZLE_B1032:
109       return B(1, 0, 3, 2);
110    case BI_SWIZZLE_B3210:
111       return B(3, 2, 1, 0);
112    case BI_SWIZZLE_B0022:
113       return B(0, 0, 2, 2);
114    }
115 
116 #undef H
117 #undef B
118 
119    unreachable("Invalid swizzle");
120 }
121 
/* Discriminator stored in bi_index::type. It occupies a 3-bit field there, so
 * values must stay below 8. BI_INDEX_NULL is zero, so a zero-initialized
 * bi_index is a null index. */
enum bi_index_type {
   BI_INDEX_NULL = 0,     /* no index; see bi_null() */
   BI_INDEX_NORMAL = 1,   /* SSA value; see bi_get_index() / bi_is_ssa() */
   BI_INDEX_REGISTER = 2, /* register number; see bi_register() */
   BI_INDEX_CONSTANT = 3, /* inline 32-bit immediate; see bi_imm_u32() */
   BI_INDEX_PASS = 4,     /* packed passthrough source; see bi_passthrough() */
   BI_INDEX_FAU = 5       /* FAU entry; see bi_fau() */
};
130 
131 typedef struct {
132    uint32_t value;
133 
134    /* modifiers, should only be set if applicable for a given instruction.
135     * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where
136     * applicable, neg plays the role of not */
137    bool abs : 1;
138    bool neg : 1;
139 
140    /* The last use of a value, should be purged from the register cache.
141     * Set by liveness analysis. */
142    bool discard : 1;
143 
144    /* For a source, the swizzle. For a destination, acts a bit like a
145     * write mask. Identity for the full 32-bit, H00 for only caring about
146     * the lower half, other values unused. */
147    enum bi_swizzle swizzle : 4;
148    uint32_t offset         : 3;
149    enum bi_index_type type : 3;
150 
151    /* Must be zeroed so we can hash the whole 64-bits at a time */
152    unsigned padding : (32 - 13);
153 } bi_index;
154 
155 static inline bi_index
bi_get_index(unsigned value)156 bi_get_index(unsigned value)
157 {
158    return (bi_index){
159       .value = value,
160       .swizzle = BI_SWIZZLE_H01,
161       .type = BI_INDEX_NORMAL,
162    };
163 }
164 
165 static inline bi_index
bi_register(unsigned reg)166 bi_register(unsigned reg)
167 {
168    assert(reg < 64);
169 
170    return (bi_index){
171       .value = reg,
172       .swizzle = BI_SWIZZLE_H01,
173       .type = BI_INDEX_REGISTER,
174    };
175 }
176 
177 static inline bi_index
bi_imm_u32(uint32_t imm)178 bi_imm_u32(uint32_t imm)
179 {
180    return (bi_index){
181       .value = imm,
182       .swizzle = BI_SWIZZLE_H01,
183       .type = BI_INDEX_CONSTANT,
184    };
185 }
186 
187 static inline bi_index
bi_imm_f32(float imm)188 bi_imm_f32(float imm)
189 {
190    return bi_imm_u32(fui(imm));
191 }
192 
193 static inline bi_index
bi_null()194 bi_null()
195 {
196    return (bi_index){.type = BI_INDEX_NULL};
197 }
198 
199 static inline bi_index
bi_zero()200 bi_zero()
201 {
202    return bi_imm_u32(0);
203 }
204 
205 static inline bi_index
bi_passthrough(enum bifrost_packed_src value)206 bi_passthrough(enum bifrost_packed_src value)
207 {
208    return (bi_index){
209       .value = value,
210       .swizzle = BI_SWIZZLE_H01,
211       .type = BI_INDEX_PASS,
212    };
213 }
214 
215 /* Helps construct swizzles */
216 static inline bi_index
bi_swz_16(bi_index idx,bool x,bool y)217 bi_swz_16(bi_index idx, bool x, bool y)
218 {
219    assert(idx.swizzle == BI_SWIZZLE_H01);
220    idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_H00 | (x << 1) | y);
221    return idx;
222 }
223 
224 static inline bi_index
bi_half(bi_index idx,bool upper)225 bi_half(bi_index idx, bool upper)
226 {
227    return bi_swz_16(idx, upper, upper);
228 }
229 
230 static inline bi_index
bi_byte(bi_index idx,unsigned lane)231 bi_byte(bi_index idx, unsigned lane)
232 {
233    assert(idx.swizzle == BI_SWIZZLE_H01);
234    assert(lane < 4);
235    idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0000 + lane);
236    return idx;
237 }
238 
239 static inline bi_index
bi_abs(bi_index idx)240 bi_abs(bi_index idx)
241 {
242    idx.abs = true;
243    return idx;
244 }
245 
246 static inline bi_index
bi_neg(bi_index idx)247 bi_neg(bi_index idx)
248 {
249    idx.neg ^= true;
250    return idx;
251 }
252 
253 static inline bi_index
bi_discard(bi_index idx)254 bi_discard(bi_index idx)
255 {
256    idx.discard = true;
257    return idx;
258 }
259 
260 /* Additive identity in IEEE 754 arithmetic */
261 static inline bi_index
bi_negzero()262 bi_negzero()
263 {
264    return bi_neg(bi_zero());
265 }
266 
267 /* Replaces an index, preserving any modifiers */
268 
269 static inline bi_index
bi_replace_index(bi_index old,bi_index replacement)270 bi_replace_index(bi_index old, bi_index replacement)
271 {
272    replacement.abs = old.abs;
273    replacement.neg = old.neg;
274    replacement.swizzle = old.swizzle;
275    replacement.discard = false; /* needs liveness analysis to set */
276    return replacement;
277 }
278 
279 /* Remove any modifiers. This has the property:
280  *
281  *     replace_index(x, strip_index(x)) = x
282  *
283  * This ensures it is suitable to use when lowering sources to moves */
284 
285 static inline bi_index
bi_strip_index(bi_index index)286 bi_strip_index(bi_index index)
287 {
288    index.abs = index.neg = false;
289    index.swizzle = BI_SWIZZLE_H01;
290    return index;
291 }
292 
/* For bitwise instructions, where the neg modifier plays the role of not */
#define bi_not(idx) bi_neg(idx)
295 
296 static inline bi_index
bi_imm_u8(uint8_t imm)297 bi_imm_u8(uint8_t imm)
298 {
299    return bi_byte(bi_imm_u32(imm), 0);
300 }
301 
302 static inline bi_index
bi_imm_u16(uint16_t imm)303 bi_imm_u16(uint16_t imm)
304 {
305    return bi_half(bi_imm_u32(imm), false);
306 }
307 
308 static inline bi_index
bi_imm_uintN(uint32_t imm,unsigned sz)309 bi_imm_uintN(uint32_t imm, unsigned sz)
310 {
311    assert(sz == 8 || sz == 16 || sz == 32);
312    return (sz == 8)    ? bi_imm_u8(imm)
313           : (sz == 16) ? bi_imm_u16(imm)
314                        : bi_imm_u32(imm);
315 }
316 
317 static inline bi_index
bi_imm_f16(float imm)318 bi_imm_f16(float imm)
319 {
320    return bi_imm_u16(_mesa_float_to_half(imm));
321 }
322 
323 static inline bool
bi_is_null(bi_index idx)324 bi_is_null(bi_index idx)
325 {
326    return idx.type == BI_INDEX_NULL;
327 }
328 
329 static inline bool
bi_is_ssa(bi_index idx)330 bi_is_ssa(bi_index idx)
331 {
332    return idx.type == BI_INDEX_NORMAL;
333 }
334 
335 /* Compares equivalence as references. Does not compare offsets, swizzles, or
336  * modifiers. In other words, this forms bi_index equivalence classes by
337  * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */
338 
339 static inline bool
bi_is_equiv(bi_index left,bi_index right)340 bi_is_equiv(bi_index left, bi_index right)
341 {
342    return (left.type == right.type) && (left.value == right.value);
343 }
344 
345 /* A stronger equivalence relation that requires the indices access the
346  * same offset, useful for RA/scheduling to see what registers will
347  * correspond to */
348 
349 static inline bool
bi_is_word_equiv(bi_index left,bi_index right)350 bi_is_word_equiv(bi_index left, bi_index right)
351 {
352    return bi_is_equiv(left, right) && left.offset == right.offset;
353 }
354 
355 /* An even stronger equivalence that checks if indices correspond to the
356  * right value when evaluated
357  */
358 static inline bool
bi_is_value_equiv(bi_index left,bi_index right)359 bi_is_value_equiv(bi_index left, bi_index right)
360 {
361    if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) {
362       return (bi_apply_swizzle(left.value, left.swizzle) ==
363               bi_apply_swizzle(right.value, right.swizzle)) &&
364              (left.abs == right.abs) && (left.neg == right.neg);
365    } else {
366       return (left.value == right.value) && (left.abs == right.abs) &&
367              (left.neg == right.neg) && (left.swizzle == right.swizzle) &&
368              (left.offset == right.offset) && (left.type == right.type);
369    }
370 }
371 
/* Size limits for the IR. NOTE(review): presumably the maximum vector width
 * and the maximum number of destinations/sources per instruction; they are
 * not referenced in this part of the header -- confirm against users. */
#define BI_MAX_VEC   8
#define BI_MAX_DESTS 4
#define BI_MAX_SRCS  6
375 
376 typedef struct {
377    /* Must be first */
378    struct list_head link;
379    bi_index *dest;
380    bi_index *src;
381 
382    enum bi_opcode op;
383    uint8_t nr_srcs;
384    uint8_t nr_dests;
385 
386    union {
387       /* For a branch */
388       struct bi_block *branch_target;
389 
390       /* For a phi node that hasn't been translated yet. This is only
391        * used during NIR->BIR
392        */
393       nir_phi_instr *phi;
394    };
395 
396    /* These don't fit neatly with anything else.. */
397    enum bi_register_format register_format;
398    enum bi_vecsize vecsize;
399 
400    /* Flow control associated with a Valhall instruction */
401    uint8_t flow;
402 
403    /* Slot associated with a message-passing instruction */
404    uint8_t slot;
405 
406    /* Can we spill the value written here? Used to prevent
407     * useless double fills */
408    bool no_spill;
409 
410    /* On Bifrost: A value of bi_table to override the table, inducing a
411     * DTSEL_IMM pair if nonzero.
412     *
413     * On Valhall: the table index to use for resource instructions.
414     *
415     * These two interpretations are equivalent if you squint a bit.
416     */
417    unsigned table;
418 
419    /* Everything after this MUST NOT be accessed directly, since
420     * interpretation depends on opcodes */
421 
422    /* Destination modifiers */
423    union {
424       enum bi_clamp clamp;
425       bool saturate;
426       bool not_result;
427       unsigned dest_mod;
428    };
429 
430    /* Immediates. All seen alone in an instruction, except for varying/texture
431     * which are specified jointly for VARTEX */
432    union {
433       uint32_t shift;
434       uint32_t fill;
435       uint32_t index;
436       uint32_t attribute_index;
437 
438       struct {
439          uint32_t varying_index;
440          uint32_t sampler_index;
441          uint32_t texture_index;
442       };
443 
444       /* TEXC, ATOM_CX: # of staging registers used */
445       struct {
446          uint32_t sr_count;
447          uint32_t sr_count_2;
448 
449          union {
450             /* Atomics effectively require all three */
451             int32_t byte_offset;
452 
453             /* BLEND requires all three */
454             int32_t branch_offset;
455          };
456       };
457    };
458 
459    /* Modifiers specific to particular instructions are thrown in a union */
460    union {
461       enum bi_adj adj;           /* FEXP_TABLE.u4 */
462       enum bi_atom_opc atom_opc; /* atomics */
463       enum bi_func func;         /* FPOW_SC_DET */
464       enum bi_function function; /* LD_VAR_FLAT */
465       enum bi_mux mux;           /* MUX */
466       enum bi_sem sem;           /* FMAX, FMIN */
467       enum bi_source source;     /* LD_GCLK */
468       bool scale;                /* VN_ASST2, FSINCOS_OFFSET */
469       bool offset;               /* FSIN_TABLE, FOCS_TABLE */
470       bool mask;                 /* CLZ */
471       bool threads;              /* IMULD, IMOV_FMA */
472       bool combine;              /* BRANCHC */
473       bool format;               /* LEA_TEX */
474 
475       struct {
476          enum bi_special special; /* FADD_RSCALE, FMA_RSCALE */
477          enum bi_round round;     /* FMA, converts, FADD, _RSCALE, etc */
478          bool ftz;                /* Flush-to-zero for F16_TO_F32 */
479       };
480 
481       struct {
482          enum bi_result_type result_type; /* FCMP, ICMP */
483          enum bi_cmpf cmpf;               /* CSEL, FCMP, ICMP, BRANCH */
484       };
485 
486       struct {
487          enum bi_stack_mode stack_mode; /* JUMP_EX */
488          bool test_mode;
489       };
490 
491       struct {
492          enum bi_seg seg;       /* LOAD, STORE, SEG_ADD, SEG_SUB */
493          bool preserve_null;    /* SEG_ADD, SEG_SUB */
494          enum bi_extend extend; /* LOAD, IMUL */
495       };
496 
497       struct {
498          enum bi_sample sample;             /* VAR_TEX, LD_VAR */
499          enum bi_update update;             /* VAR_TEX, LD_VAR */
500          enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */
501          bool skip;                         /* VAR_TEX, TEXS, TEXC */
502          bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */
503          enum bi_source_format source_format; /* LD_VAR_BUF */
504 
505          /* Used for valhall texturing */
506          bool shadow;
507          bool texel_offset;
508          bool array_enable;
509          bool integer_coordinates;
510          enum bi_fetch_component fetch_component;
511          enum bi_va_lod_mode va_lod_mode;
512          enum bi_dimension dimension;
513          enum bi_write_mask write_mask;
514       };
515 
516       /* Maximum size, for hashing */
517       unsigned flags[14];
518 
519       struct {
520          enum bi_subgroup subgroup;               /* WMASK, CLPER */
521          enum bi_inactive_result inactive_result; /* CLPER */
522          enum bi_lane_op lane_op;                 /* CLPER */
523       };
524 
525       struct {
526          bool z;       /* ZS_EMIT */
527          bool stencil; /* ZS_EMIT */
528       };
529 
530       struct {
531          bool h; /* VN_ASST1.f16 */
532          bool l; /* VN_ASST1.f16 */
533       };
534 
535       struct {
536          bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */
537          bool result_word;
538          bool arithmetic; /* ARSHIFT_OR */
539       };
540 
541       struct {
542          bool sqrt; /* FREXPM */
543          bool log;  /* FREXPM */
544       };
545 
546       struct {
547          enum bi_mode mode;           /* FLOG_TABLE */
548          enum bi_precision precision; /* FLOG_TABLE */
549          bool divzero;                /* FRSQ_APPROX, FRSQ */
550       };
551    };
552 } bi_instr;
553 
554 static inline bool
bi_is_staging_src(const bi_instr * I,unsigned s)555 bi_is_staging_src(const bi_instr *I, unsigned s)
556 {
557    return (s == 0 || s == 4) && bi_opcode_props[I->op].sr_read;
558 }
559 
560 /*
561  * Safe helpers to remove destinations/sources at the end of the
562  * destination/source array when changing opcodes. Unlike adding
563  * sources/destinations, this does not require reallocation.
564  */
565 static inline void
bi_drop_dests(bi_instr * I,unsigned new_count)566 bi_drop_dests(bi_instr *I, unsigned new_count)
567 {
568    assert(new_count < I->nr_dests);
569 
570    for (unsigned i = new_count; i < I->nr_dests; ++i)
571       I->dest[i] = bi_null();
572 
573    I->nr_dests = new_count;
574 }
575 
576 static inline void
bi_drop_srcs(bi_instr * I,unsigned new_count)577 bi_drop_srcs(bi_instr *I, unsigned new_count)
578 {
579    assert(new_count < I->nr_srcs);
580 
581    for (unsigned i = new_count; i < I->nr_srcs; ++i)
582       I->src[i] = bi_null();
583 
584    I->nr_srcs = new_count;
585 }
586 
587 static inline void
bi_replace_src(bi_instr * I,unsigned src_index,bi_index replacement)588 bi_replace_src(bi_instr *I, unsigned src_index, bi_index replacement)
589 {
590    I->src[src_index] = bi_replace_index(I->src[src_index], replacement);
591 }
592 
593 /* Represents the assignment of slots for a given bi_tuple */
594 
595 typedef struct {
596    /* Register to assign to each slot */
597    unsigned slot[4];
598 
599    /* Read slots can be disabled */
600    bool enabled[2];
601 
602    /* Configuration for slots 2/3 */
603    struct bifrost_reg_ctrl_23 slot23;
604 
605    /* Fast-Access-Uniform RAM index */
606    uint8_t fau_idx;
607 
608    /* Whether writes are actually for the last instruction */
609    bool first_instruction;
610 } bi_registers;
611 
612 /* A bi_tuple contains two paired instruction pointers. If a slot is unfilled,
613  * leave it NULL; the emitter will fill in a nop. Instructions reference
614  * registers via slots which are assigned per tuple.
615  */
616 
617 typedef struct {
618    uint8_t fau_idx;
619    bi_registers regs;
620    bi_instr *fma;
621    bi_instr *add;
622 } bi_tuple;
623 
624 struct bi_block;
625 
626 typedef struct {
627    struct list_head link;
628 
629    /* Link back up for branch calculations */
630    struct bi_block *block;
631 
632    /* Architectural limit of 8 tuples/clause */
633    unsigned tuple_count;
634    bi_tuple tuples[8];
635 
636    /* For scoreboarding -- the clause ID (this is not globally unique!)
637     * and its dependencies in terms of other clauses, computed during
638     * scheduling and used when emitting code. Dependencies expressed as a
639     * bitfield matching the hardware, except shifted by a clause (the
640     * shift back to the ISA's off-by-one encoding is worked out when
641     * emitting clauses) */
642    unsigned scoreboard_id;
643    uint8_t dependencies;
644 
645    /* See ISA header for description */
646    enum bifrost_flow flow_control;
647 
648    /* Can we prefetch the next clause? Usually it makes sense, except for
649     * clauses ending in unconditional branches */
650    bool next_clause_prefetch;
651 
652    /* Assigned data register */
653    unsigned staging_register;
654 
655    /* Corresponds to the usual bit but shifted by a clause */
656    bool staging_barrier;
657 
658    /* Constants read by this clause. ISA limit. Must satisfy:
659     *
660     *      constant_count + tuple_count <= 13
661     *
662     * Also implicitly constant_count <= tuple_count since a tuple only
663     * reads a single constant.
664     */
665    uint64_t constants[8];
666    unsigned constant_count;
667 
668    /* Index of a constant to be PC-relative */
669    unsigned pcrel_idx;
670 
671    /* Branches encode a constant offset relative to the program counter
672     * with some magic flags. By convention, if there is a branch, its
673     * constant will be last. Set this flag to indicate this is required.
674     */
675    bool branch_constant;
676 
677    /* Unique in a clause */
678    enum bifrost_message_type message_type;
679    bi_instr *message;
680 
681    /* Discard helper threads */
682    bool td;
683 
684    /* Should flush-to-zero mode be enabled for this clause? */
685    bool ftz;
686 } bi_clause;
687 
/* Number of scoreboard slots tracked by bi_scoreboard_state */
#define BI_NUM_SLOTS 8

/* A model for the state of the scoreboard */
struct bi_scoreboard_state {
   /* Per slot: bitmap of the registers read by that slot */
   uint64_t read[BI_NUM_SLOTS];

   /* Per slot: bitmap of the registers written by that slot */
   uint64_t write[BI_NUM_SLOTS];

   /* Nonregister dependencies, one bit per slot */
   uint8_t varying : BI_NUM_SLOTS;
   uint8_t memory : BI_NUM_SLOTS;
};
700 
701 typedef struct bi_block {
702    /* Link to next block. Must be first for mir_get_block */
703    struct list_head link;
704 
705    /* List of instructions emitted for the current block */
706    struct list_head instructions;
707 
708    /* Index of the block in source order */
709    unsigned index;
710 
711    /* Control flow graph */
712    struct bi_block *successors[2];
713    struct util_dynarray predecessors;
714    bool unconditional_jumps;
715 
716    /* Per 32-bit word live masks for the block indexed by node */
717    uint8_t *live_in;
718    uint8_t *live_out;
719 
720    /* Scalar liveness indexed by SSA index */
721    BITSET_WORD *ssa_live_in;
722    BITSET_WORD *ssa_live_out;
723 
724    /* If true, uses clauses; if false, uses instructions */
725    bool scheduled;
726    struct list_head clauses; /* list of bi_clause */
727 
728    /* Post-RA liveness */
729    uint64_t reg_live_in, reg_live_out;
730 
731    /* Scoreboard state at the start/end of block */
732    struct bi_scoreboard_state scoreboard_in, scoreboard_out;
733 
734    /* On Valhall, indicates we need a terminal NOP to implement jumps to
735     * the end of the shader.
736     */
737    bool needs_nop;
738 
739    /* Flags available for pass-internal use */
740    uint8_t pass_flags;
741 } bi_block;
742 
743 static inline unsigned
bi_num_successors(bi_block * block)744 bi_num_successors(bi_block *block)
745 {
746    STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2);
747    assert(block->successors[0] || !block->successors[1]);
748 
749    if (block->successors[1])
750       return 2;
751    else if (block->successors[0])
752       return 1;
753    else
754       return 0;
755 }
756 
757 static inline unsigned
bi_num_predecessors(bi_block * block)758 bi_num_predecessors(bi_block *block)
759 {
760    return util_dynarray_num_elements(&block->predecessors, bi_block *);
761 }
762 
763 static inline bi_block *
bi_start_block(struct list_head * blocks)764 bi_start_block(struct list_head *blocks)
765 {
766    bi_block *first = list_first_entry(blocks, bi_block, link);
767    assert(bi_num_predecessors(first) == 0);
768    return first;
769 }
770 
771 static inline bi_block *
bi_exit_block(struct list_head * blocks)772 bi_exit_block(struct list_head *blocks)
773 {
774    bi_block *last = list_last_entry(blocks, bi_block, link);
775    assert(bi_num_successors(last) == 0);
776    return last;
777 }
778 
779 static inline void
bi_block_add_successor(bi_block * block,bi_block * successor)780 bi_block_add_successor(bi_block *block, bi_block *successor)
781 {
782    assert(block != NULL && successor != NULL);
783 
784    /* Cull impossible edges */
785    if (block->unconditional_jumps)
786       return;
787 
788    for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
789       if (block->successors[i]) {
790          if (block->successors[i] == successor)
791             return;
792          else
793             continue;
794       }
795 
796       block->successors[i] = successor;
797       util_dynarray_append(&successor->predecessors, bi_block *, block);
798       return;
799    }
800 
801    unreachable("Too many successors");
802 }
803 
/* Subset of pan_shader_info needed per-variant, in order to support IDVS */
struct bi_shader_info {
   struct panfrost_ubo_push *push;
   struct bifrost_shader_info *bifrost;

   unsigned tls_size;
   unsigned work_reg_count;
   unsigned push_offset;
};
812 
/* State of index-driven vertex shading (IDVS) for the current shader */
enum bi_idvs_mode {
   /* IDVS not in use */
   BI_IDVS_NONE = 0,

   /* IDVS in use. Compiling a position shader */
   BI_IDVS_POSITION = 1,

   /* IDVS in use. Compiling a varying shader */
   BI_IDVS_VARYING = 2,
};
824 
825 typedef struct {
826    const struct panfrost_compile_inputs *inputs;
827    nir_shader *nir;
828    struct bi_shader_info info;
829    gl_shader_stage stage;
830    struct list_head blocks; /* list of bi_block */
831    uint32_t quirks;
832    unsigned arch;
833    enum bi_idvs_mode idvs;
834    unsigned num_blocks;
835 
836    /* In any graphics shader, whether the "IDVS with memory
837     * allocation" flow is used. This affects how varyings are loaded and
838     * stored. Ignore for compute.
839     */
840    bool malloc_idvs;
841 
842    /* During NIR->BIR */
843    bi_block *current_block;
844    bi_block *after_block;
845    bi_block *break_block;
846    bi_block *continue_block;
847    bi_block **indexed_nir_blocks;
848    bool emitted_atest;
849 
850    /* During NIR->BIR, the coverage bitmap. If this is NULL, the default
851     * coverage bitmap should be source from preloaded register r60. This is
852     * written by ATEST and ZS_EMIT
853     */
854    bi_index coverage;
855 
856    /* During NIR->BIR, table of preloaded registers, or NULL if never
857     * preloaded.
858     */
859    bi_index preloaded[64];
860 
861    /* For creating temporaries */
862    unsigned ssa_alloc;
863    unsigned reg_alloc;
864 
865    /* Mask of UBOs that need to be uploaded */
866    uint32_t ubo_mask;
867 
868    /* During instruction selection, map from vector bi_index to its scalar
869     * components, populated by a split.
870     */
871    struct hash_table_u64 *allocated_vec;
872 
873    /* Stats for shader-db */
874    unsigned loop_count;
875    unsigned spills;
876    unsigned fills;
877 } bi_context;
878 
879 static inline void
bi_remove_instruction(bi_instr * ins)880 bi_remove_instruction(bi_instr *ins)
881 {
882    list_del(&ins->link);
883 }
884 
/* Values for the payload of a BI_INDEX_FAU index (see bi_fau()). The low
 * values name special entries; BIR_FAU_UNIFORM and BIR_FAU_IMMEDIATE are
 * single-bit flags. */
enum bir_fau {
   BIR_FAU_ZERO = 0,
   BIR_FAU_LANE_ID = 1,
   BIR_FAU_WARP_ID = 2,
   BIR_FAU_CORE_ID = 3,
   BIR_FAU_FB_EXTENT = 4,
   BIR_FAU_ATEST_PARAM = 5,
   BIR_FAU_SAMPLE_POS_ARRAY = 6,
   BIR_FAU_BLEND_0 = 8,
   /* blend descs 1 - 7 */
   BIR_FAU_TYPE_MASK = 15,

   /* Valhall only */
   BIR_FAU_TLS_PTR = 16,
   BIR_FAU_WLS_PTR = 17,
   BIR_FAU_PROGRAM_COUNTER = 18,

   BIR_FAU_UNIFORM = (1 << 7),

   /* Look up table on Valhall */
   BIR_FAU_IMMEDIATE = (1 << 8),

};
907 
908 static inline bi_index
bi_fau(enum bir_fau value,bool hi)909 bi_fau(enum bir_fau value, bool hi)
910 {
911    return (bi_index){
912       .value = value,
913       .swizzle = BI_SWIZZLE_H01,
914       .offset = hi ? 1u : 0u,
915       .type = BI_INDEX_FAU,
916    };
917 }
918 
919 /*
920  * Builder for Valhall LUT entries. Generally, constants are modeled with
921  * BI_INDEX_IMMEDIATE in the intermediate representation. This helper is only
922  * necessary for passes running after lowering constants, as well as when
923  * lowering constants.
924  *
925  */
926 static inline bi_index
va_lut(unsigned index)927 va_lut(unsigned index)
928 {
929    return bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | (index >> 1)), index & 1);
930 }
931 
932 /*
933  * va_lut_zero is like bi_zero but only works on Valhall. It is intended for
934  * use by late passes that run after constants are lowered, specifically
935  * register allocation. bi_zero() is preferred where possible.
936  */
937 static inline bi_index
va_zero_lut()938 va_zero_lut()
939 {
940    return va_lut(0);
941 }
942 
943 static inline bi_index
bi_temp(bi_context * ctx)944 bi_temp(bi_context *ctx)
945 {
946    return bi_get_index(ctx->ssa_alloc++);
947 }
948 
949 static inline bi_index
bi_def_index(nir_def * def)950 bi_def_index(nir_def *def)
951 {
952    return bi_get_index(def->index);
953 }
954 
955 /* Inline constants automatically, will be lowered out by bi_lower_fau where a
956  * constant is not allowed. load_const_to_scalar gaurantees that this makes
957  * sense */
958 
959 static inline bi_index
bi_src_index(nir_src * src)960 bi_src_index(nir_src *src)
961 {
962    if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) {
963       return bi_imm_u32(nir_src_as_uint(*src));
964    } else {
965       return bi_def_index(src->ssa);
966    }
967 }
968 
/* Iterators for Bifrost IR */

/* Block iterators over ctx->blocks. `ctx` is a pointer to the context and is
 * parenthesized in the expansion so that any pointer-valued expression may be
 * passed; `v` names the bi_block * loop variable; `from` is the block to
 * start at. */

#define bi_foreach_block(ctx, v)                                               \
   list_for_each_entry(bi_block, v, &(ctx)->blocks, link)

#define bi_foreach_block_rev(ctx, v)                                           \
   list_for_each_entry_rev(bi_block, v, &(ctx)->blocks, link)

#define bi_foreach_block_from(ctx, from, v)                                    \
   list_for_each_entry_from(bi_block, v, from, &(ctx)->blocks, link)

#define bi_foreach_block_from_rev(ctx, from, v)                                \
   list_for_each_entry_from_rev(bi_block, v, from, &(ctx)->blocks, link)
982 
/* Instruction iterators over block->instructions. `v` names the bi_instr *
 * loop variable. The _rev variants walk backwards, the _safe variants use the
 * list helpers' safe forms, and the _from variants start at `from`. */

#define bi_foreach_instr_in_block(block, v)                                    \
   list_for_each_entry(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_rev(block, v)                                \
   list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe(block, v)                               \
   list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe_rev(block, v)                           \
   list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from(block, v, from)                         \
   list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from_rev(block, v, from)                     \
   list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link)
1000 
/* Clause iterators over the `clauses` list of a single block; `v` names the
 * bi_clause * loop variable. Variants mirror the instruction iterators:
 * _rev, _safe, and _from / _from_rev starting at `from`. */

#define bi_foreach_clause_in_block(block, v)                                   \
   list_for_each_entry(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_rev(block, v)                               \
   list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_safe(block, v)                              \
   list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from(block, v, from)                        \
   list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from_rev(block, v, from)                    \
   list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link)
1015 
/* Whole-shader instruction iterators: an outer walk over the blocks of `ctx`
 * nested around an inner walk over each block's instructions. The outer loop
 * variable is always named `v_block`, so it is visible in the loop body and
 * these macros shadow each other's block variable when nested. */

#define bi_foreach_instr_global(ctx, v)                                        \
   bi_foreach_block(ctx, v_block)                                              \
      bi_foreach_instr_in_block(v_block, v)

#define bi_foreach_instr_global_rev(ctx, v)                                    \
   bi_foreach_block_rev(ctx, v_block)                                          \
      bi_foreach_instr_in_block_rev(v_block, v)

#define bi_foreach_instr_global_safe(ctx, v)                                   \
   bi_foreach_block(ctx, v_block)                                              \
      bi_foreach_instr_in_block_safe(v_block, v)

/* The per-block reverse-safe iterator is spelled ..._safe_rev (not
 * ..._rev_safe), so that is the name this macro must expand to. */
#define bi_foreach_instr_global_rev_safe(ctx, v)                               \
   bi_foreach_block_rev(ctx, v_block)                                          \
      bi_foreach_instr_in_block_safe_rev(v_block, v)
1031 
/* Visit the instructions of a tuple: the FMA instruction first when present,
 * then the ADD instruction when present. A slot holding NULL is skipped, and
 * an entirely empty tuple yields no iterations. `tuple` is evaluated more
 * than once. */
#define bi_foreach_instr_in_tuple(tuple, v)                                    \
   for (bi_instr *v = (tuple)->fma ? (tuple)->fma : (tuple)->add;              \
        v != NULL;                                                             \
        v = (v == (tuple)->add) ? NULL : (tuple)->add)
1035 
/* Iterate over the successors of a block, stopping at the first NULL entry or
 * after successors[1], whichever comes first. The slot pointer is compared
 * against the end bound *before* it is dereferenced, so nothing past
 * successors[1] is ever read. `v` is NULL once the loop finishes normally.
 *
 * This expands to two declarations (`v` and the helper `_v`) followed by a
 * for statement: use it where declarations are permitted, at most once per
 * scope, and never as the unbraced body of another statement. `blk` is
 * evaluated more than once. */
#define bi_foreach_successor(blk, v)                                           \
   bi_block *v;                                                                \
   bi_block **_v;                                                              \
   for (_v = &(blk)->successors[0], v = *_v; v != NULL;                        \
        v = (++_v < &(blk)->successors[2]) ? *_v : NULL)

/* Iterate over the `predecessors` dynarray of a block; `v` is a pointer to
 * each stored bi_block * element (dereference it to get the block). */
#define bi_foreach_predecessor(blk, v)                                         \
   util_dynarray_foreach(&(blk)->predecessors, bi_block *, v)
1044 
/* Index iterators over an instruction's operands. `v` is an unsigned index
 * counting from 0 up to (but excluding) nr_srcs / nr_dests. `ins` may be any
 * expression yielding an instruction pointer; it is evaluated on every
 * iteration. */
#define bi_foreach_src(ins, v) for (unsigned v = 0; v < (ins)->nr_srcs; ++v)

#define bi_foreach_dest(ins, v) for (unsigned v = 0; v < (ins)->nr_dests; ++v)

/* Like bi_foreach_src, but the body only runs for sources whose type is
 * BI_INDEX_NORMAL. Expands to `for (...) if (...)`, so an `else` written
 * directly after the body would attach to the hidden `if`. */
#define bi_foreach_ssa_src(ins, v)                                             \
   for (unsigned v = 0; v < (ins)->nr_srcs; ++v)                               \
      if ((ins)->src[v].type == BI_INDEX_NORMAL)

/* For every instruction `ins` in `tuple`, visit every source index `s`. */
#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s)                       \
   bi_foreach_instr_in_tuple(tuple, ins)                                       \
      bi_foreach_src(ins, s)
1056 
1057 /*
1058  * Find the index of a predecessor, used as the implicit order of phi sources.
1059  */
1060 static inline unsigned
bi_predecessor_index(bi_block * succ,bi_block * pred)1061 bi_predecessor_index(bi_block *succ, bi_block *pred)
1062 {
1063    unsigned index = 0;
1064 
1065    bi_foreach_predecessor(succ, x) {
1066       if (*x == pred)
1067          return index;
1068 
1069       index++;
1070    }
1071 
1072    unreachable("Invalid predecessor");
1073 }
1074 
1075 static inline bi_instr *
bi_prev_op(bi_instr * ins)1076 bi_prev_op(bi_instr *ins)
1077 {
1078    return list_last_entry(&(ins->link), bi_instr, link);
1079 }
1080 
1081 static inline bi_instr *
bi_next_op(bi_instr * ins)1082 bi_next_op(bi_instr *ins)
1083 {
1084    return list_first_entry(&(ins->link), bi_instr, link);
1085 }
1086 
1087 static inline bi_block *
bi_next_block(bi_block * block)1088 bi_next_block(bi_block *block)
1089 {
1090    return list_first_entry(&(block->link), bi_block, link);
1091 }
1092 
1093 static inline bi_block *
bi_entry_block(bi_context * ctx)1094 bi_entry_block(bi_context *ctx)
1095 {
1096    return list_first_entry(&ctx->blocks, bi_block, link);
1097 }
1098 
1099 /* BIR manipulation */
1100 
1101 bool bi_has_arg(const bi_instr *ins, bi_index arg);
1102 unsigned bi_count_read_registers(const bi_instr *ins, unsigned src);
1103 unsigned bi_count_write_registers(const bi_instr *ins, unsigned dest);
1104 bool bi_is_regfmt_16(enum bi_register_format fmt);
1105 unsigned bi_writemask(const bi_instr *ins, unsigned dest);
1106 bi_clause *bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause);
1107 bool bi_side_effects(const bi_instr *I);
1108 bool bi_reconverge_branches(bi_block *block);
1109 
1110 bool bi_can_replace_with_csel(bi_instr *I);
1111 
1112 void bi_print_instr(const bi_instr *I, FILE *fp);
1113 void bi_print_slots(bi_registers *regs, FILE *fp);
1114 void bi_print_tuple(bi_tuple *tuple, FILE *fp);
1115 void bi_print_clause(bi_clause *clause, FILE *fp);
1116 void bi_print_block(bi_block *block, FILE *fp);
1117 void bi_print_shader(bi_context *ctx, FILE *fp);
1118 
1119 /* BIR passes */
1120 
1121 bool bi_instr_uses_helpers(bi_instr *I);
1122 bool bi_block_terminates_helpers(bi_block *block);
1123 void bi_analyze_helper_terminate(bi_context *ctx);
1124 void bi_mark_clauses_td(bi_context *ctx);
1125 
1126 void bi_analyze_helper_requirements(bi_context *ctx);
1127 void bi_opt_copy_prop(bi_context *ctx);
1128 void bi_opt_cse(bi_context *ctx);
1129 void bi_opt_mod_prop_forward(bi_context *ctx);
1130 void bi_opt_mod_prop_backward(bi_context *ctx);
1131 void bi_opt_dead_code_eliminate(bi_context *ctx);
1132 void bi_opt_fuse_dual_texture(bi_context *ctx);
1133 void bi_opt_dce_post_ra(bi_context *ctx);
1134 void bi_opt_message_preload(bi_context *ctx);
1135 void bi_opt_push_ubo(bi_context *ctx);
1136 void bi_opt_reorder_push(bi_context *ctx);
1137 void bi_lower_swizzle(bi_context *ctx);
1138 void bi_lower_fau(bi_context *ctx);
1139 void bi_assign_scoreboard(bi_context *ctx);
1140 void bi_register_allocate(bi_context *ctx);
1141 void va_optimize(bi_context *ctx);
1142 void va_lower_split_64bit(bi_context *ctx);
1143 
1144 void bi_lower_opt_instructions(bi_context *ctx);
1145 
1146 void bi_pressure_schedule(bi_context *ctx);
1147 void bi_schedule(bi_context *ctx);
1148 bool bi_can_fma(bi_instr *ins);
1149 bool bi_can_add(bi_instr *ins);
1150 bool bi_must_message(bi_instr *ins);
1151 bool bi_reads_zero(bi_instr *ins);
1152 bool bi_reads_temps(bi_instr *ins, unsigned src);
1153 bool bi_reads_t(bi_instr *ins, unsigned src);
1154 
1155 #ifndef NDEBUG
1156 bool bi_validate_initialization(bi_context *ctx);
1157 void bi_validate(bi_context *ctx, const char *after_str);
1158 #else
1159 static inline bool
bi_validate_initialization(UNUSED bi_context * ctx)1160 bi_validate_initialization(UNUSED bi_context *ctx)
1161 {
1162    return true;
1163 }
1164 static inline void
bi_validate(UNUSED bi_context * ctx,UNUSED const char * after_str)1165 bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str)
1166 {
1167    return;
1168 }
1169 #endif
1170 
1171 uint32_t bi_fold_constant(bi_instr *I, bool *unsupported);
1172 bool bi_opt_constant_fold(bi_context *ctx);
1173 
1174 /* Liveness */
1175 
1176 void bi_compute_liveness_ssa(bi_context *ctx);
1177 void bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *ins);
1178 
1179 void bi_postra_liveness(bi_context *ctx);
1180 uint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins);
1181 
1182 /* Layout */
1183 
1184 signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
1185 bool bi_ec0_packed(unsigned tuple_count);
1186 
1187 /* Check if there are no more instructions starting with a given block, this
1188  * needs to recurse in case a shader ends with multiple empty blocks */
1189 
1190 static inline bool
bi_is_terminal_block(bi_block * block)1191 bi_is_terminal_block(bi_block *block)
1192 {
1193    return (block == NULL) || (list_is_empty(&block->instructions) &&
1194                               bi_is_terminal_block(block->successors[0]) &&
1195                               bi_is_terminal_block(block->successors[1]));
1196 }
1197 
1198 /* Code emit */
1199 
1200 /* Returns the size of the final clause */
1201 unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission);
1202 void bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission);
1203 
/* A packed tuple held as two 64-bit halves, low half first. */
struct bi_packed_tuple {
   uint64_t lo; /* low half */
   uint64_t hi; /* high half */
};
1208 
1209 uint8_t bi_pack_literal(enum bi_clause_subword literal);
1210 
1211 uint8_t bi_pack_upper(enum bi_clause_subword upper,
1212                       struct bi_packed_tuple *tuples,
1213                       ASSERTED unsigned tuple_count);
1214 uint64_t bi_pack_tuple_bits(enum bi_clause_subword idx,
1215                             struct bi_packed_tuple *tuples,
1216                             ASSERTED unsigned tuple_count, unsigned offset,
1217                             unsigned nbits);
1218 
1219 uint8_t bi_pack_sync(enum bi_clause_subword t1, enum bi_clause_subword t2,
1220                      enum bi_clause_subword t3, struct bi_packed_tuple *tuples,
1221                      ASSERTED unsigned tuple_count, bool z);
1222 
1223 void bi_pack_format(struct util_dynarray *emission, unsigned index,
1224                     struct bi_packed_tuple *tuples,
1225                     ASSERTED unsigned tuple_count, uint64_t header,
1226                     uint64_t ec0, unsigned m0, bool z);
1227 
1228 unsigned bi_pack_fma(bi_instr *I, enum bifrost_packed_src src0,
1229                      enum bifrost_packed_src src1, enum bifrost_packed_src src2,
1230                      enum bifrost_packed_src src3);
1231 unsigned bi_pack_add(bi_instr *I, enum bifrost_packed_src src0,
1232                      enum bifrost_packed_src src1, enum bifrost_packed_src src2,
1233                      enum bifrost_packed_src src3);
1234 
/* Like in NIR, for use with the builder */

/* Where a cursor points relative to its anchor (a block or an instruction) */
enum bi_cursor_option {
   bi_cursor_after_block,  /* anchor is a block */
   bi_cursor_before_instr, /* anchor is an instruction */
   bi_cursor_after_instr   /* anchor is an instruction */
};
1242 
1243 typedef struct {
1244    enum bi_cursor_option option;
1245 
1246    union {
1247       bi_block *block;
1248       bi_instr *instr;
1249    };
1250 } bi_cursor;
1251 
1252 static inline bi_cursor
bi_after_block(bi_block * block)1253 bi_after_block(bi_block *block)
1254 {
1255    return (bi_cursor){.option = bi_cursor_after_block, .block = block};
1256 }
1257 
1258 static inline bi_cursor
bi_before_instr(bi_instr * instr)1259 bi_before_instr(bi_instr *instr)
1260 {
1261    return (bi_cursor){.option = bi_cursor_before_instr, .instr = instr};
1262 }
1263 
1264 static inline bi_cursor
bi_after_instr(bi_instr * instr)1265 bi_after_instr(bi_instr *instr)
1266 {
1267    return (bi_cursor){.option = bi_cursor_after_instr, .instr = instr};
1268 }
1269 
1270 static inline bi_cursor
bi_after_block_logical(bi_block * block)1271 bi_after_block_logical(bi_block *block)
1272 {
1273    if (list_is_empty(&block->instructions))
1274       return bi_after_block(block);
1275 
1276    bi_instr *last = list_last_entry(&block->instructions, bi_instr, link);
1277    assert(last != NULL);
1278 
1279    if (last->branch_target)
1280       return bi_before_instr(last);
1281    else
1282       return bi_after_block(block);
1283 }
1284 
1285 static inline bi_cursor
bi_before_nonempty_block(bi_block * block)1286 bi_before_nonempty_block(bi_block *block)
1287 {
1288    bi_instr *I = list_first_entry(&block->instructions, bi_instr, link);
1289    assert(I != NULL);
1290 
1291    return bi_before_instr(I);
1292 }
1293 
1294 static inline bi_cursor
bi_before_block(bi_block * block)1295 bi_before_block(bi_block *block)
1296 {
1297    if (list_is_empty(&block->instructions))
1298       return bi_after_block(block);
1299    else
1300       return bi_before_nonempty_block(block);
1301 }
1302 
1303 /* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause,
1304  * in which case there must exist a nonempty penultimate tuple */
1305 
1306 ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_tuple(bi_tuple * tuple)1307 bi_first_instr_in_tuple(bi_tuple *tuple)
1308 {
1309    bi_instr *instr = tuple->fma ?: tuple->add;
1310    assert(instr != NULL);
1311    return instr;
1312 }
1313 
1314 ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_clause(bi_clause * clause)1315 bi_first_instr_in_clause(bi_clause *clause)
1316 {
1317    return bi_first_instr_in_tuple(&clause->tuples[0]);
1318 }
1319 
1320 ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_last_instr_in_clause(bi_clause * clause)1321 bi_last_instr_in_clause(bi_clause *clause)
1322 {
1323    bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
1324    bi_instr *instr = tuple.add ?: tuple.fma;
1325 
1326    if (!instr) {
1327       assert(clause->tuple_count >= 2);
1328       tuple = clause->tuples[clause->tuple_count - 2];
1329       instr = tuple.add ?: tuple.fma;
1330    }
1331 
1332    assert(instr != NULL);
1333    return instr;
1334 }
1335 
/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start
 * (end) of the clause and adding a condition for the clause boundary.
 *
 * `pos` starts directly at the clause's first (last) instruction. The helpers
 * already return a bi_instr *, so no list_entry() conversion is applied to
 * the starting point; list_entry() is only used to step from a link pointer
 * back to its instruction. The loop ends at the block's list head or at the
 * instruction just past the clause boundary, whichever comes first. `clause`
 * is evaluated on every iteration. */

#define bi_foreach_instr_in_clause(block, clause, pos)                         \
   for (bi_instr *pos = bi_first_instr_in_clause(clause);                      \
        (&pos->link != &(block)->instructions) &&                              \
        (pos != bi_next_op(bi_last_instr_in_clause(clause)));                  \
        pos = list_entry(pos->link.next, bi_instr, link))

#define bi_foreach_instr_in_clause_rev(block, clause, pos)                     \
   for (bi_instr *pos = bi_last_instr_in_clause(clause);                       \
        (&pos->link != &(block)->instructions) &&                              \
        (pos != bi_prev_op(bi_first_instr_in_clause(clause)));                 \
        pos = list_entry(pos->link.prev, bi_instr, link))
1352 
1353 static inline bi_cursor
bi_before_clause(bi_clause * clause)1354 bi_before_clause(bi_clause *clause)
1355 {
1356    return bi_before_instr(bi_first_instr_in_clause(clause));
1357 }
1358 
1359 static inline bi_cursor
bi_before_tuple(bi_tuple * tuple)1360 bi_before_tuple(bi_tuple *tuple)
1361 {
1362    return bi_before_instr(bi_first_instr_in_tuple(tuple));
1363 }
1364 
1365 static inline bi_cursor
bi_after_clause(bi_clause * clause)1366 bi_after_clause(bi_clause *clause)
1367 {
1368    return bi_after_instr(bi_last_instr_in_clause(clause));
1369 }
1370 
1371 /* IR builder in terms of cursor infrastructure */
1372 
1373 typedef struct {
1374    bi_context *shader;
1375    bi_cursor cursor;
1376 } bi_builder;
1377 
1378 static inline bi_builder
bi_init_builder(bi_context * ctx,bi_cursor cursor)1379 bi_init_builder(bi_context *ctx, bi_cursor cursor)
1380 {
1381    return (bi_builder){.shader = ctx, .cursor = cursor};
1382 }
1383 
1384 /* Insert an instruction at the cursor and move the cursor */
1385 
1386 static inline void
bi_builder_insert(bi_cursor * cursor,bi_instr * I)1387 bi_builder_insert(bi_cursor *cursor, bi_instr *I)
1388 {
1389    switch (cursor->option) {
1390    case bi_cursor_after_instr:
1391       list_add(&I->link, &cursor->instr->link);
1392       cursor->instr = I;
1393       return;
1394 
1395    case bi_cursor_after_block:
1396       list_addtail(&I->link, &cursor->block->instructions);
1397       cursor->option = bi_cursor_after_instr;
1398       cursor->instr = I;
1399       return;
1400 
1401    case bi_cursor_before_instr:
1402       list_addtail(&I->link, &cursor->instr->link);
1403       cursor->option = bi_cursor_after_instr;
1404       cursor->instr = I;
1405       return;
1406    }
1407 
1408    unreachable("Invalid cursor option");
1409 }
1410 
1411 bi_instr *bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign);
1412 
1413 /* Read back power-efficent garbage, TODO maybe merge with null? */
1414 static inline bi_index
bi_dontcare(bi_builder * b)1415 bi_dontcare(bi_builder *b)
1416 {
1417    if (b->shader->arch >= 9)
1418       return bi_zero();
1419    else
1420       return bi_passthrough(BIFROST_SRC_FAU_HI);
1421 }
1422 
/* Block worklists: thin wrappers over u_worklist that fix the element type to
 * bi_block and the key member to `index`. bi_worklist_init sizes the worklist
 * from the context's num_blocks and passes the context itself as the third
 * argument; `ctx` may be any expression yielding a context pointer. */
#define bi_worklist_init(ctx, w)        u_worklist_init(w, (ctx)->num_blocks, ctx)
#define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define bi_worklist_peek_head(w)        u_worklist_peek_head(w, bi_block, index)
#define bi_worklist_pop_head(w)         u_worklist_pop_head(w, bi_block, index)
#define bi_worklist_peek_tail(w)        u_worklist_peek_tail(w, bi_block, index)
#define bi_worklist_pop_tail(w)         u_worklist_pop_tail(w, bi_block, index)
1430 
1431 /* NIR passes */
1432 
1433 bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);
1434 
1435 #ifdef __cplusplus
1436 } /* extern C */
1437 #endif
1438 
1439 #endif
1440