1 /*
2  * Copyright (C) 2020 Collabora Ltd.
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors (Collabora):
24  *      Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
25  */
26 
27 #ifndef __BIFROST_COMPILER_H
28 #define __BIFROST_COMPILER_H
29 
30 #include "compiler/nir/nir.h"
31 #include "panfrost/util/pan_ir.h"
32 #include "util/half_float.h"
33 #include "util/u_math.h"
34 #include "util/u_worklist.h"
35 #include "bi_opcodes.h"
36 #include "bifrost.h"
37 #include "valhall_enums.h"
38 
39 #ifdef __cplusplus
40 extern "C" {
41 #endif
42 
/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
 * To express widen, use the corresponding replicated form, i.e. H01 = identity
 * for widen = none, H00 for widen = h0, B1111 for widen = b1. For lane, also
 * use the replicated form (interpretation is governed by the opcode). For
 * 8-bit lanes with two channels, use replicated forms for replicated forms
 * (TODO: what about others?). For 8-bit lanes with four channels, use the
 * matching form (TODO: what about others?).
 */
51 
enum bi_swizzle {
   /* 16-bit swizzle ordering deliberate for fast compute: the encoding is
    * (first half << 1) | second half, see bi_swz_16 */
   BI_SWIZZLE_H00 = 0, /* = B0101 */
   BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */
   BI_SWIZZLE_H10 = 2, /* = B2301 */
   BI_SWIZZLE_H11 = 3, /* = B2323 */

   /* replication order should be maintained for fast compute: the encoding
    * is BI_SWIZZLE_B0000 + lane, see bi_byte */
   BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */
   BI_SWIZZLE_B1111 = 5,
   BI_SWIZZLE_B2222 = 6,
   BI_SWIZZLE_B3333 = 7,

   /* totally special for explicit pattern matching */
   BI_SWIZZLE_B0011 = 8,  /* +SWZ.v4i8 */
   BI_SWIZZLE_B2233 = 9,  /* +SWZ.v4i8 */
   BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */
   BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */

   BI_SWIZZLE_B0022 = 12, /* for b02 lanes */
};

/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
 * folding and Valhall constant optimization.
 *
 * Lane 0 is the least significant half/byte of `value`. Lanes are extracted
 * with shifts and masks instead of reading `value` through uint16_t/uint8_t
 * pointers, so the result does not depend on host endianness and no object
 * is accessed through an incompatible lvalue type.
 *
 * Returns the swizzled 32-bit word. Passing a value outside enum bi_swizzle
 * hits unreachable(). */

static inline uint32_t
bi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
{
#define HALF(i) ((uint32_t)((value >> (16 * (i))) & 0xFFFFu))
#define BYTE(i) ((uint32_t)((value >> (8 * (i))) & 0xFFu))
#define H(h0, h1) (HALF(h0) | (HALF(h1) << 16))
#define B(b0, b1, b2, b3)                                                      \
   (BYTE(b0) | (BYTE(b1) << 8) | (BYTE(b2) << 16) | (BYTE(b3) << 24))

   switch (swz) {
   case BI_SWIZZLE_H00:
      return H(0, 0);
   case BI_SWIZZLE_H01:
      return H(0, 1);
   case BI_SWIZZLE_H10:
      return H(1, 0);
   case BI_SWIZZLE_H11:
      return H(1, 1);
   case BI_SWIZZLE_B0000:
      return B(0, 0, 0, 0);
   case BI_SWIZZLE_B1111:
      return B(1, 1, 1, 1);
   case BI_SWIZZLE_B2222:
      return B(2, 2, 2, 2);
   case BI_SWIZZLE_B3333:
      return B(3, 3, 3, 3);
   case BI_SWIZZLE_B0011:
      return B(0, 0, 1, 1);
   case BI_SWIZZLE_B2233:
      return B(2, 2, 3, 3);
   case BI_SWIZZLE_B1032:
      return B(1, 0, 3, 2);
   case BI_SWIZZLE_B3210:
      return B(3, 2, 1, 0);
   case BI_SWIZZLE_B0022:
      return B(0, 0, 2, 2);
   }

#undef B
#undef H
#undef BYTE
#undef HALF

   unreachable("Invalid swizzle");
}
122 
/* Discriminator for bi_index: selects how bi_index.value is interpreted.
 * Values must fit the 3-bit `type` bit-field of bi_index. */
enum bi_index_type {
   BI_INDEX_NULL = 0,     /* no operand, see bi_null */
   BI_INDEX_NORMAL = 1,   /* SSA value, see bi_get_index / bi_is_ssa */
   BI_INDEX_REGISTER = 2, /* register number, see bi_register */
   BI_INDEX_CONSTANT = 3, /* 32-bit immediate, see bi_imm_u32 */
   BI_INDEX_PASS = 4,     /* enum bifrost_packed_src, see bi_passthrough */
   BI_INDEX_FAU = 5       /* enum bir_fau, see bi_fau */
};
131 
/* A reference to an operand (source or destination) of an instruction: a
 * 32-bit payload whose meaning is selected by `type`, plus modifiers. */
typedef struct {
   uint32_t value;

   /* modifiers, should only be set if applicable for a given instruction.
    * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where
    * applicable, neg plays the role of not */
   bool abs : 1;
   bool neg : 1;

   /* The last use of a value, should be purged from the register cache.
    * Set by liveness analysis. */
   bool discard : 1;

   /* For a source, the swizzle. For a destination, acts a bit like a
    * write mask. Identity for the full 32-bit, H00 for only caring about
    * the lower half, other values unused. */
   enum bi_swizzle swizzle : 4;
   uint32_t offset         : 3;
   enum bi_index_type type : 3;

   /* Must be zeroed so we can hash the whole 64-bits at a time. The 13 is
    * the total width of the bit-fields above (1 + 1 + 1 + 4 + 3 + 3). */
   unsigned padding : (32 - 13);
} bi_index;
155 
156 static inline bi_index
bi_get_index(unsigned value)157 bi_get_index(unsigned value)
158 {
159    return (bi_index){
160       .value = value,
161       .swizzle = BI_SWIZZLE_H01,
162       .type = BI_INDEX_NORMAL,
163    };
164 }
165 
166 static inline bi_index
bi_register(unsigned reg)167 bi_register(unsigned reg)
168 {
169    assert(reg < 64);
170 
171    return (bi_index){
172       .value = reg,
173       .swizzle = BI_SWIZZLE_H01,
174       .type = BI_INDEX_REGISTER,
175    };
176 }
177 
178 static inline bi_index
bi_imm_u32(uint32_t imm)179 bi_imm_u32(uint32_t imm)
180 {
181    return (bi_index){
182       .value = imm,
183       .swizzle = BI_SWIZZLE_H01,
184       .type = BI_INDEX_CONSTANT,
185    };
186 }
187 
188 static inline bi_index
bi_imm_f32(float imm)189 bi_imm_f32(float imm)
190 {
191    return bi_imm_u32(fui(imm));
192 }
193 
194 static inline bi_index
bi_null()195 bi_null()
196 {
197    return (bi_index){.type = BI_INDEX_NULL};
198 }
199 
200 static inline bi_index
bi_zero()201 bi_zero()
202 {
203    return bi_imm_u32(0);
204 }
205 
206 static inline bi_index
bi_passthrough(enum bifrost_packed_src value)207 bi_passthrough(enum bifrost_packed_src value)
208 {
209    return (bi_index){
210       .value = value,
211       .swizzle = BI_SWIZZLE_H01,
212       .type = BI_INDEX_PASS,
213    };
214 }
215 
216 /* Helps construct swizzles */
217 static inline bi_index
bi_swz_16(bi_index idx,bool x,bool y)218 bi_swz_16(bi_index idx, bool x, bool y)
219 {
220    assert(idx.swizzle == BI_SWIZZLE_H01);
221    idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_H00 | (x << 1) | y);
222    return idx;
223 }
224 
225 static inline bi_index
bi_half(bi_index idx,bool upper)226 bi_half(bi_index idx, bool upper)
227 {
228    return bi_swz_16(idx, upper, upper);
229 }
230 
231 static inline bi_index
bi_byte(bi_index idx,unsigned lane)232 bi_byte(bi_index idx, unsigned lane)
233 {
234    assert(idx.swizzle == BI_SWIZZLE_H01);
235    assert(lane < 4);
236    idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0000 + lane);
237    return idx;
238 }
239 
240 static inline bi_index
bi_abs(bi_index idx)241 bi_abs(bi_index idx)
242 {
243    idx.abs = true;
244    return idx;
245 }
246 
247 static inline bi_index
bi_neg(bi_index idx)248 bi_neg(bi_index idx)
249 {
250    idx.neg ^= true;
251    return idx;
252 }
253 
254 static inline bi_index
bi_discard(bi_index idx)255 bi_discard(bi_index idx)
256 {
257    idx.discard = true;
258    return idx;
259 }
260 
261 /* Additive identity in IEEE 754 arithmetic */
262 static inline bi_index
bi_negzero()263 bi_negzero()
264 {
265    return bi_neg(bi_zero());
266 }
267 
268 /* Replaces an index, preserving any modifiers */
269 
270 static inline bi_index
bi_replace_index(bi_index old,bi_index replacement)271 bi_replace_index(bi_index old, bi_index replacement)
272 {
273    replacement.abs = old.abs;
274    replacement.neg = old.neg;
275    replacement.swizzle = old.swizzle;
276    replacement.discard = false; /* needs liveness analysis to set */
277    return replacement;
278 }
279 
280 /* Remove any modifiers. This has the property:
281  *
282  *     replace_index(x, strip_index(x)) = x
283  *
284  * This ensures it is suitable to use when lowering sources to moves */
285 
286 static inline bi_index
bi_strip_index(bi_index index)287 bi_strip_index(bi_index index)
288 {
289    index.abs = index.neg = false;
290    index.swizzle = BI_SWIZZLE_H01;
291    return index;
292 }
293 
/* For bitwise instructions, where the neg modifier plays the role of not:
 * bi_not(x) toggles the neg modifier of x via bi_neg. */
#define bi_not(x) bi_neg(x)
296 
297 static inline bi_index
bi_imm_u8(uint8_t imm)298 bi_imm_u8(uint8_t imm)
299 {
300    return bi_byte(bi_imm_u32(imm), 0);
301 }
302 
303 static inline bi_index
bi_imm_u16(uint16_t imm)304 bi_imm_u16(uint16_t imm)
305 {
306    return bi_half(bi_imm_u32(imm), false);
307 }
308 
309 static inline bi_index
bi_imm_uintN(uint32_t imm,unsigned sz)310 bi_imm_uintN(uint32_t imm, unsigned sz)
311 {
312    assert(sz == 8 || sz == 16 || sz == 32);
313    return (sz == 8)    ? bi_imm_u8(imm)
314           : (sz == 16) ? bi_imm_u16(imm)
315                        : bi_imm_u32(imm);
316 }
317 
318 static inline bi_index
bi_imm_f16(float imm)319 bi_imm_f16(float imm)
320 {
321    return bi_imm_u16(_mesa_float_to_half(imm));
322 }
323 
324 static inline bool
bi_is_null(bi_index idx)325 bi_is_null(bi_index idx)
326 {
327    return idx.type == BI_INDEX_NULL;
328 }
329 
330 static inline bool
bi_is_ssa(bi_index idx)331 bi_is_ssa(bi_index idx)
332 {
333    return idx.type == BI_INDEX_NORMAL;
334 }
335 
336 /* Compares equivalence as references. Does not compare offsets, swizzles, or
337  * modifiers. In other words, this forms bi_index equivalence classes by
338  * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */
339 
340 static inline bool
bi_is_equiv(bi_index left,bi_index right)341 bi_is_equiv(bi_index left, bi_index right)
342 {
343    return (left.type == right.type) && (left.value == right.value);
344 }
345 
346 /* A stronger equivalence relation that requires the indices access the
347  * same offset, useful for RA/scheduling to see what registers will
348  * correspond to */
349 
350 static inline bool
bi_is_word_equiv(bi_index left,bi_index right)351 bi_is_word_equiv(bi_index left, bi_index right)
352 {
353    return bi_is_equiv(left, right) && left.offset == right.offset;
354 }
355 
356 /* An even stronger equivalence that checks if indices correspond to the
357  * right value when evaluated
358  */
359 static inline bool
bi_is_value_equiv(bi_index left,bi_index right)360 bi_is_value_equiv(bi_index left, bi_index right)
361 {
362    if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) {
363       return (bi_apply_swizzle(left.value, left.swizzle) ==
364               bi_apply_swizzle(right.value, right.swizzle)) &&
365              (left.abs == right.abs) && (left.neg == right.neg);
366    } else {
367       return (left.value == right.value) && (left.abs == right.abs) &&
368              (left.neg == right.neg) && (left.swizzle == right.swizzle) &&
369              (left.offset == right.offset) && (left.type == right.type);
370    }
371 }
372 
/* Upper bounds used by the IR. NOTE(review): presumably the maximum vector
 * width and the maximum destination/source counts of a bi_instr -- confirm
 * against the users of these macros. */
#define BI_MAX_VEC   8
#define BI_MAX_DESTS 4
#define BI_MAX_SRCS  8
376 
/* A single IR instruction: an opcode, arrays of destination and source
 * indices, and opcode-dependent modifiers stored in overlapping unions. */
typedef struct {
   /* Must be first */
   struct list_head link;

   /* Destination and source arrays; nr_dests and nr_srcs below hold the
    * number of valid entries in each */
   bi_index *dest;
   bi_index *src;

   enum bi_opcode op;
   uint8_t nr_srcs;
   uint8_t nr_dests;

   union {
      /* For a branch */
      struct bi_block *branch_target;

      /* For a phi node that hasn't been translated yet. This is only
       * used during NIR->BIR
       */
      nir_phi_instr *phi;
   };

   /* These don't fit neatly with anything else.. */
   enum bi_register_format register_format;
   enum bi_vecsize vecsize;

   /* Flow control associated with a Valhall instruction */
   uint8_t flow;

   /* Slot associated with a message-passing instruction */
   uint8_t slot;

   /* Can we spill the value written here? Used to prevent
    * useless double fills */
   bool no_spill;

   /* On Bifrost: A value of bi_table to override the table, inducing a
    * DTSEL_IMM pair if nonzero.
    *
    * On Valhall: the table index to use for resource instructions.
    *
    * These two interpretations are equivalent if you squint a bit.
    */
   unsigned table;

   /* Everything after this MUST NOT be accessed directly, since
    * interpretation depends on opcodes */

   /* Destination modifiers */
   union {
      enum bi_clamp clamp;
      bool saturate;
      bool not_result;
      unsigned dest_mod;
   };

   /* Immediates. All seen alone in an instruction, except for varying/texture
    * which are specified jointly for VARTEX */
   union {
      uint32_t shift;
      uint32_t fill;
      uint32_t index;
      uint32_t attribute_index;

      struct {
         uint32_t varying_index;
         uint32_t sampler_index;
         uint32_t texture_index;
      };

      /* TEXC, ATOM_CX: # of staging registers used */
      struct {
         uint32_t sr_count;
         uint32_t sr_count_2;

         union {
            /* Atomics effectively require all three */
            int32_t byte_offset;

            /* BLEND requires all three */
            int32_t branch_offset;
         };
      };
   };

   /* Modifiers specific to particular instructions are thrown in a union */
   union {
      enum bi_adj adj;           /* FEXP_TABLE.u4 */
      enum bi_atom_opc atom_opc; /* atomics */
      enum bi_func func;         /* FPOW_SC_DET */
      enum bi_function function; /* LD_VAR_FLAT */
      enum bi_mux mux;           /* MUX */
      enum bi_sem sem;           /* FMAX, FMIN */
      enum bi_source source;     /* LD_GCLK */
      bool scale;                /* VN_ASST2, FSINCOS_OFFSET */
      bool offset;               /* FSIN_TABLE, FCOS_TABLE */
      bool mask;                 /* CLZ */
      bool threads;              /* IMULD, IMOV_FMA */
      bool combine;              /* BRANCHC */
      bool format;               /* LEA_TEX */

      struct {
         enum bi_special special;   /* FADD_RSCALE, FMA_RSCALE */
         enum bi_round round;       /* FMA, converts, FADD, _RSCALE, etc */
         bool ftz;                  /* Flush-to-zero for F16_TO_F32 and FLUSH */
         enum va_nan_mode nan_mode; /* NaN flush mode, for FLUSH */
         bool flush_inf;            /* Flush infinity to finite, for FLUSH */
      };

      struct {
         enum bi_result_type result_type; /* FCMP, ICMP */
         enum bi_cmpf cmpf;               /* CSEL, FCMP, ICMP, BRANCH */
      };

      struct {
         enum bi_stack_mode stack_mode; /* JUMP_EX */
         bool test_mode;
      };

      struct {
         enum bi_seg seg;       /* LOAD, STORE, SEG_ADD, SEG_SUB */
         bool preserve_null;    /* SEG_ADD, SEG_SUB */
         enum bi_extend extend; /* LOAD, IMUL */
      };

      struct {
         enum bi_sample sample;             /* VAR_TEX, LD_VAR */
         enum bi_update update;             /* VAR_TEX, LD_VAR */
         enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */
         bool skip;                         /* VAR_TEX, TEXS, TEXC */
         bool lod_mode; /* VAR_TEX, TEXS, implicitly for TEXC */
         enum bi_source_format source_format; /* LD_VAR_BUF */

         /* Used for valhall texturing */
         bool shadow;
         bool wide_indices;
         bool texel_offset;
         bool array_enable;
         bool integer_coordinates;
         bool derivative_enable;
         bool force_delta_enable;
         bool lod_bias_disable;
         bool lod_clamp_disable;
         enum bi_fetch_component fetch_component;
         enum bi_va_lod_mode va_lod_mode;
         enum bi_dimension dimension;
         enum bi_write_mask write_mask;
      };

      /* Maximum size, for hashing: this array is meant to be at least as
       * large as every other member of the union, so it must grow if a
       * larger member is added */
      unsigned flags[14];

      struct {
         enum bi_subgroup subgroup;               /* WMASK, CLPER */
         enum bi_inactive_result inactive_result; /* CLPER */
         enum bi_lane_op lane_op;                 /* CLPER */
      };

      struct {
         bool z;       /* ZS_EMIT */
         bool stencil; /* ZS_EMIT */
      };

      struct {
         bool h; /* VN_ASST1.f16 */
         bool l; /* VN_ASST1.f16 */
      };

      struct {
         bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */
         bool result_word;
         bool arithmetic; /* ARSHIFT_OR */
      };

      struct {
         bool sqrt; /* FREXPM */
         bool log;  /* FREXPM */
      };

      struct {
         enum bi_mode mode;           /* FLOG_TABLE */
         enum bi_precision precision; /* FLOG_TABLE */
         bool divzero;                /* FRSQ_APPROX, FRSQ */
      };
   };
} bi_instr;
561 
562 static inline bool
bi_is_staging_src(const bi_instr * I,unsigned s)563 bi_is_staging_src(const bi_instr *I, unsigned s)
564 {
565    return (s == 0 || s == 4) && bi_opcode_props[I->op].sr_read;
566 }
567 
568 /*
569  * Safe helpers to remove destinations/sources at the end of the
570  * destination/source array when changing opcodes. Unlike adding
571  * sources/destinations, this does not require reallocation.
572  */
573 static inline void
bi_drop_dests(bi_instr * I,unsigned new_count)574 bi_drop_dests(bi_instr *I, unsigned new_count)
575 {
576    assert(new_count < I->nr_dests);
577 
578    for (unsigned i = new_count; i < I->nr_dests; ++i)
579       I->dest[i] = bi_null();
580 
581    I->nr_dests = new_count;
582 }
583 
584 static inline void
bi_drop_srcs(bi_instr * I,unsigned new_count)585 bi_drop_srcs(bi_instr *I, unsigned new_count)
586 {
587    assert(new_count < I->nr_srcs);
588 
589    for (unsigned i = new_count; i < I->nr_srcs; ++i)
590       I->src[i] = bi_null();
591 
592    I->nr_srcs = new_count;
593 }
594 
595 static inline void
bi_replace_src(bi_instr * I,unsigned src_index,bi_index replacement)596 bi_replace_src(bi_instr *I, unsigned src_index, bi_index replacement)
597 {
598    I->src[src_index] = bi_replace_index(I->src[src_index], replacement);
599 }
600 
/* Represents the assignment of slots for a given bi_tuple */

typedef struct {
   /* Register to assign to each slot */
   unsigned slot[4];

   /* Read slots can be disabled */
   bool enabled[2];

   /* Configuration for slots 2/3 */
   struct bifrost_reg_ctrl_23 slot23;

   /* Fast-Access-Uniform RAM index */
   uint8_t fau_idx;

   /* Whether writes are actually for the last instruction */
   bool first_instruction;
} bi_registers;
619 
/* A bi_tuple contains two paired instruction pointers, one for the FMA unit
 * and one for the ADD unit. If a slot is unfilled, leave it NULL; the
 * emitter will fill in a nop. Instructions reference registers via slots
 * which are assigned per tuple (see bi_registers).
 */

typedef struct {
   uint8_t fau_idx;
   bi_registers regs;
   bi_instr *fma;
   bi_instr *add;
} bi_tuple;
631 
/* Forward declaration: bi_clause links back to its block before bi_block is
 * defined */
struct bi_block;

/* A clause: up to 8 tuples plus the constants, scoreboard and flow-control
 * state that go with them. */
typedef struct {
   struct list_head link;

   /* Link back up for branch calculations */
   struct bi_block *block;

   /* Architectural limit of 8 tuples/clause */
   unsigned tuple_count;
   bi_tuple tuples[8];

   /* For scoreboarding -- the clause ID (this is not globally unique!)
    * and its dependencies in terms of other clauses, computed during
    * scheduling and used when emitting code. Dependencies expressed as a
    * bitfield matching the hardware, except shifted by a clause (the
    * shift back to the ISA's off-by-one encoding is worked out when
    * emitting clauses) */
   unsigned scoreboard_id;
   uint8_t dependencies;

   /* See ISA header for description */
   enum bifrost_flow flow_control;

   /* Can we prefetch the next clause? Usually it makes sense, except for
    * clauses ending in unconditional branches */
   bool next_clause_prefetch;

   /* Assigned data register */
   unsigned staging_register;

   /* Corresponds to the usual bit but shifted by a clause */
   bool staging_barrier;

   /* Constants read by this clause. ISA limit. Must satisfy:
    *
    *      constant_count + tuple_count <= 13
    *
    * Also implicitly constant_count <= tuple_count since a tuple only
    * reads a single constant.
    */
   uint64_t constants[8];
   unsigned constant_count;

   /* Index of a constant to be PC-relative */
   unsigned pcrel_idx;

   /* Branches encode a constant offset relative to the program counter
    * with some magic flags. By convention, if there is a branch, its
    * constant will be last. Set this flag to indicate this is required.
    */
   bool branch_constant;

   /* Unique in a clause */
   enum bifrost_message_type message_type;
   bi_instr *message;

   /* Discard helper threads */
   bool td;

   /* Should flush-to-zero mode be enabled for this clause? */
   bool ftz;
} bi_clause;
695 
/* Number of scoreboard slots modelled; also the width in bits of the
 * per-slot masks in bi_scoreboard_state */
#define BI_NUM_SLOTS 8

/* A model for the state of the scoreboard */
struct bi_scoreboard_state {
   /** Bitmap of registers read/written by a slot */
   uint64_t read[BI_NUM_SLOTS];
   uint64_t write[BI_NUM_SLOTS];

   /* Nonregister dependencies present by a slot (one bit per slot) */
   uint8_t varying : BI_NUM_SLOTS;
   uint8_t memory : BI_NUM_SLOTS;
};
708 
/* A basic block: a list of instructions (or clauses, once scheduled) plus
 * control-flow edges and per-block analysis results. */
typedef struct bi_block {
   /* Link to next block. Must be first for mir_get_block */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned index;

   /* Control flow graph. Successors are filled from slot 0 upwards (see
    * bi_block_add_successor); predecessors holds bi_block pointers */
   struct bi_block *successors[2];
   struct util_dynarray predecessors;
   bool unconditional_jumps;
   bool loop_header;

   /* Per 32-bit word live masks for the block indexed by node */
   uint8_t *live_in;
   uint8_t *live_out;

   /* Scalar liveness indexed by SSA index */
   BITSET_WORD *ssa_live_in;
   BITSET_WORD *ssa_live_out;

   /* If true, uses clauses; if false, uses instructions */
   bool scheduled;
   struct list_head clauses; /* list of bi_clause */

   /* Post-RA liveness */
   uint64_t reg_live_in, reg_live_out;

   /* Scoreboard state at the start/end of block */
   struct bi_scoreboard_state scoreboard_in, scoreboard_out;

   /* On Valhall, indicates we need a terminal NOP to implement jumps to
    * the end of the shader.
    */
   bool needs_nop;

   /* Flags available for pass-internal use */
   uint8_t pass_flags;
} bi_block;
751 
752 static inline unsigned
bi_num_successors(bi_block * block)753 bi_num_successors(bi_block *block)
754 {
755    STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2);
756    assert(block->successors[0] || !block->successors[1]);
757 
758    if (block->successors[1])
759       return 2;
760    else if (block->successors[0])
761       return 1;
762    else
763       return 0;
764 }
765 
766 static inline unsigned
bi_num_predecessors(bi_block * block)767 bi_num_predecessors(bi_block *block)
768 {
769    return util_dynarray_num_elements(&block->predecessors, bi_block *);
770 }
771 
772 static inline bi_block *
bi_start_block(struct list_head * blocks)773 bi_start_block(struct list_head *blocks)
774 {
775    bi_block *first = list_first_entry(blocks, bi_block, link);
776    assert(bi_num_predecessors(first) == 0);
777    return first;
778 }
779 
780 static inline bi_block *
bi_exit_block(struct list_head * blocks)781 bi_exit_block(struct list_head *blocks)
782 {
783    bi_block *last = list_last_entry(blocks, bi_block, link);
784    assert(bi_num_successors(last) == 0);
785    return last;
786 }
787 
788 static inline void
bi_block_add_successor(bi_block * block,bi_block * successor)789 bi_block_add_successor(bi_block *block, bi_block *successor)
790 {
791    assert(block != NULL && successor != NULL);
792 
793    /* Cull impossible edges */
794    if (block->unconditional_jumps)
795       return;
796 
797    for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
798       if (block->successors[i]) {
799          if (block->successors[i] == successor)
800             return;
801          else
802             continue;
803       }
804 
805       block->successors[i] = successor;
806       util_dynarray_append(&successor->predecessors, bi_block *, block);
807       return;
808    }
809 
810    unreachable("Too many successors");
811 }
812 
/* Subset of pan_shader_info needed per-variant, in order to support IDVS.
 * Only pointers and plain counters are stored here. */
struct bi_shader_info {
   struct panfrost_ubo_push *push;
   struct bifrost_shader_info *bifrost;
   unsigned tls_size;
   unsigned work_reg_count;
   unsigned push_offset;
};
821 
/* State of index-driven vertex shading (IDVS) for the current shader.
 * BI_IDVS_NONE is zero, so a zero-initialized context has IDVS disabled. */
enum bi_idvs_mode {
   /* IDVS not in use */
   BI_IDVS_NONE = 0,

   /* IDVS in use. Compiling a position shader */
   BI_IDVS_POSITION = 1,

   /* IDVS in use. Compiling a varying shader */
   BI_IDVS_VARYING = 2,
};
833 
/* Per-shader compilation context: the input shader, the list of blocks
 * being built, allocation counters and statistics. */
typedef struct {
   const struct panfrost_compile_inputs *inputs;
   nir_shader *nir;
   struct bi_shader_info info;
   gl_shader_stage stage;
   struct list_head blocks; /* list of bi_block */
   uint32_t quirks;
   unsigned arch;
   enum bi_idvs_mode idvs;
   unsigned num_blocks;

   /* In any graphics shader, whether the "IDVS with memory
    * allocation" flow is used. This affects how varyings are loaded and
    * stored. Ignore for compute.
    */
   bool malloc_idvs;

   /* During NIR->BIR */
   bi_block *current_block;
   bi_block *after_block;
   bi_block *break_block;
   bi_block *continue_block;
   bi_block **indexed_nir_blocks;
   bool emitted_atest;

   /* During NIR->BIR, the coverage bitmap. If this is the null index, the
    * default coverage bitmap should be sourced from preloaded register r60.
    * This is written by ATEST and ZS_EMIT
    */
   bi_index coverage;

   /* During NIR->BIR, table of preloaded registers, or the null index if
    * never preloaded.
    */
   bi_index preloaded[64];

   /* For creating temporaries (ssa_alloc is the next index handed out by
    * bi_temp) */
   unsigned ssa_alloc;
   unsigned reg_alloc;

   /* Mask of UBOs that need to be uploaded */
   uint32_t ubo_mask;

   /* During instruction selection, map from vector bi_index to its scalar
    * components, populated by a split.
    */
   struct hash_table_u64 *allocated_vec;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned spills;
   unsigned fills;
} bi_context;
887 
888 static inline void
bi_remove_instruction(bi_instr * ins)889 bi_remove_instruction(bi_instr *ins)
890 {
891    list_del(&ins->link);
892 }
893 
/* Values stored in a BI_INDEX_FAU index (see bi_fau). The low values name
 * special entries; BIR_FAU_UNIFORM and BIR_FAU_IMMEDIATE are flag bits that
 * are OR'd with an entry number (see va_lut for the latter). */
enum bir_fau {
   BIR_FAU_ZERO = 0,
   BIR_FAU_LANE_ID = 1,
   BIR_FAU_WARP_ID = 2,
   BIR_FAU_CORE_ID = 3,
   BIR_FAU_FB_EXTENT = 4,
   BIR_FAU_ATEST_PARAM = 5,
   BIR_FAU_SAMPLE_POS_ARRAY = 6,
   BIR_FAU_BLEND_0 = 8,
   /* blend descs 1 - 7 */
   BIR_FAU_TYPE_MASK = 15,

   /* Valhall only */
   BIR_FAU_TLS_PTR = 16,
   BIR_FAU_WLS_PTR = 17,
   BIR_FAU_PROGRAM_COUNTER = 18,

   BIR_FAU_UNIFORM = (1 << 7),
   /* Look up table on Valhall */
   BIR_FAU_IMMEDIATE = (1 << 8),

};
916 
917 static inline bi_index
bi_fau(enum bir_fau value,bool hi)918 bi_fau(enum bir_fau value, bool hi)
919 {
920    return (bi_index){
921       .value = value,
922       .swizzle = BI_SWIZZLE_H01,
923       .offset = hi ? 1u : 0u,
924       .type = BI_INDEX_FAU,
925    };
926 }
927 
928 /*
929  * Builder for Valhall LUT entries. Generally, constants are modeled with
930  * BI_INDEX_IMMEDIATE in the intermediate representation. This helper is only
931  * necessary for passes running after lowering constants, as well as when
932  * lowering constants.
933  *
934  */
935 static inline bi_index
va_lut(unsigned index)936 va_lut(unsigned index)
937 {
938    return bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | (index >> 1)), index & 1);
939 }
940 
941 /*
942  * va_lut_zero is like bi_zero but only works on Valhall. It is intended for
943  * use by late passes that run after constants are lowered, specifically
944  * register allocation. bi_zero() is preferred where possible.
945  */
946 static inline bi_index
va_zero_lut()947 va_zero_lut()
948 {
949    return va_lut(0);
950 }
951 
952 static inline bi_index
bi_temp(bi_context * ctx)953 bi_temp(bi_context *ctx)
954 {
955    return bi_get_index(ctx->ssa_alloc++);
956 }
957 
958 static inline bi_index
bi_def_index(nir_def * def)959 bi_def_index(nir_def *def)
960 {
961    return bi_get_index(def->index);
962 }
963 
964 /* Inline constants automatically, will be lowered out by bi_lower_fau where a
965  * constant is not allowed. load_const_to_scalar gaurantees that this makes
966  * sense */
967 
968 static inline bi_index
bi_src_index(nir_src * src)969 bi_src_index(nir_src *src)
970 {
971    if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) {
972       return bi_imm_u32(nir_src_as_uint(*src));
973    } else {
974       return bi_def_index(src->ssa);
975    }
976 }
977 
978 /* Iterators for Bifrost IR */
979 
/* Iterate over the bi_block entries of (ctx)->blocks, forwards or in
 * reverse, optionally starting at `from`. `ctx` is parenthesized so that any
 * pointer expression (e.g. &local or a ternary) can be passed safely. */
#define bi_foreach_block(ctx, v)                                               \
   list_for_each_entry(bi_block, v, &(ctx)->blocks, link)

#define bi_foreach_block_rev(ctx, v)                                           \
   list_for_each_entry_rev(bi_block, v, &(ctx)->blocks, link)

#define bi_foreach_block_from(ctx, from, v)                                    \
   list_for_each_entry_from(bi_block, v, from, &(ctx)->blocks, link)

#define bi_foreach_block_from_rev(ctx, from, v)                                \
   list_for_each_entry_from_rev(bi_block, v, from, &(ctx)->blocks, link)
991 
/* Instruction iterators over a block's `instructions` list. `pos` names the
 * bi_instr cursor. The _rev forms walk backwards, the _from forms start at
 * `from`, and the _safe forms map onto the list_for_each_entry_safe* helpers
 * (presumably so the current entry may be removed while iterating; see
 * util/list.h). */
#define bi_foreach_instr_in_block(block, pos)                                  \
   list_for_each_entry(bi_instr, pos, &(block)->instructions, link)

#define bi_foreach_instr_in_block_rev(block, pos)                              \
   list_for_each_entry_rev(bi_instr, pos, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe(block, pos)                             \
   list_for_each_entry_safe(bi_instr, pos, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe_rev(block, pos)                         \
   list_for_each_entry_safe_rev(bi_instr, pos, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from(block, pos, from)                       \
   list_for_each_entry_from(bi_instr, pos, from, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from_rev(block, pos, from)                   \
   list_for_each_entry_from_rev(bi_instr, pos, from, &(block)->instructions,   \
                                link)
1009 
/* Clause iterators over a block's `clauses` list; same naming scheme as the
 * instruction iterators (_rev = backwards, _safe = list *_safe helper,
 * _from = start at `from`). `pos` names the bi_clause cursor. */
#define bi_foreach_clause_in_block(block, pos)                                 \
   list_for_each_entry(bi_clause, pos, &(block)->clauses, link)

#define bi_foreach_clause_in_block_rev(block, pos)                             \
   list_for_each_entry_rev(bi_clause, pos, &(block)->clauses, link)

#define bi_foreach_clause_in_block_safe(block, pos)                            \
   list_for_each_entry_safe(bi_clause, pos, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from(block, pos, from)                      \
   list_for_each_entry_from(bi_clause, pos, from, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from_rev(block, pos, from)                  \
   list_for_each_entry_from_rev(bi_clause, pos, from, &(block)->clauses, link)
1024 
/* Whole-shader instruction iterators: an outer loop over the blocks of `ctx`
 * wrapping an inner loop over each block's instructions. The outer cursor is
 * always named `v_block` and is visible to the loop body. */
#define bi_foreach_instr_global(ctx, pos)                                      \
   bi_foreach_block(ctx, v_block)                                              \
      bi_foreach_instr_in_block(v_block, pos)

#define bi_foreach_instr_global_rev(ctx, pos)                                  \
   bi_foreach_block_rev(ctx, v_block)                                          \
      bi_foreach_instr_in_block_rev(v_block, pos)

#define bi_foreach_instr_global_safe(ctx, pos)                                 \
   bi_foreach_block(ctx, v_block)                                              \
      bi_foreach_instr_in_block_safe(v_block, pos)
1036 
/* Reverse whole-shader iteration using the _safe list helper: blocks are
 * walked backwards and, within each, instructions are walked backwards via
 * bi_foreach_instr_in_block_safe_rev. The outer block cursor is named
 * `v_block`.
 *
 * The inner macro is spelled *_safe_rev (not *_rev_safe); the previous
 * expansion referenced a macro that does not exist, so any use of this
 * iterator failed to compile. */
#define bi_foreach_instr_global_rev_safe(ctx, v)                               \
   bi_foreach_block_rev(ctx, v_block)                                          \
      bi_foreach_instr_in_block_safe_rev(v_block, v)
1040 
/* Visit the instructions of a tuple: the FMA instruction first (if any),
 * then the ADD instruction (if any). Iteration stops once the ADD slot has
 * been visited or a NULL slot is reached. */
#define bi_foreach_instr_in_tuple(tuple, pos)                                  \
   for (bi_instr *pos = (tuple)->fma ?: (tuple)->add; pos != NULL;             \
        pos = (pos != (tuple)->add) ? (tuple)->add : NULL)
1044 
/* Iterate over the (up to two) successors of a block, stopping at the first
 * NULL entry.
 *
 * The macro declares `v` (the current successor) and the helper `_v` in the
 * enclosing scope, so it can be used at most once per scope.
 *
 * The successor is loaded inside the loop condition, after the bounds check,
 * so successors[2] (one past the end of the array) is never dereferenced.
 * `blk` is parenthesized so arbitrary pointer expressions may be passed. */
#define bi_foreach_successor(blk, v)                                           \
   bi_block *v;                                                                \
   bi_block **_v;                                                              \
   for (_v = &(blk)->successors[0];                                            \
        _v < &(blk)->successors[2] && (v = *_v) != NULL; _v++)
1050 
/* Iterate over a block's `predecessors` dynarray. The cursor points at each
 * stored `bi_block *` element, so dereference it to get the predecessor. */
#define bi_foreach_predecessor(blk, pred_ptr)                                  \
   util_dynarray_foreach(&(blk)->predecessors, bi_block *, pred_ptr)
1053 
/* Index iterators over an instruction's sources / destinations. `v` is an
 * unsigned index running over [0, nr_srcs) or [0, nr_dests).
 *
 * `ins` is parenthesized in every expansion so that any pointer-valued
 * expression (e.g. `&instr`) can be passed safely. */
#define bi_foreach_src(ins, v) for (unsigned v = 0; v < (ins)->nr_srcs; ++v)

#define bi_foreach_dest(ins, v) for (unsigned v = 0; v < (ins)->nr_dests; ++v)

/* As above, but the loop body only runs for operands whose index type is
 * BI_INDEX_NORMAL. Each expansion ends in an `if`, so the loop body binds to
 * that `if`. */
#define bi_foreach_ssa_src(ins, v)                                             \
   bi_foreach_src(ins, v)                                                      \
      if ((ins)->src[v].type == BI_INDEX_NORMAL)

#define bi_foreach_ssa_dest(ins, v)                                            \
   bi_foreach_dest(ins, v)                                                     \
      if ((ins)->dest[v].type == BI_INDEX_NORMAL)
1065 
/* Nested iteration: every instruction `I` of the tuple, and for each of them
 * every source index `s`. */
#define bi_foreach_instr_and_src_in_tuple(tuple, I, s)                         \
   bi_foreach_instr_in_tuple(tuple, I)                                         \
      bi_foreach_src(I, s)
1069 
1070 /*
1071  * Find the index of a predecessor, used as the implicit order of phi sources.
1072  */
1073 static inline unsigned
bi_predecessor_index(bi_block * succ,bi_block * pred)1074 bi_predecessor_index(bi_block *succ, bi_block *pred)
1075 {
1076    unsigned index = 0;
1077 
1078    bi_foreach_predecessor(succ, x) {
1079       if (*x == pred)
1080          return index;
1081 
1082       index++;
1083    }
1084 
1085    unreachable("Invalid predecessor");
1086 }
1087 
1088 static inline bi_instr *
bi_prev_op(bi_instr * ins)1089 bi_prev_op(bi_instr *ins)
1090 {
1091    return list_last_entry(&(ins->link), bi_instr, link);
1092 }
1093 
1094 static inline bi_instr *
bi_next_op(bi_instr * ins)1095 bi_next_op(bi_instr *ins)
1096 {
1097    return list_first_entry(&(ins->link), bi_instr, link);
1098 }
1099 
1100 static inline bi_block *
bi_next_block(bi_block * block)1101 bi_next_block(bi_block *block)
1102 {
1103    return list_first_entry(&(block->link), bi_block, link);
1104 }
1105 
1106 static inline bi_block *
bi_entry_block(bi_context * ctx)1107 bi_entry_block(bi_context *ctx)
1108 {
1109    return list_first_entry(&ctx->blocks, bi_block, link);
1110 }
1111 
1112 /* BIR manipulation */
1113 
1114 bool bi_has_arg(const bi_instr *ins, bi_index arg);
1115 unsigned bi_count_read_registers(const bi_instr *ins, unsigned src);
1116 unsigned bi_count_write_registers(const bi_instr *ins, unsigned dest);
1117 bool bi_is_regfmt_16(enum bi_register_format fmt);
1118 unsigned bi_writemask(const bi_instr *ins, unsigned dest);
1119 bi_clause *bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause);
1120 bool bi_side_effects(const bi_instr *I);
1121 bool bi_reconverge_branches(bi_block *block);
1122 
1123 bool bi_can_replace_with_csel(bi_instr *I);
1124 
1125 void bi_print_instr(const bi_instr *I, FILE *fp);
1126 void bi_print_slots(bi_registers *regs, FILE *fp);
1127 void bi_print_tuple(bi_tuple *tuple, FILE *fp);
1128 void bi_print_clause(bi_clause *clause, FILE *fp);
1129 void bi_print_block(bi_block *block, FILE *fp);
1130 void bi_print_shader(bi_context *ctx, FILE *fp);
1131 
1132 /* BIR passes */
1133 
1134 bool bi_instr_uses_helpers(bi_instr *I);
1135 bool bi_block_terminates_helpers(bi_block *block);
1136 void bi_analyze_helper_terminate(bi_context *ctx);
1137 void bi_mark_clauses_td(bi_context *ctx);
1138 
1139 void bi_analyze_helper_requirements(bi_context *ctx);
1140 void bi_opt_copy_prop(bi_context *ctx);
1141 void bi_opt_dce(bi_context *ctx, bool partial);
1142 void bi_opt_cse(bi_context *ctx);
1143 void bi_opt_mod_prop_forward(bi_context *ctx);
1144 void bi_opt_mod_prop_backward(bi_context *ctx);
1145 void bi_opt_fuse_dual_texture(bi_context *ctx);
1146 void bi_opt_dce_post_ra(bi_context *ctx);
1147 void bi_opt_message_preload(bi_context *ctx);
1148 void bi_opt_push_ubo(bi_context *ctx);
1149 void bi_opt_reorder_push(bi_context *ctx);
1150 void bi_lower_swizzle(bi_context *ctx);
1151 void bi_lower_fau(bi_context *ctx);
1152 void bi_assign_scoreboard(bi_context *ctx);
1153 void bi_register_allocate(bi_context *ctx);
1154 void va_optimize(bi_context *ctx);
1155 void va_lower_split_64bit(bi_context *ctx);
1156 
1157 void bi_lower_opt_instructions(bi_context *ctx);
1158 
1159 void bi_pressure_schedule(bi_context *ctx);
1160 void bi_schedule(bi_context *ctx);
1161 bool bi_can_fma(bi_instr *ins);
1162 bool bi_can_add(bi_instr *ins);
1163 bool bi_must_message(bi_instr *ins);
1164 bool bi_reads_zero(bi_instr *ins);
1165 bool bi_reads_temps(bi_instr *ins, unsigned src);
1166 bool bi_reads_t(bi_instr *ins, unsigned src);
1167 
1168 #ifndef NDEBUG
1169 bool bi_validate_initialization(bi_context *ctx);
1170 void bi_validate(bi_context *ctx, const char *after_str);
1171 #else
1172 static inline bool
bi_validate_initialization(UNUSED bi_context * ctx)1173 bi_validate_initialization(UNUSED bi_context *ctx)
1174 {
1175    return true;
1176 }
1177 static inline void
bi_validate(UNUSED bi_context * ctx,UNUSED const char * after_str)1178 bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str)
1179 {
1180    return;
1181 }
1182 #endif
1183 
1184 uint32_t bi_fold_constant(bi_instr *I, bool *unsupported);
1185 bool bi_opt_constant_fold(bi_context *ctx);
1186 
1187 /* Liveness */
1188 
1189 void bi_compute_liveness_ssa(bi_context *ctx);
1190 void bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *ins);
1191 
1192 void bi_postra_liveness(bi_context *ctx);
1193 uint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins);
1194 
1195 /* Layout */
1196 
1197 signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
1198 bool bi_ec0_packed(unsigned tuple_count);
1199 
1200 /* Check if there are no more instructions starting with a given block, this
1201  * needs to recurse in case a shader ends with multiple empty blocks */
1202 
1203 static inline bool
bi_is_terminal_block(bi_block * block)1204 bi_is_terminal_block(bi_block *block)
1205 {
1206    return (block == NULL) || (list_is_empty(&block->instructions) &&
1207                               bi_is_terminal_block(block->successors[0]) &&
1208                               bi_is_terminal_block(block->successors[1]));
1209 }
1210 
1211 /* Code emit */
1212 
1213 /* Returns the size of the final clause */
1214 unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission);
1215 void bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission);
1216 
/* A packed tuple held as two 64-bit words (presumably the low and high
 * halves of the encoding, going by the field names). */
struct bi_packed_tuple {
   uint64_t lo; /* first word */
   uint64_t hi; /* second word */
};
1221 
1222 uint8_t bi_pack_literal(enum bi_clause_subword literal);
1223 
1224 uint8_t bi_pack_upper(enum bi_clause_subword upper,
1225                       struct bi_packed_tuple *tuples,
1226                       ASSERTED unsigned tuple_count);
1227 uint64_t bi_pack_tuple_bits(enum bi_clause_subword idx,
1228                             struct bi_packed_tuple *tuples,
1229                             ASSERTED unsigned tuple_count, unsigned offset,
1230                             unsigned nbits);
1231 
1232 uint8_t bi_pack_sync(enum bi_clause_subword t1, enum bi_clause_subword t2,
1233                      enum bi_clause_subword t3, struct bi_packed_tuple *tuples,
1234                      ASSERTED unsigned tuple_count, bool z);
1235 
1236 void bi_pack_format(struct util_dynarray *emission, unsigned index,
1237                     struct bi_packed_tuple *tuples,
1238                     ASSERTED unsigned tuple_count, uint64_t header,
1239                     uint64_t ec0, unsigned m0, bool z);
1240 
1241 unsigned bi_pack_fma(bi_instr *I, enum bifrost_packed_src src0,
1242                      enum bifrost_packed_src src1, enum bifrost_packed_src src2,
1243                      enum bifrost_packed_src src3);
1244 unsigned bi_pack_add(bi_instr *I, enum bifrost_packed_src src0,
1245                      enum bifrost_packed_src src1, enum bifrost_packed_src src2,
1246                      enum bifrost_packed_src src3);
1247 
/* Insertion-point kinds for the builder cursor, modelled on NIR's cursor. */

enum bi_cursor_option {
   bi_cursor_after_block = 0,  /* append at the end of a block */
   bi_cursor_before_instr = 1, /* insert before a given instruction */
   bi_cursor_after_instr = 2   /* insert after a given instruction */
};
1255 
1256 typedef struct {
1257    enum bi_cursor_option option;
1258 
1259    union {
1260       bi_block *block;
1261       bi_instr *instr;
1262    };
1263 } bi_cursor;
1264 
1265 static inline bi_cursor
bi_after_block(bi_block * block)1266 bi_after_block(bi_block *block)
1267 {
1268    return (bi_cursor){.option = bi_cursor_after_block, .block = block};
1269 }
1270 
1271 static inline bi_cursor
bi_before_instr(bi_instr * instr)1272 bi_before_instr(bi_instr *instr)
1273 {
1274    return (bi_cursor){.option = bi_cursor_before_instr, .instr = instr};
1275 }
1276 
1277 static inline bi_cursor
bi_after_instr(bi_instr * instr)1278 bi_after_instr(bi_instr *instr)
1279 {
1280    return (bi_cursor){.option = bi_cursor_after_instr, .instr = instr};
1281 }
1282 
1283 static inline bi_cursor
bi_after_block_logical(bi_block * block)1284 bi_after_block_logical(bi_block *block)
1285 {
1286    if (list_is_empty(&block->instructions))
1287       return bi_after_block(block);
1288 
1289    bi_instr *last = list_last_entry(&block->instructions, bi_instr, link);
1290    assert(last != NULL);
1291 
1292    if (last->branch_target)
1293       return bi_before_instr(last);
1294    else
1295       return bi_after_block(block);
1296 }
1297 
1298 static inline bi_cursor
bi_before_nonempty_block(bi_block * block)1299 bi_before_nonempty_block(bi_block *block)
1300 {
1301    bi_instr *I = list_first_entry(&block->instructions, bi_instr, link);
1302    assert(I != NULL);
1303 
1304    return bi_before_instr(I);
1305 }
1306 
1307 static inline bi_cursor
bi_before_block(bi_block * block)1308 bi_before_block(bi_block *block)
1309 {
1310    if (list_is_empty(&block->instructions))
1311       return bi_after_block(block);
1312    else
1313       return bi_before_nonempty_block(block);
1314 }
1315 
1316 /* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause,
1317  * in which case there must exist a nonempty penultimate tuple */
1318 
1319 ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_tuple(bi_tuple * tuple)1320 bi_first_instr_in_tuple(bi_tuple *tuple)
1321 {
1322    bi_instr *instr = tuple->fma ?: tuple->add;
1323    assert(instr != NULL);
1324    return instr;
1325 }
1326 
1327 ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_clause(bi_clause * clause)1328 bi_first_instr_in_clause(bi_clause *clause)
1329 {
1330    return bi_first_instr_in_tuple(&clause->tuples[0]);
1331 }
1332 
1333 ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_last_instr_in_clause(bi_clause * clause)1334 bi_last_instr_in_clause(bi_clause *clause)
1335 {
1336    bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
1337    bi_instr *instr = tuple.add ?: tuple.fma;
1338 
1339    if (!instr) {
1340       assert(clause->tuple_count >= 2);
1341       tuple = clause->tuples[clause->tuple_count - 2];
1342       instr = tuple.add ?: tuple.fma;
1343    }
1344 
1345    assert(instr != NULL);
1346    return instr;
1347 }
1348 
/* Iterate over the instructions of a clause, forwards or backwards.
 *
 * Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start
 * (end) of the clause and adding a condition for the clause boundary: the
 * walk stops either at the block's list head or one step past the clause's
 * last (first) instruction.
 *
 * The cursor is initialised directly from bi_first/last_instr_in_clause().
 * Those already return a bi_instr *, so routing the result through
 * list_entry() (which expects a pointer to the embedded link node) was only
 * correct if `link` is at offset 0 of bi_instr. */

#define bi_foreach_instr_in_clause(block, clause, pos)                         \
   for (bi_instr *pos = bi_first_instr_in_clause(clause);                      \
        (&pos->link != &(block)->instructions) &&                              \
        (pos != bi_next_op(bi_last_instr_in_clause(clause)));                  \
        pos = list_entry(pos->link.next, bi_instr, link))

#define bi_foreach_instr_in_clause_rev(block, clause, pos)                     \
   for (bi_instr *pos = bi_last_instr_in_clause(clause);                       \
        (&pos->link != &(block)->instructions) &&                              \
        (pos != bi_prev_op(bi_first_instr_in_clause(clause)));                 \
        pos = list_entry(pos->link.prev, bi_instr, link))
1365 
1366 static inline bi_cursor
bi_before_clause(bi_clause * clause)1367 bi_before_clause(bi_clause *clause)
1368 {
1369    return bi_before_instr(bi_first_instr_in_clause(clause));
1370 }
1371 
1372 static inline bi_cursor
bi_before_tuple(bi_tuple * tuple)1373 bi_before_tuple(bi_tuple *tuple)
1374 {
1375    return bi_before_instr(bi_first_instr_in_tuple(tuple));
1376 }
1377 
1378 static inline bi_cursor
bi_after_clause(bi_clause * clause)1379 bi_after_clause(bi_clause *clause)
1380 {
1381    return bi_after_instr(bi_last_instr_in_clause(clause));
1382 }
1383 
1384 /* IR builder in terms of cursor infrastructure */
1385 
1386 typedef struct {
1387    bi_context *shader;
1388    bi_cursor cursor;
1389 } bi_builder;
1390 
1391 static inline bi_builder
bi_init_builder(bi_context * ctx,bi_cursor cursor)1392 bi_init_builder(bi_context *ctx, bi_cursor cursor)
1393 {
1394    return (bi_builder){.shader = ctx, .cursor = cursor};
1395 }
1396 
1397 /* Insert an instruction at the cursor and move the cursor */
1398 
1399 static inline void
bi_builder_insert(bi_cursor * cursor,bi_instr * I)1400 bi_builder_insert(bi_cursor *cursor, bi_instr *I)
1401 {
1402    switch (cursor->option) {
1403    case bi_cursor_after_instr:
1404       list_add(&I->link, &cursor->instr->link);
1405       cursor->instr = I;
1406       return;
1407 
1408    case bi_cursor_after_block:
1409       list_addtail(&I->link, &cursor->block->instructions);
1410       cursor->option = bi_cursor_after_instr;
1411       cursor->instr = I;
1412       return;
1413 
1414    case bi_cursor_before_instr:
1415       list_addtail(&I->link, &cursor->instr->link);
1416       cursor->option = bi_cursor_after_instr;
1417       cursor->instr = I;
1418       return;
1419    }
1420 
1421    unreachable("Invalid cursor option");
1422 }
1423 
1424 bi_instr *bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign);
1425 
1426 /* Read back power-efficent garbage, TODO maybe merge with null? */
1427 static inline bi_index
bi_dontcare(bi_builder * b)1428 bi_dontcare(bi_builder *b)
1429 {
1430    if (b->shader->arch >= 9)
1431       return bi_zero();
1432    else
1433       return bi_passthrough(BIFROST_SRC_FAU_HI);
1434 }
1435 
/* Block worklist wrappers around u_worklist, keyed on the `index` member of
 * bi_block. bi_worklist_init sizes the worklist from the context's num_blocks
 * and passes the context as the third argument; `ctx` is parenthesized where
 * it is dereferenced so that any pointer-valued expression can be passed. */
#define bi_worklist_init(ctx, w)        u_worklist_init(w, (ctx)->num_blocks, ctx)
#define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define bi_worklist_peek_head(w)        u_worklist_peek_head(w, bi_block, index)
#define bi_worklist_pop_head(w)         u_worklist_pop_head(w, bi_block, index)
#define bi_worklist_peek_tail(w)        u_worklist_peek_tail(w, bi_block, index)
#define bi_worklist_pop_tail(w)         u_worklist_pop_tail(w, bi_block, index)
1443 
1444 /* NIR passes */
1445 
1446 bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);
1447 
1448 #ifdef __cplusplus
1449 } /* extern C */
1450 #endif
1451 
1452 #endif
1453