/*
 * Copyright (C) 2020 Collabora Ltd.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors (Collabora):
 *    Alyssa Rosenzweig <alyssa.rosenzweig@collabora.com>
 */

#ifndef __BIFROST_COMPILER_H
#define __BIFROST_COMPILER_H

#include "compiler/nir/nir.h"
#include "panfrost/util/pan_ir.h"
#include "util/half_float.h"
#include "util/u_math.h"
#include "util/u_worklist.h"
#include "bi_opcodes.h"
#include "bifrost.h"
#include "valhall_enums.h"

#ifdef __cplusplus
extern "C" {
#endif

/* Swizzles across bytes in a 32-bit word. Expresses swz in the XML directly.
 * To express widen, use the corresponding replicated form, i.e. H01 =
 * identity for widen = none, H00 for widen = h0, B1111 for widen = b1. For
 * lane, also use the replicated form (interpretation is governed by the
 * opcode). For 8-bit lanes with two channels, use the replicated forms
 * (TODO: what about others?). For 8-bit lanes with four channels, use the
 * matching form (TODO: what about others?).
 */

enum bi_swizzle {
   /* 16-bit swizzle ordering deliberate for fast compute */
   BI_SWIZZLE_H00 = 0, /* = B0101 */
   BI_SWIZZLE_H01 = 1, /* = B0123 = W0 */
   BI_SWIZZLE_H10 = 2, /* = B2301 */
   BI_SWIZZLE_H11 = 3, /* = B2323 */

   /* replication order should be maintained for fast compute */
   BI_SWIZZLE_B0000 = 4, /* single channel (replicate) */
   BI_SWIZZLE_B1111 = 5,
   BI_SWIZZLE_B2222 = 6,
   BI_SWIZZLE_B3333 = 7,

   /* totally special for explicit pattern matching */
   BI_SWIZZLE_B0011 = 8,  /* +SWZ.v4i8 */
   BI_SWIZZLE_B2233 = 9,  /* +SWZ.v4i8 */
   BI_SWIZZLE_B1032 = 10, /* +SWZ.v4i8 */
   BI_SWIZZLE_B3210 = 11, /* +SWZ.v4i8 */

   BI_SWIZZLE_B0022 = 12, /* for b02 lanes */
};

/* Given a packed i16vec2/i8vec4 constant, apply a swizzle. Useful for constant
 * folding and Valhall constant optimization. */

static inline uint32_t
bi_apply_swizzle(uint32_t value, enum bi_swizzle swz)
{
   const uint16_t *h = (const uint16_t *)&value;
   const uint8_t *b = (const uint8_t *)&value;

#define H(h0, h1) (h[h0] | ((uint32_t)h[h1] << 16))
#define B(b0, b1, b2, b3)                                                      \
   (b[b0] | ((uint32_t)b[b1] << 8) | ((uint32_t)b[b2] << 16) |                 \
    ((uint32_t)b[b3] << 24))

   switch (swz) {
   case BI_SWIZZLE_H00:
      return H(0, 0);
   case BI_SWIZZLE_H01:
      return H(0, 1);
   case BI_SWIZZLE_H10:
      return H(1, 0);
   case BI_SWIZZLE_H11:
      return H(1, 1);
   case BI_SWIZZLE_B0000:
      return B(0, 0, 0, 0);
   case BI_SWIZZLE_B1111:
      return B(1, 1, 1, 1);
   case BI_SWIZZLE_B2222:
      return B(2, 2, 2, 2);
   case BI_SWIZZLE_B3333:
      return B(3, 3, 3, 3);
   case BI_SWIZZLE_B0011:
      return B(0, 0, 1, 1);
   case BI_SWIZZLE_B2233:
      return B(2, 2, 3, 3);
   case BI_SWIZZLE_B1032:
      return B(1, 0, 3, 2);
   case BI_SWIZZLE_B3210:
      return B(3, 2, 1, 0);
   case BI_SWIZZLE_B0022:
      return B(0, 0, 2, 2);
   }

#undef H
#undef B

   unreachable("Invalid swizzle");
}
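
/* Worked example of the mapping above: with value = 0x33221100 (so
 * b[0] = 0x00 ... b[3] = 0x33 and h[0] = 0x1100, h[1] = 0x3322):
 *
 *    bi_apply_swizzle(value, BI_SWIZZLE_H10)   == 0x11003322
 *    bi_apply_swizzle(value, BI_SWIZZLE_B3333) == 0x33333333
 *    bi_apply_swizzle(value, BI_SWIZZLE_B3210) == 0x00112233
 */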

enum bi_index_type {
   BI_INDEX_NULL = 0,
   BI_INDEX_NORMAL = 1,
   BI_INDEX_REGISTER = 2,
   BI_INDEX_CONSTANT = 3,
   BI_INDEX_PASS = 4,
   BI_INDEX_FAU = 5
};

typedef struct {
   uint32_t value;

   /* modifiers, should only be set if applicable for a given instruction.
    * For *IDP.v4i8, abs plays the role of sign. For bitwise ops where
    * applicable, neg plays the role of not */
   bool abs : 1;
   bool neg : 1;

   /* Set on the last use of a value, so it can be purged from the register
    * cache. Set by liveness analysis. */
   bool discard : 1;

   /* For a source, the swizzle. For a destination, acts a bit like a
    * write mask. Identity for the full 32-bit, H00 for only caring about
    * the lower half, other values unused. */
   enum bi_swizzle swizzle : 4;
   uint32_t offset : 3;
   enum bi_index_type type : 3;

   /* Must be zeroed so we can hash the whole 64-bits at a time */
   unsigned padding : (32 - 13);
} bi_index;

static inline bi_index
bi_get_index(unsigned value)
{
   return (bi_index){
      .value = value,
      .swizzle = BI_SWIZZLE_H01,
      .type = BI_INDEX_NORMAL,
   };
}

static inline bi_index
bi_register(unsigned reg)
{
   assert(reg < 64);

   return (bi_index){
      .value = reg,
      .swizzle = BI_SWIZZLE_H01,
      .type = BI_INDEX_REGISTER,
   };
}

static inline bi_index
bi_imm_u32(uint32_t imm)
{
   return (bi_index){
      .value = imm,
      .swizzle = BI_SWIZZLE_H01,
      .type = BI_INDEX_CONSTANT,
   };
}

static inline bi_index
bi_imm_f32(float imm)
{
   return bi_imm_u32(fui(imm));
}

static inline bi_index
bi_null()
{
   return (bi_index){.type = BI_INDEX_NULL};
}

static inline bi_index
bi_zero()
{
   return bi_imm_u32(0);
}

static inline bi_index
bi_passthrough(enum bifrost_packed_src value)
{
   return (bi_index){
      .value = value,
      .swizzle = BI_SWIZZLE_H01,
      .type = BI_INDEX_PASS,
   };
}

/* Helps construct swizzles */
static inline bi_index
bi_swz_16(bi_index idx, bool x, bool y)
{
   assert(idx.swizzle == BI_SWIZZLE_H01);
   idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_H00 | (x << 1) | y);
   return idx;
}

static inline bi_index
bi_half(bi_index idx, bool upper)
{
   return bi_swz_16(idx, upper, upper);
}

static inline bi_index
bi_byte(bi_index idx, unsigned lane)
{
   assert(idx.swizzle == BI_SWIZZLE_H01);
   assert(lane < 4);
   idx.swizzle = (enum bi_swizzle)(BI_SWIZZLE_B0000 + lane);
   return idx;
}

static inline bi_index
bi_abs(bi_index idx)
{
   idx.abs = true;
   return idx;
}

static inline bi_index
bi_neg(bi_index idx)
{
   idx.neg ^= true;
   return idx;
}

static inline bi_index
bi_discard(bi_index idx)
{
   idx.discard = true;
   return idx;
}

/* Additive identity in IEEE 754 arithmetic */
static inline bi_index
bi_negzero()
{
   return bi_neg(bi_zero());
}
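
/* Example of composing the helpers above to build a modified operand: the
 * negated absolute value of the upper 16-bit half of some index x, i.e. the
 * operand -abs(x.y):
 *
 *    bi_index op = bi_neg(bi_abs(bi_half(x, true)));
 */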

/* Replaces an index, preserving any modifiers */

static inline bi_index
bi_replace_index(bi_index old, bi_index replacement)
{
   replacement.abs = old.abs;
   replacement.neg = old.neg;
   replacement.swizzle = old.swizzle;
   replacement.discard = false; /* needs liveness analysis to set */
   return replacement;
}

/* Remove any modifiers. This has the property:
 *
 *    replace_index(x, strip_index(x)) = x
 *
 * This ensures it is suitable to use when lowering sources to moves */

static inline bi_index
bi_strip_index(bi_index index)
{
   index.abs = index.neg = false;
   index.swizzle = BI_SWIZZLE_H01;
   return index;
}

/* For bitwise instructions */
#define bi_not(x) bi_neg(x)

static inline bi_index
bi_imm_u8(uint8_t imm)
{
   return bi_byte(bi_imm_u32(imm), 0);
}

static inline bi_index
bi_imm_u16(uint16_t imm)
{
   return bi_half(bi_imm_u32(imm), false);
}

static inline bi_index
bi_imm_uintN(uint32_t imm, unsigned sz)
{
   assert(sz == 8 || sz == 16 || sz == 32);
   return (sz == 8)    ? bi_imm_u8(imm)
          : (sz == 16) ? bi_imm_u16(imm)
                       : bi_imm_u32(imm);
}

static inline bi_index
bi_imm_f16(float imm)
{
   return bi_imm_u16(_mesa_float_to_half(imm));
}

static inline bool
bi_is_null(bi_index idx)
{
   return idx.type == BI_INDEX_NULL;
}

static inline bool
bi_is_ssa(bi_index idx)
{
   return idx.type == BI_INDEX_NORMAL;
}

/* Compares equivalence as references. Does not compare offsets, swizzles, or
 * modifiers. In other words, this forms bi_index equivalence classes by
 * partitioning memory. E.g. -abs(foo[1].yx) == foo.xy but foo != bar */

static inline bool
bi_is_equiv(bi_index left, bi_index right)
{
   return (left.type == right.type) && (left.value == right.value);
}

/* A stronger equivalence relation that requires the indices access the
 * same offset, useful for RA/scheduling to see what registers will
 * correspond to */

static inline bool
bi_is_word_equiv(bi_index left, bi_index right)
{
   return bi_is_equiv(left, right) && left.offset == right.offset;
}

/* An even stronger equivalence that checks if indices correspond to the
 * same value when evaluated
 */
static inline bool
bi_is_value_equiv(bi_index left, bi_index right)
{
   if (left.type == BI_INDEX_CONSTANT && right.type == BI_INDEX_CONSTANT) {
      return (bi_apply_swizzle(left.value, left.swizzle) ==
              bi_apply_swizzle(right.value, right.swizzle)) &&
             (left.abs == right.abs) && (left.neg == right.neg);
   } else {
      return (left.value == right.value) && (left.abs == right.abs) &&
             (left.neg == right.neg) && (left.swizzle == right.swizzle) &&
             (left.offset == right.offset) && (left.type == right.type);
   }
}
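
/* For instance, bi_imm_u16(1) (constant 0x00000001 read through H00) is
 * value-equivalent to bi_imm_u32(0x00010001), since both evaluate to
 * 0x00010001, even though the two indices are not bitwise identical. */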

#define BI_MAX_VEC   8
#define BI_MAX_DESTS 4
#define BI_MAX_SRCS  8

typedef struct {
   /* Must be first */
   struct list_head link;
   bi_index *dest;
   bi_index *src;

   enum bi_opcode op;
   uint8_t nr_srcs;
   uint8_t nr_dests;

   union {
      /* For a branch */
      struct bi_block *branch_target;

      /* For a phi node that hasn't been translated yet. This is only
       * used during NIR->BIR
       */
      nir_phi_instr *phi;
   };

   /* These don't fit neatly with anything else.. */
   enum bi_register_format register_format;
   enum bi_vecsize vecsize;

   /* Flow control associated with a Valhall instruction */
   uint8_t flow;

   /* Slot associated with a message-passing instruction */
   uint8_t slot;

   /* Can we spill the value written here? Used to prevent
    * useless double fills */
   bool no_spill;

   /* On Bifrost: A value of bi_table to override the table, inducing a
    * DTSEL_IMM pair if nonzero.
    *
    * On Valhall: the table index to use for resource instructions.
    *
    * These two interpretations are equivalent if you squint a bit.
    */
   unsigned table;

   /* Everything after this MUST NOT be accessed directly, since
    * interpretation depends on opcodes */

   /* Destination modifiers */
   union {
      enum bi_clamp clamp;
      bool saturate;
      bool not_result;
      unsigned dest_mod;
   };

   /* Immediates. All seen alone in an instruction, except for varying/texture
    * which are specified jointly for VARTEX */
   union {
      uint32_t shift;
      uint32_t fill;
      uint32_t index;
      uint32_t attribute_index;

      struct {
         uint32_t varying_index;
         uint32_t sampler_index;
         uint32_t texture_index;
      };

      /* TEXC, ATOM_CX: # of staging registers used */
      struct {
         uint32_t sr_count;
         uint32_t sr_count_2;

         union {
            /* Atomics effectively require all three */
            int32_t byte_offset;

            /* BLEND requires all three */
            int32_t branch_offset;
         };
      };
   };

   /* Modifiers specific to particular instructions are thrown in a union */
   union {
      enum bi_adj adj;           /* FEXP_TABLE.u4 */
      enum bi_atom_opc atom_opc; /* atomics */
      enum bi_func func;         /* FPOW_SC_DET */
      enum bi_function function; /* LD_VAR_FLAT */
      enum bi_mux mux;           /* MUX */
      enum bi_sem sem;           /* FMAX, FMIN */
      enum bi_source source;     /* LD_GCLK */
      bool scale;                /* VN_ASST2, FSINCOS_OFFSET */
      bool offset;               /* FSIN_TABLE, FOCS_TABLE */
      bool mask;                 /* CLZ */
      bool threads;              /* IMULD, IMOV_FMA */
      bool combine;              /* BRANCHC */
      bool format;               /* LEA_TEX */

      struct {
         enum bi_special special;   /* FADD_RSCALE, FMA_RSCALE */
         enum bi_round round;       /* FMA, converts, FADD, _RSCALE, etc */
         bool ftz;                  /* Flush-to-zero for F16_TO_F32 and FLUSH */
         enum va_nan_mode nan_mode; /* NaN flush mode, for FLUSH */
         bool flush_inf;            /* Flush infinity to finite, for FLUSH */
      };

      struct {
         enum bi_result_type result_type; /* FCMP, ICMP */
         enum bi_cmpf cmpf;               /* CSEL, FCMP, ICMP, BRANCH */
      };

      struct {
         enum bi_stack_mode stack_mode; /* JUMP_EX */
         bool test_mode;
      };

      struct {
         enum bi_seg seg;       /* LOAD, STORE, SEG_ADD, SEG_SUB */
         bool preserve_null;    /* SEG_ADD, SEG_SUB */
         enum bi_extend extend; /* LOAD, IMUL */
      };

      struct {
         enum bi_sample sample;             /* VAR_TEX, LD_VAR */
         enum bi_update update;             /* VAR_TEX, LD_VAR */
         enum bi_varying_name varying_name; /* LD_VAR_SPECIAL */
         bool skip;                         /* VAR_TEX, TEXS, TEXC */
         bool lod_mode;                     /* VAR_TEX, TEXS, implicitly for TEXC */
         enum bi_source_format source_format; /* LD_VAR_BUF */

         /* Used for valhall texturing */
         bool shadow;
         bool wide_indices;
         bool texel_offset;
         bool array_enable;
         bool integer_coordinates;
         bool derivative_enable;
         bool force_delta_enable;
         bool lod_bias_disable;
         bool lod_clamp_disable;
         enum bi_fetch_component fetch_component;
         enum bi_va_lod_mode va_lod_mode;
         enum bi_dimension dimension;
         enum bi_write_mask write_mask;
      };

      /* Maximum size, for hashing */
      unsigned flags[14];

      struct {
         enum bi_subgroup subgroup;               /* WMASK, CLPER */
         enum bi_inactive_result inactive_result; /* CLPER */
         enum bi_lane_op lane_op;                 /* CLPER */
      };

      struct {
         bool z;       /* ZS_EMIT */
         bool stencil; /* ZS_EMIT */
      };

      struct {
         bool h; /* VN_ASST1.f16 */
         bool l; /* VN_ASST1.f16 */
      };

      struct {
         bool bytes2; /* RROT_DOUBLE, FRSHIFT_DOUBLE */
         bool result_word;
         bool arithmetic; /* ARSHIFT_OR */
      };

      struct {
         bool sqrt; /* FREXPM */
         bool log;  /* FREXPM */
      };

      struct {
         enum bi_mode mode;           /* FLOG_TABLE */
         enum bi_precision precision; /* FLOG_TABLE */
         bool divzero;                /* FRSQ_APPROX, FRSQ */
      };
   };
} bi_instr;

static inline bool
bi_is_staging_src(const bi_instr *I, unsigned s)
{
   return (s == 0 || s == 4) && bi_opcode_props[I->op].sr_read;
}

/*
 * Safe helpers to remove destinations/sources at the end of the
 * destination/source array when changing opcodes. Unlike adding
 * sources/destinations, this does not require reallocation.
 */
static inline void
bi_drop_dests(bi_instr *I, unsigned new_count)
{
   assert(new_count < I->nr_dests);

   for (unsigned i = new_count; i < I->nr_dests; ++i)
      I->dest[i] = bi_null();

   I->nr_dests = new_count;
}

static inline void
bi_drop_srcs(bi_instr *I, unsigned new_count)
{
   assert(new_count < I->nr_srcs);

   for (unsigned i = new_count; i < I->nr_srcs; ++i)
      I->src[i] = bi_null();

   I->nr_srcs = new_count;
}

static inline void
bi_replace_src(bi_instr *I, unsigned src_index, bi_index replacement)
{
   I->src[src_index] = bi_replace_index(I->src[src_index], replacement);
}
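
/* Sketch of a typical use when a pass rewrites an instruction in place
 * (illustrative only; BI_OPCODE_MOV_I32 stands in for whatever single-source
 * opcode the pass actually targets):
 *
 *    I->op = BI_OPCODE_MOV_I32;
 *    bi_drop_srcs(I, 1);
 *    bi_replace_src(I, 0, value);
 */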

/* Represents the assignment of slots for a given bi_tuple */

typedef struct {
   /* Register to assign to each slot */
   unsigned slot[4];

   /* Read slots can be disabled */
   bool enabled[2];

   /* Configuration for slots 2/3 */
   struct bifrost_reg_ctrl_23 slot23;

   /* Fast-Access-Uniform RAM index */
   uint8_t fau_idx;

   /* Whether writes are actually for the last instruction */
   bool first_instruction;
} bi_registers;

/* A bi_tuple contains two paired instruction pointers. If a slot is unfilled,
 * leave it NULL; the emitter will fill in a nop. Instructions reference
 * registers via slots which are assigned per tuple.
 */

typedef struct {
   uint8_t fau_idx;
   bi_registers regs;
   bi_instr *fma;
   bi_instr *add;
} bi_tuple;

struct bi_block;

typedef struct {
   struct list_head link;

   /* Link back up for branch calculations */
   struct bi_block *block;

   /* Architectural limit of 8 tuples/clause */
   unsigned tuple_count;
   bi_tuple tuples[8];

   /* For scoreboarding -- the clause ID (this is not globally unique!)
    * and its dependencies in terms of other clauses, computed during
    * scheduling and used when emitting code. Dependencies expressed as a
    * bitfield matching the hardware, except shifted by a clause (the
    * shift back to the ISA's off-by-one encoding is worked out when
    * emitting clauses) */
   unsigned scoreboard_id;
   uint8_t dependencies;

   /* See ISA header for description */
   enum bifrost_flow flow_control;

   /* Can we prefetch the next clause? Usually it makes sense, except for
    * clauses ending in unconditional branches */
   bool next_clause_prefetch;

   /* Assigned data register */
   unsigned staging_register;

   /* Corresponds to the usual bit but shifted by a clause */
   bool staging_barrier;

   /* Constants read by this clause. ISA limit. Must satisfy:
    *
    *    constant_count + tuple_count <= 13
    *
    * Also implicitly constant_count <= tuple_count since a tuple only
    * reads a single constant.
    */
   uint64_t constants[8];
   unsigned constant_count;

   /* Index of a constant to be PC-relative */
   unsigned pcrel_idx;

   /* Branches encode a constant offset relative to the program counter
    * with some magic flags. By convention, if there is a branch, its
    * constant will be last. Set this flag to indicate this is required.
    */
   bool branch_constant;

   /* Unique in a clause */
   enum bifrost_message_type message_type;
   bi_instr *message;

   /* Discard helper threads */
   bool td;

   /* Should flush-to-zero mode be enabled for this clause? */
   bool ftz;
} bi_clause;

#define BI_NUM_SLOTS 8

/* A model for the state of the scoreboard */
struct bi_scoreboard_state {
   /** Bitmap of registers read/written by a slot */
   uint64_t read[BI_NUM_SLOTS];
   uint64_t write[BI_NUM_SLOTS];

   /* Nonregister dependencies present per slot */
   uint8_t varying : BI_NUM_SLOTS;
   uint8_t memory : BI_NUM_SLOTS;
};

typedef struct bi_block {
   /* Link to next block. Must be first for mir_get_block */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned index;

   /* Control flow graph */
   struct bi_block *successors[2];
   struct util_dynarray predecessors;
   bool unconditional_jumps;
   bool loop_header;

   /* Per 32-bit word live masks for the block indexed by node */
   uint8_t *live_in;
   uint8_t *live_out;

   /* Scalar liveness indexed by SSA index */
   BITSET_WORD *ssa_live_in;
   BITSET_WORD *ssa_live_out;

   /* If true, uses clauses; if false, uses instructions */
   bool scheduled;
   struct list_head clauses; /* list of bi_clause */

   /* Post-RA liveness */
   uint64_t reg_live_in, reg_live_out;

   /* Scoreboard state at the start/end of block */
   struct bi_scoreboard_state scoreboard_in, scoreboard_out;

   /* On Valhall, indicates we need a terminal NOP to implement jumps to
    * the end of the shader.
    */
   bool needs_nop;

   /* Flags available for pass-internal use */
   uint8_t pass_flags;
} bi_block;

static inline unsigned
bi_num_successors(bi_block *block)
{
   STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2);
   assert(block->successors[0] || !block->successors[1]);

   if (block->successors[1])
      return 2;
   else if (block->successors[0])
      return 1;
   else
      return 0;
}

static inline unsigned
bi_num_predecessors(bi_block *block)
{
   return util_dynarray_num_elements(&block->predecessors, bi_block *);
}

static inline bi_block *
bi_start_block(struct list_head *blocks)
{
   bi_block *first = list_first_entry(blocks, bi_block, link);
   assert(bi_num_predecessors(first) == 0);
   return first;
}

static inline bi_block *
bi_exit_block(struct list_head *blocks)
{
   bi_block *last = list_last_entry(blocks, bi_block, link);
   assert(bi_num_successors(last) == 0);
   return last;
}

static inline void
bi_block_add_successor(bi_block *block, bi_block *successor)
{
   assert(block != NULL && successor != NULL);

   /* Cull impossible edges */
   if (block->unconditional_jumps)
      return;

   for (unsigned i = 0; i < ARRAY_SIZE(block->successors); ++i) {
      if (block->successors[i]) {
         if (block->successors[i] == successor)
            return;
         else
            continue;
      }

      block->successors[i] = successor;
      util_dynarray_append(&successor->predecessors, bi_block *, block);
      return;
   }

   unreachable("Too many successors");
}

/* Subset of pan_shader_info needed per-variant, in order to support IDVS */
struct bi_shader_info {
   struct panfrost_ubo_push *push;
   struct bifrost_shader_info *bifrost;
   unsigned tls_size;
   unsigned work_reg_count;
   unsigned push_offset;
};

/* State of index-driven vertex shading for current shader */
enum bi_idvs_mode {
   /* IDVS not in use */
   BI_IDVS_NONE = 0,

   /* IDVS in use. Compiling a position shader */
   BI_IDVS_POSITION = 1,

   /* IDVS in use. Compiling a varying shader */
   BI_IDVS_VARYING = 2,
};

typedef struct {
   const struct panfrost_compile_inputs *inputs;
   nir_shader *nir;
   struct bi_shader_info info;
   gl_shader_stage stage;
   struct list_head blocks; /* list of bi_block */
   uint32_t quirks;
   unsigned arch;
   enum bi_idvs_mode idvs;
   unsigned num_blocks;

   /* In any graphics shader, whether the "IDVS with memory
    * allocation" flow is used. This affects how varyings are loaded and
    * stored. Ignore for compute.
    */
   bool malloc_idvs;

   /* During NIR->BIR */
   bi_block *current_block;
   bi_block *after_block;
   bi_block *break_block;
   bi_block *continue_block;
   bi_block **indexed_nir_blocks;
   bool emitted_atest;

   /* During NIR->BIR, the coverage bitmap. If this is NULL, the default
    * coverage bitmap should be sourced from preloaded register r60. This is
    * written by ATEST and ZS_EMIT.
    */
   bi_index coverage;

   /* During NIR->BIR, table of preloaded registers, or NULL if never
    * preloaded.
    */
   bi_index preloaded[64];

   /* For creating temporaries */
   unsigned ssa_alloc;
   unsigned reg_alloc;

   /* Mask of UBOs that need to be uploaded */
   uint32_t ubo_mask;

   /* During instruction selection, map from vector bi_index to its scalar
    * components, populated by a split.
    */
   struct hash_table_u64 *allocated_vec;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned spills;
   unsigned fills;
} bi_context;

static inline void
bi_remove_instruction(bi_instr *ins)
{
   list_del(&ins->link);
}

enum bir_fau {
   BIR_FAU_ZERO = 0,
   BIR_FAU_LANE_ID = 1,
   BIR_FAU_WARP_ID = 2,
   BIR_FAU_CORE_ID = 3,
   BIR_FAU_FB_EXTENT = 4,
   BIR_FAU_ATEST_PARAM = 5,
   BIR_FAU_SAMPLE_POS_ARRAY = 6,
   BIR_FAU_BLEND_0 = 8,
   /* blend descs 1 - 7 */
   BIR_FAU_TYPE_MASK = 15,

   /* Valhall only */
   BIR_FAU_TLS_PTR = 16,
   BIR_FAU_WLS_PTR = 17,
   BIR_FAU_PROGRAM_COUNTER = 18,

   BIR_FAU_UNIFORM = (1 << 7),
   /* Look up table on Valhall */
   BIR_FAU_IMMEDIATE = (1 << 8),
};

static inline bi_index
bi_fau(enum bir_fau value, bool hi)
{
   return (bi_index){
      .value = value,
      .swizzle = BI_SWIZZLE_H01,
      .offset = hi ? 1u : 0u,
      .type = BI_INDEX_FAU,
   };
}

/*
 * Builder for Valhall LUT entries. Generally, constants are modeled with
 * BI_INDEX_CONSTANT in the intermediate representation. This helper is only
 * necessary for passes running during or after constant lowering.
 */
static inline bi_index
va_lut(unsigned index)
{
   return bi_fau((enum bir_fau)(BIR_FAU_IMMEDIATE | (index >> 1)), index & 1);
}
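
/* For example, va_lut(3) selects the high 32-bit half of the second 64-bit
 * LUT entry, i.e. bi_fau(BIR_FAU_IMMEDIATE | 1, true). */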

/*
 * va_zero_lut is like bi_zero but only works on Valhall. It is intended for
 * use by late passes that run after constants are lowered, specifically
 * register allocation. bi_zero() is preferred where possible.
 */
static inline bi_index
va_zero_lut()
{
   return va_lut(0);
}

static inline bi_index
bi_temp(bi_context *ctx)
{
   return bi_get_index(ctx->ssa_alloc++);
}

static inline bi_index
bi_def_index(nir_def *def)
{
   return bi_get_index(def->index);
}

/* Inline constants automatically, will be lowered out by bi_lower_fau where a
 * constant is not allowed. load_const_to_scalar guarantees that this makes
 * sense */

static inline bi_index
bi_src_index(nir_src *src)
{
   if (nir_src_is_const(*src) && nir_src_bit_size(*src) <= 32) {
      return bi_imm_u32(nir_src_as_uint(*src));
   } else {
      return bi_def_index(src->ssa);
   }
}

/* Iterators for Bifrost IR */

#define bi_foreach_block(ctx, v)                                               \
   list_for_each_entry(bi_block, v, &ctx->blocks, link)

#define bi_foreach_block_rev(ctx, v)                                           \
   list_for_each_entry_rev(bi_block, v, &ctx->blocks, link)

#define bi_foreach_block_from(ctx, from, v)                                    \
   list_for_each_entry_from(bi_block, v, from, &ctx->blocks, link)

#define bi_foreach_block_from_rev(ctx, from, v)                                \
   list_for_each_entry_from_rev(bi_block, v, from, &ctx->blocks, link)

#define bi_foreach_instr_in_block(block, v)                                    \
   list_for_each_entry(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_rev(block, v)                                \
   list_for_each_entry_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe(block, v)                               \
   list_for_each_entry_safe(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_safe_rev(block, v)                           \
   list_for_each_entry_safe_rev(bi_instr, v, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from(block, v, from)                         \
   list_for_each_entry_from(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_instr_in_block_from_rev(block, v, from)                     \
   list_for_each_entry_from_rev(bi_instr, v, from, &(block)->instructions, link)

#define bi_foreach_clause_in_block(block, v)                                   \
   list_for_each_entry(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_rev(block, v)                               \
   list_for_each_entry_rev(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_safe(block, v)                              \
   list_for_each_entry_safe(bi_clause, v, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from(block, v, from)                        \
   list_for_each_entry_from(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_clause_in_block_from_rev(block, v, from)                    \
   list_for_each_entry_from_rev(bi_clause, v, from, &(block)->clauses, link)

#define bi_foreach_instr_global(ctx, v)                                        \
   bi_foreach_block(ctx, v_block)                                              \
      bi_foreach_instr_in_block(v_block, v)

#define bi_foreach_instr_global_rev(ctx, v)                                    \
   bi_foreach_block_rev(ctx, v_block)                                          \
      bi_foreach_instr_in_block_rev(v_block, v)

#define bi_foreach_instr_global_safe(ctx, v)                                   \
   bi_foreach_block(ctx, v_block)                                              \
      bi_foreach_instr_in_block_safe(v_block, v)

#define bi_foreach_instr_global_rev_safe(ctx, v)                               \
   bi_foreach_block_rev(ctx, v_block)                                          \
      bi_foreach_instr_in_block_safe_rev(v_block, v)

#define bi_foreach_instr_in_tuple(tuple, v)                                    \
   for (bi_instr *v = (tuple)->fma ?: (tuple)->add; v != NULL;                 \
        v = (v == (tuple)->add) ? NULL : (tuple)->add)

#define bi_foreach_successor(blk, v)                                           \
   bi_block *v;                                                                \
   bi_block **_v;                                                              \
   for (_v = &blk->successors[0], v = *_v;                                     \
        v != NULL && _v < &blk->successors[2]; _v++, v = *_v)

#define bi_foreach_predecessor(blk, v)                                         \
   util_dynarray_foreach(&(blk)->predecessors, bi_block *, v)

#define bi_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v)

#define bi_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v)

#define bi_foreach_ssa_src(ins, v)                                             \
   bi_foreach_src(ins, v)                                                      \
      if (ins->src[v].type == BI_INDEX_NORMAL)

#define bi_foreach_ssa_dest(ins, v)                                            \
   bi_foreach_dest(ins, v)                                                     \
      if (ins->dest[v].type == BI_INDEX_NORMAL)

#define bi_foreach_instr_and_src_in_tuple(tuple, ins, s)                       \
   bi_foreach_instr_in_tuple(tuple, ins)                                       \
      bi_foreach_src(ins, s)
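
/* For example, counting uses of an index across the whole shader with the
 * iterators above (illustrative; ctx and idx supplied by the caller):
 *
 *    unsigned uses = 0;
 *    bi_foreach_instr_global(ctx, I) {
 *       bi_foreach_src(I, s) {
 *          if (bi_is_equiv(I->src[s], idx))
 *             uses++;
 *       }
 *    }
 */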

/*
 * Find the index of a predecessor, used as the implicit order of phi sources.
 */
static inline unsigned
bi_predecessor_index(bi_block *succ, bi_block *pred)
{
   unsigned index = 0;

   bi_foreach_predecessor(succ, x) {
      if (*x == pred)
         return index;

      index++;
   }

   unreachable("Invalid predecessor");
}

static inline bi_instr *
bi_prev_op(bi_instr *ins)
{
   return list_last_entry(&(ins->link), bi_instr, link);
}

static inline bi_instr *
bi_next_op(bi_instr *ins)
{
   return list_first_entry(&(ins->link), bi_instr, link);
}

static inline bi_block *
bi_next_block(bi_block *block)
{
   return list_first_entry(&(block->link), bi_block, link);
}

static inline bi_block *
bi_entry_block(bi_context *ctx)
{
   return list_first_entry(&ctx->blocks, bi_block, link);
}

/* BIR manipulation */

bool bi_has_arg(const bi_instr *ins, bi_index arg);
unsigned bi_count_read_registers(const bi_instr *ins, unsigned src);
unsigned bi_count_write_registers(const bi_instr *ins, unsigned dest);
bool bi_is_regfmt_16(enum bi_register_format fmt);
unsigned bi_writemask(const bi_instr *ins, unsigned dest);
bi_clause *bi_next_clause(bi_context *ctx, bi_block *block, bi_clause *clause);
bool bi_side_effects(const bi_instr *I);
bool bi_reconverge_branches(bi_block *block);

bool bi_can_replace_with_csel(bi_instr *I);

void bi_print_instr(const bi_instr *I, FILE *fp);
void bi_print_slots(bi_registers *regs, FILE *fp);
void bi_print_tuple(bi_tuple *tuple, FILE *fp);
void bi_print_clause(bi_clause *clause, FILE *fp);
void bi_print_block(bi_block *block, FILE *fp);
void bi_print_shader(bi_context *ctx, FILE *fp);

/* BIR passes */

bool bi_instr_uses_helpers(bi_instr *I);
bool bi_block_terminates_helpers(bi_block *block);
void bi_analyze_helper_terminate(bi_context *ctx);
void bi_mark_clauses_td(bi_context *ctx);

void bi_analyze_helper_requirements(bi_context *ctx);
void bi_opt_copy_prop(bi_context *ctx);
void bi_opt_dce(bi_context *ctx, bool partial);
void bi_opt_cse(bi_context *ctx);
void bi_opt_mod_prop_forward(bi_context *ctx);
void bi_opt_mod_prop_backward(bi_context *ctx);
void bi_opt_fuse_dual_texture(bi_context *ctx);
void bi_opt_dce_post_ra(bi_context *ctx);
void bi_opt_message_preload(bi_context *ctx);
void bi_opt_push_ubo(bi_context *ctx);
void bi_opt_reorder_push(bi_context *ctx);
void bi_lower_swizzle(bi_context *ctx);
void bi_lower_fau(bi_context *ctx);
void bi_assign_scoreboard(bi_context *ctx);
void bi_register_allocate(bi_context *ctx);
void va_optimize(bi_context *ctx);
void va_lower_split_64bit(bi_context *ctx);

void bi_lower_opt_instructions(bi_context *ctx);

void bi_pressure_schedule(bi_context *ctx);
void bi_schedule(bi_context *ctx);
bool bi_can_fma(bi_instr *ins);
bool bi_can_add(bi_instr *ins);
bool bi_must_message(bi_instr *ins);
bool bi_reads_zero(bi_instr *ins);
bool bi_reads_temps(bi_instr *ins, unsigned src);
bool bi_reads_t(bi_instr *ins, unsigned src);

#ifndef NDEBUG
bool bi_validate_initialization(bi_context *ctx);
void bi_validate(bi_context *ctx, const char *after_str);
#else
static inline bool
bi_validate_initialization(UNUSED bi_context *ctx)
{
   return true;
}
static inline void
bi_validate(UNUSED bi_context *ctx, UNUSED const char *after_str)
{
   return;
}
#endif

uint32_t bi_fold_constant(bi_instr *I, bool *unsupported);
bool bi_opt_constant_fold(bi_context *ctx);

/* Liveness */

void bi_compute_liveness_ssa(bi_context *ctx);
void bi_liveness_ins_update_ssa(BITSET_WORD *live, const bi_instr *ins);

void bi_postra_liveness(bi_context *ctx);
uint64_t MUST_CHECK bi_postra_liveness_ins(uint64_t live, bi_instr *ins);

/* Layout */

signed bi_block_offset(bi_context *ctx, bi_clause *start, bi_block *target);
bool bi_ec0_packed(unsigned tuple_count);

/* Check whether there are no more instructions starting with a given block;
 * this needs to recurse in case a shader ends with multiple empty blocks */

static inline bool
bi_is_terminal_block(bi_block *block)
{
   return (block == NULL) || (list_is_empty(&block->instructions) &&
                              bi_is_terminal_block(block->successors[0]) &&
                              bi_is_terminal_block(block->successors[1]));
}

/* Code emit */

/* Returns the size of the final clause */
unsigned bi_pack(bi_context *ctx, struct util_dynarray *emission);
void bi_pack_valhall(bi_context *ctx, struct util_dynarray *emission);

struct bi_packed_tuple {
   uint64_t lo;
   uint64_t hi;
};

uint8_t bi_pack_literal(enum bi_clause_subword literal);

uint8_t bi_pack_upper(enum bi_clause_subword upper,
                      struct bi_packed_tuple *tuples,
                      ASSERTED unsigned tuple_count);
uint64_t bi_pack_tuple_bits(enum bi_clause_subword idx,
                            struct bi_packed_tuple *tuples,
                            ASSERTED unsigned tuple_count, unsigned offset,
                            unsigned nbits);

uint8_t bi_pack_sync(enum bi_clause_subword t1, enum bi_clause_subword t2,
                     enum bi_clause_subword t3, struct bi_packed_tuple *tuples,
                     ASSERTED unsigned tuple_count, bool z);

void bi_pack_format(struct util_dynarray *emission, unsigned index,
                    struct bi_packed_tuple *tuples,
                    ASSERTED unsigned tuple_count, uint64_t header,
                    uint64_t ec0, unsigned m0, bool z);

unsigned bi_pack_fma(bi_instr *I, enum bifrost_packed_src src0,
                     enum bifrost_packed_src src1, enum bifrost_packed_src src2,
                     enum bifrost_packed_src src3);
unsigned bi_pack_add(bi_instr *I, enum bifrost_packed_src src0,
                     enum bifrost_packed_src src1, enum bifrost_packed_src src2,
                     enum bifrost_packed_src src3);

/* Like in NIR, for use with the builder */

enum bi_cursor_option {
   bi_cursor_after_block,
   bi_cursor_before_instr,
   bi_cursor_after_instr
};

typedef struct {
   enum bi_cursor_option option;

   union {
      bi_block *block;
      bi_instr *instr;
   };
} bi_cursor;

static inline bi_cursor
bi_after_block(bi_block *block)
{
   return (bi_cursor){.option = bi_cursor_after_block, .block = block};
}

static inline bi_cursor
bi_before_instr(bi_instr *instr)
{
   return (bi_cursor){.option = bi_cursor_before_instr, .instr = instr};
}

static inline bi_cursor
bi_after_instr(bi_instr *instr)
{
   return (bi_cursor){.option = bi_cursor_after_instr, .instr = instr};
}

static inline bi_cursor
bi_after_block_logical(bi_block *block)
{
   if (list_is_empty(&block->instructions))
      return bi_after_block(block);

   bi_instr *last = list_last_entry(&block->instructions, bi_instr, link);
   assert(last != NULL);

   if (last->branch_target)
      return bi_before_instr(last);
   else
      return bi_after_block(block);
}

static inline bi_cursor
bi_before_nonempty_block(bi_block *block)
{
   bi_instr *I = list_first_entry(&block->instructions, bi_instr, link);
   assert(I != NULL);

   return bi_before_instr(I);
}

static inline bi_cursor
bi_before_block(bi_block *block)
{
   if (list_is_empty(&block->instructions))
      return bi_after_block(block);
   else
      return bi_before_nonempty_block(block);
}

/* Invariant: a tuple must be nonempty UNLESS it is the last tuple of a clause,
 * in which case there must exist a nonempty penultimate tuple */

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_tuple(bi_tuple *tuple)
{
   bi_instr *instr = tuple->fma ?: tuple->add;
   assert(instr != NULL);
   return instr;
}

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_first_instr_in_clause(bi_clause *clause)
{
   return bi_first_instr_in_tuple(&clause->tuples[0]);
}

ATTRIBUTE_RETURNS_NONNULL static inline bi_instr *
bi_last_instr_in_clause(bi_clause *clause)
{
   bi_tuple tuple = clause->tuples[clause->tuple_count - 1];
   bi_instr *instr = tuple.add ?: tuple.fma;

   if (!instr) {
      assert(clause->tuple_count >= 2);
      tuple = clause->tuples[clause->tuple_count - 2];
      instr = tuple.add ?: tuple.fma;
   }

   assert(instr != NULL);
   return instr;
}

/* Implemented by expanding bi_foreach_instr_in_block_from(_rev) with the start
 * (end) of the clause and adding a condition for the clause boundary */

#define bi_foreach_instr_in_clause(block, clause, pos)                         \
   for (bi_instr *pos =                                                        \
           list_entry(bi_first_instr_in_clause(clause), bi_instr, link);       \
        (&pos->link != &(block)->instructions) &&                              \
        (pos != bi_next_op(bi_last_instr_in_clause(clause)));                  \
        pos = list_entry(pos->link.next, bi_instr, link))

#define bi_foreach_instr_in_clause_rev(block, clause, pos)                     \
   for (bi_instr *pos =                                                        \
           list_entry(bi_last_instr_in_clause(clause), bi_instr, link);        \
        (&pos->link != &(block)->instructions) &&                              \
        pos != bi_prev_op(bi_first_instr_in_clause(clause));                   \
        pos = list_entry(pos->link.prev, bi_instr, link))

static inline bi_cursor
bi_before_clause(bi_clause *clause)
{
   return bi_before_instr(bi_first_instr_in_clause(clause));
}

static inline bi_cursor
bi_before_tuple(bi_tuple *tuple)
{
   return bi_before_instr(bi_first_instr_in_tuple(tuple));
}

static inline bi_cursor
bi_after_clause(bi_clause *clause)
{
   return bi_after_instr(bi_last_instr_in_clause(clause));
}

/* IR builder in terms of cursor infrastructure */

typedef struct {
   bi_context *shader;
   bi_cursor cursor;
} bi_builder;

static inline bi_builder
bi_init_builder(bi_context *ctx, bi_cursor cursor)
{
   return (bi_builder){.shader = ctx, .cursor = cursor};
}

/* Insert an instruction at the cursor and move the cursor */

static inline void
bi_builder_insert(bi_cursor *cursor, bi_instr *I)
{
   switch (cursor->option) {
   case bi_cursor_after_instr:
      list_add(&I->link, &cursor->instr->link);
      cursor->instr = I;
      return;

   case bi_cursor_after_block:
      list_addtail(&I->link, &cursor->block->instructions);
      cursor->option = bi_cursor_after_instr;
      cursor->instr = I;
      return;

   case bi_cursor_before_instr:
      list_addtail(&I->link, &cursor->instr->link);
      cursor->option = bi_cursor_after_instr;
      cursor->instr = I;
      return;
   }

   unreachable("Invalid cursor option");
}
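
/* A builder is typically constructed from a cursor and handed to the
 * generated emit helpers. Sketch (bi_mov_i32_to comes from the generated
 * builder header, not this file):
 *
 *    bi_builder b = bi_init_builder(ctx, bi_before_instr(I));
 *    bi_mov_i32_to(&b, I->dest[0], bi_zero());
 */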

bi_instr *bi_csel_from_mux(bi_builder *b, const bi_instr *I, bool must_sign);

/* Read back power-efficient garbage, TODO maybe merge with null? */
static inline bi_index
bi_dontcare(bi_builder *b)
{
   if (b->shader->arch >= 9)
      return bi_zero();
   else
      return bi_passthrough(BIFROST_SRC_FAU_HI);
}

#define bi_worklist_init(ctx, w)        u_worklist_init(w, ctx->num_blocks, ctx)
#define bi_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define bi_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define bi_worklist_peek_head(w)        u_worklist_peek_head(w, bi_block, index)
#define bi_worklist_pop_head(w)         u_worklist_pop_head(w, bi_block, index)
#define bi_worklist_peek_tail(w)        u_worklist_peek_tail(w, bi_block, index)
#define bi_worklist_pop_tail(w)         u_worklist_pop_tail(w, bi_block, index)
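
/* Sketch of a backward dataflow loop using the worklist wrappers (assumes the
 * u_worklist API from util/u_worklist.h, included above):
 *
 *    u_worklist worklist;
 *    bi_worklist_init(ctx, &worklist);
 *
 *    bi_foreach_block(ctx, block)
 *       bi_worklist_push_head(&worklist, block);
 *
 *    while (!u_worklist_is_empty(&worklist)) {
 *       bi_block *blk = bi_worklist_pop_tail(&worklist);
 *       ... process blk, re-push predecessors whose liveness changed ...
 *    }
 *
 *    u_worklist_fini(&worklist);
 */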

/* NIR passes */

bool bi_lower_divergent_indirects(nir_shader *shader, unsigned lanes);

#ifdef __cplusplus
} /* extern C */
#endif

#endif