/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2020 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "compiler/nir/nir.h"
#include "util/half_float.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "util/u_worklist.h"
#include "agx_compile.h"
#include "agx_minifloat.h"
#include "agx_opcodes.h"

#ifdef __cplusplus
extern "C" {
#endif

/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
#define AGX_NUM_REGS (256)

/* u0-u255 inclusive, as pairs of 16-bits */
#define AGX_NUM_UNIFORMS (512)

/* Semi-arbitrary limit for spill slot allocation */
#define AGX_NUM_MODELED_REGS (2048)

enum agx_index_type {
   AGX_INDEX_NULL = 0,
   AGX_INDEX_NORMAL = 1,
   AGX_INDEX_IMMEDIATE = 2,
   AGX_INDEX_UNIFORM = 3,
   AGX_INDEX_REGISTER = 4,
   AGX_INDEX_UNDEF = 5,
};

enum agx_size { AGX_SIZE_16 = 0, AGX_SIZE_32 = 1, AGX_SIZE_64 = 2 };

static inline unsigned
agx_size_align_16(enum agx_size size)
{
   switch (size) {
   case AGX_SIZE_16:
      return 1;
   case AGX_SIZE_32:
      return 2;
   case AGX_SIZE_64:
      return 4;
   }

   unreachable("Invalid size");
}

/* Keep synced with hash_index */
typedef struct {
   /* Sufficient for as many SSA values, immediates, and uniforms as we need. */
   uint32_t value;

   /* Indicates that this source kills the referenced value (because it is the
    * last use in a block and the source is not live after the block). Set by
    * liveness analysis.
    */
   bool kill : 1;

   /* Cache hints */
   bool cache   : 1;
   bool discard : 1;

   /* src - float modifiers */
   bool abs : 1;
   bool neg : 1;

   /* Register class */
   bool memory : 1;

   unsigned channels_m1     : 3;
   enum agx_size size       : 2;
   enum agx_index_type type : 3;
   unsigned padding         : 18;
} agx_index;

static inline unsigned
agx_channels(agx_index idx)
{
   return idx.channels_m1 + 1;
}

static inline unsigned
agx_index_size_16(agx_index idx)
{
   return agx_size_align_16(idx.size) * agx_channels(idx);
}

static inline agx_index
agx_get_vec_index(unsigned value, enum agx_size size, unsigned channels)
{
   return (agx_index){
      .value = value,
      .channels_m1 = channels - 1,
      .size = size,
      .type = AGX_INDEX_NORMAL,
   };
}

static inline agx_index
agx_get_index(unsigned value, enum agx_size size)
{
   return agx_get_vec_index(value, size, 1);
}
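
/* Illustrative sketch (not part of the IR proper): sizes are measured in
 * 16-bit register halves, so a 32-bit scalar occupies two half-words and a
 * 32-bit vec4 occupies eight. This hypothetical helper just demonstrates the
 * arithmetic of the functions above.
 */
static inline unsigned
agx_example_vec4_32_halfwords(void)
{
   agx_index vec4 = agx_get_vec_index(0, AGX_SIZE_32, 4);
   return agx_index_size_16(vec4); /* 2 half-words per channel * 4 = 8 */
}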

static inline agx_index
agx_immediate(uint32_t imm)
{
   assert(imm < (1 << 16) && "overflowed immediate");

   return (agx_index){
      .value = imm,
      .size = AGX_SIZE_16,
      .type = AGX_INDEX_IMMEDIATE,
   };
}

static inline agx_index
agx_immediate_f(float f)
{
   assert(agx_minifloat_exact(f));
   return agx_immediate(agx_minifloat_encode(f));
}

/* in half-words, specify r0h as 1, r1 as 2... */
static inline agx_index
agx_register(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_REGS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_memory_register(uint32_t imm, enum agx_size size)
{
   return (agx_index){
      .value = imm,
      .memory = true,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_register_like(uint32_t imm, agx_index like)
{
   return (agx_index){
      .value = imm,
      .memory = like.memory,
      .channels_m1 = like.channels_m1,
      .size = like.size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_undef(enum agx_size size)
{
   return (agx_index){
      .size = size,
      .type = AGX_INDEX_UNDEF,
   };
}

/* Also in half-words */
static inline agx_index
agx_uniform(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_UNIFORMS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_UNIFORM,
   };
}

static inline agx_index
agx_null()
{
   return (agx_index){.type = AGX_INDEX_NULL};
}

static inline agx_index
agx_zero()
{
   return agx_immediate(0);
}

/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
 * = exponent = 0, sign bit set */

static inline agx_index
agx_negzero()
{
   return agx_immediate(0x80);
}
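
/* Illustrative sketch: float immediates must be exactly representable as an
 * 8-bit AGX minifloat (agx_minifloat_exact), so simple constants like 0.0,
 * 1.0, and -0.0 encode directly, while arbitrary values have to be
 * materialized some other way. This hypothetical helper is just an example.
 */
static inline agx_index
agx_example_float_one(void)
{
   return agx_immediate_f(1.0f); /* encodes via agx_minifloat_encode */
}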

static inline agx_index
agx_abs(agx_index idx)
{
   idx.abs = true;
   idx.neg = false;
   return idx;
}

static inline agx_index
agx_neg(agx_index idx)
{
   idx.neg ^= true;
   return idx;
}

/* Replaces an index, preserving any modifiers */

static inline agx_index
agx_replace_index(agx_index old, agx_index replacement)
{
   replacement.abs = old.abs;
   replacement.neg = old.neg;
   return replacement;
}
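
/* Illustrative sketch: agx_replace_index carries the old index's float
 * modifiers onto the replacement, so rewriting the value behind a negated
 * source keeps the negate. Hypothetical example helper only.
 */
static inline agx_index
agx_example_replace_keeps_neg(agx_index replacement)
{
   agx_index old = agx_neg(agx_get_index(1, AGX_SIZE_32));
   return agx_replace_index(old, replacement); /* result has .neg set */
}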

static inline bool
agx_is_null(agx_index idx)
{
   return idx.type == AGX_INDEX_NULL;
}

/* Compares equivalence as references */

static inline bool
agx_is_equiv(agx_index left, agx_index right)
{
   return (left.type == right.type) && (left.value == right.value);
}

enum agx_icond {
   AGX_ICOND_UEQ = 0,
   AGX_ICOND_ULT = 1,
   AGX_ICOND_UGT = 2,
   /* unknown */
   AGX_ICOND_SEQ = 4,
   AGX_ICOND_SLT = 5,
   AGX_ICOND_SGT = 6,
   /* unknown */
};

enum agx_fcond {
   AGX_FCOND_EQ = 0,
   AGX_FCOND_LT = 1,
   AGX_FCOND_GT = 2,
   AGX_FCOND_LTN = 3,
   /* unknown */
   AGX_FCOND_GE = 5,
   AGX_FCOND_LE = 6,
   AGX_FCOND_GTN = 7,
};

enum agx_round {
   AGX_ROUND_RTZ = 0,
   AGX_ROUND_RTE = 1,
};

enum agx_convert {
   AGX_CONVERT_U8_TO_F = 0,
   AGX_CONVERT_S8_TO_F = 1,
   AGX_CONVERT_F_TO_U16 = 4,
   AGX_CONVERT_F_TO_S16 = 5,
   AGX_CONVERT_U16_TO_F = 6,
   AGX_CONVERT_S16_TO_F = 7,
   AGX_CONVERT_F_TO_U32 = 8,
   AGX_CONVERT_F_TO_S32 = 9,
   AGX_CONVERT_U32_TO_F = 10,
   AGX_CONVERT_S32_TO_F = 11
};

enum agx_lod_mode {
   AGX_LOD_MODE_AUTO_LOD = 0,
   AGX_LOD_MODE_AUTO_LOD_BIAS_UNIFORM = 1,
   AGX_LOD_MODE_LOD_MIN_UNIFORM = 2,
   AGX_LOD_MODE_AUTO_LOD_BIAS = 5,
   AGX_LOD_MODE_LOD_MIN = 6,
   AGX_LOD_MODE_LOD_GRAD = 4,
   AGX_LOD_MODE_LOD_GRAD_MIN = 12
};

/* Forward declare for branch target */
struct agx_block;

/* Keep synced with hash_instr */
typedef struct {
   /* Must be first */
   struct list_head link;

   /* The sources list.
    *
    * As a special case to work around ordering issues when translating phis,
    * if nr_srcs == 0 and the opcode is PHI, this holds a pointer to the NIR
    * phi node.
    */
   union {
      agx_index *src;
      nir_phi_instr *phi;
   };

   /* Data flow */
   agx_index *dest;

   enum agx_opcode op;

   uint8_t nr_dests;
   uint8_t nr_srcs;

   /* TODO: More efficient */
   union {
      enum agx_icond icond;
      enum agx_fcond fcond;
   };

   union {
      uint64_t imm;
      uint32_t writeout;
      uint32_t truth_table;
      uint32_t component;
      uint32_t channels;
      uint32_t bfi_mask;
      uint16_t pixel_offset;
      uint16_t zs;
      int16_t stack_size;
      enum agx_sr sr;
      enum agx_round round;
      enum agx_atomic_opc atomic_opc;
      enum agx_lod_mode lod_mode;
      struct agx_block *target;
   };

   /* For local access */
   enum agx_format format;

   /* Number of nested control flow layers to jump by. TODO: Optimize */
   uint32_t nest;

   /* Invert icond/fcond */
   bool invert_cond : 1;

   /* TODO: Handle tex ops more efficiently */
   enum agx_dim dim       : 4;
   bool offset            : 1;
   bool shadow            : 1;
   bool query_lod         : 1;
   enum agx_gather gather : 3;

   /* TODO: Handle iter ops more efficiently */
   enum agx_interpolation interpolation : 2;

   /* Final st_vary op */
   bool last : 1;

   /* Shift for a bitwise or memory op (conflicts with format for memory ops) */
   unsigned shift : 4;

   /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
    * scoreboarding (everything but memory load/store and texturing). */
   unsigned scoreboard : 1;

   /* Output modifiers */
   bool saturate : 1;
   unsigned mask : 4;

   unsigned padding : 8;
} agx_instr;

static inline void
agx_replace_src(agx_instr *I, unsigned src_index, agx_index replacement)
{
   I->src[src_index] = agx_replace_index(I->src[src_index], replacement);
}

struct agx_block;

typedef struct agx_block {
   /* Link to next block. Must be first */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned index;

   /* Control flow graph */
   struct agx_block *successors[2];
   struct util_dynarray predecessors;
   bool unconditional_jumps;

   /* Liveness analysis results */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;

   /* For visited blocks during register assignment and live-out registers, the
    * mapping of SSA names to registers at the end of the block.
    */
   uint16_t *ssa_to_reg_out;

   /* Is this block a loop header? If not, all of its predecessors precede it in
    * source order.
    */
   bool loop_header;

   /* Offset of the block in the emitted binary */
   off_t offset, last_offset;

   /** Available for passes to use for metadata */
   uint8_t pass_flags;
} agx_block;

typedef struct {
   nir_shader *nir;
   gl_shader_stage stage;
   bool is_preamble;
   unsigned scratch_size;

   struct list_head blocks; /* list of agx_block */
   struct agx_shader_info *out;
   struct agx_shader_key *key;

   /* Maximum block index */
   unsigned num_blocks;

   /* For creating temporaries */
   unsigned alloc;

   /* Does the shader statically use scratch memory? */
   bool any_scratch;

   /* I don't really understand how writeout ops work yet */
   bool did_writeout;

   /* Has r0l been zeroed yet due to control flow? */
   bool any_cf;

   /* Number of nested control flow structures within the innermost loop. Since
    * NIR is just loop and if-else, this is the number of nested if-else
    * statements in the loop */
   unsigned loop_nesting;

   /* Total nesting across all loops, to determine if we need push_exec */
   unsigned total_nesting;

   /* Whether the loop being emitted used any `continue` jumps */
   bool loop_continues;

   /* During instruction selection, for inserting control flow */
   agx_block *current_block;
   agx_block *continue_block;
   agx_block *break_block;
   agx_block *after_block;
   agx_block **indexed_nir_blocks;

   /* During instruction selection, map from vector agx_index to its scalar
    * components, populated by a split. */
   struct hash_table_u64 *allocated_vec;

   /* During instruction selection, preloaded values, or null indices if they
    * haven't been preloaded yet
    */
   agx_index vertex_id, instance_id;

   /* Beginning of our stack allocation used for spilling, below that is
    * NIR-level scratch.
    */
   unsigned spill_base;

   /* Beginning of stack allocation used for parallel copy lowering */
   bool has_spill_pcopy_reserved;
   unsigned spill_pcopy_base;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned spills;
   unsigned fills;
   unsigned max_reg;
} agx_context;

static inline void
agx_remove_instruction(agx_instr *ins)
{
   list_del(&ins->link);
}

static inline agx_index
agx_vec_temp(agx_context *ctx, enum agx_size size, unsigned channels)
{
   return agx_get_vec_index(ctx->alloc++, size, channels);
}

static inline agx_index
agx_temp(agx_context *ctx, enum agx_size size)
{
   return agx_get_index(ctx->alloc++, size);
}

static enum agx_size
agx_size_for_bits(unsigned bits)
{
   switch (bits) {
   case 1:
   case 8:
   case 16:
      return AGX_SIZE_16;
   case 32:
      return AGX_SIZE_32;
   case 64:
      return AGX_SIZE_64;
   default:
      unreachable("Invalid bitsize");
   }
}

static inline agx_index
agx_def_index(nir_def *ssa)
{
   return agx_get_vec_index(ssa->index, agx_size_for_bits(ssa->bit_size),
                            ssa->num_components);
}

static inline agx_index
agx_src_index(nir_src *src)
{
   return agx_def_index(src->ssa);
}

static inline agx_index
agx_vec_for_def(agx_context *ctx, nir_def *def)
{
   return agx_vec_temp(ctx, agx_size_for_bits(def->bit_size),
                       def->num_components);
}

static inline agx_index
agx_vec_for_intr(agx_context *ctx, nir_intrinsic_instr *instr)
{
   return agx_vec_for_def(ctx, &instr->def);
}

static inline unsigned
agx_num_predecessors(agx_block *block)
{
   return util_dynarray_num_elements(&block->predecessors, agx_block *);
}

static inline agx_block *
agx_start_block(agx_context *ctx)
{
   agx_block *first = list_first_entry(&ctx->blocks, agx_block, link);
   assert(agx_num_predecessors(first) == 0);
   return first;
}

/* Iterators for AGX IR */

#define agx_foreach_block(ctx, v)                                              \
   list_for_each_entry(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_rev(ctx, v)                                          \
   list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_from(ctx, from, v)                                   \
   list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_block_from_rev(ctx, from, v)                               \
   list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_instr_in_block(block, v)                                   \
   list_for_each_entry(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_rev(block, v)                               \
   list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe(block, v)                              \
   list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe_rev(block, v)                          \
   list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from(block, v, from)                        \
   list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from_rev(block, v, from)                    \
   list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions,    \
                                link)

#define agx_foreach_instr_global(ctx, v)                                       \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block(v_block, v)

#define agx_foreach_instr_global_rev(ctx, v)                                   \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_rev(v_block, v)

#define agx_foreach_instr_global_safe(ctx, v)                                  \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block_safe(v_block, v)

#define agx_foreach_instr_global_safe_rev(ctx, v)                              \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_safe_rev(v_block, v)
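
/* Illustrative sketch: a pass typically walks the IR with these iterators;
 * the _safe variants additionally permit agx_remove_instruction during the
 * walk. This hypothetical helper just counts instructions in a shader.
 */
static inline unsigned
agx_example_count_instrs(agx_context *ctx)
{
   unsigned count = 0;

   agx_foreach_instr_global(ctx, I) {
      (void)I;
      count++;
   }

   return count;
}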

/* Based on set_foreach, expanded with automatic type casts */

#define agx_foreach_successor(blk, v)                                          \
   agx_block *v;                                                               \
   agx_block **_v;                                                             \
   for (_v = (agx_block **)&blk->successors[0], v = *_v;                       \
        v != NULL && _v < (agx_block **)&blk->successors[2]; _v++, v = *_v)

#define agx_foreach_predecessor(blk, v)                                        \
   util_dynarray_foreach(&blk->predecessors, agx_block *, v)

#define agx_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v)

#define agx_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v)

#define agx_foreach_ssa_src(ins, v)                                            \
   agx_foreach_src(ins, v)                                                     \
      if (ins->src[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_dest(ins, v)                                           \
   agx_foreach_dest(ins, v)                                                    \
      if (ins->dest[v].type == AGX_INDEX_NORMAL)

/* Phis only come at the start (after else instructions), so we stop as soon
 * as we hit a non-phi.
 */
#define agx_foreach_phi_in_block(block, v)                                     \
   agx_foreach_instr_in_block(block, v)                                        \
      if (v->op == AGX_OPCODE_ELSE_ICMP || v->op == AGX_OPCODE_ELSE_FCMP)      \
         continue;                                                             \
      else if (v->op != AGX_OPCODE_PHI)                                        \
         break;                                                                \
      else

/*
 * Find the index of a predecessor, used as the implicit order of phi sources.
 */
static inline unsigned
agx_predecessor_index(agx_block *succ, agx_block *pred)
{
   unsigned index = 0;

   agx_foreach_predecessor(succ, x) {
      if (*x == pred)
         return index;

      index++;
   }

   unreachable("Invalid predecessor");
}
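
/* Illustrative sketch: phi sources are implicitly ordered by predecessor
 * index, so source agx_predecessor_index(block, pred) of a phi is the value
 * flowing in from pred. Hypothetical example helper only.
 */
static inline agx_index
agx_example_phi_src_from(agx_block *block, agx_instr *phi, agx_block *pred)
{
   assert(phi->op == AGX_OPCODE_PHI);
   return phi->src[agx_predecessor_index(block, pred)];
}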

static inline agx_block *
agx_prev_block(agx_block *ins)
{
   return list_last_entry(&(ins->link), agx_block, link);
}

static inline agx_instr *
agx_prev_op(agx_instr *ins)
{
   return list_last_entry(&(ins->link), agx_instr, link);
}

static inline agx_instr *
agx_first_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_first_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_last_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_last_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_next_op(agx_instr *ins)
{
   return list_first_entry(&(ins->link), agx_instr, link);
}

static inline agx_block *
agx_next_block(agx_block *block)
{
   return list_first_entry(&(block->link), agx_block, link);
}

static inline agx_block *
agx_exit_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(!last->successors[0] && !last->successors[1]);
   return last;
}

#define agx_worklist_init(ctx, w)        u_worklist_init(w, ctx->num_blocks, ctx)
#define agx_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define agx_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define agx_worklist_peek_head(w)        u_worklist_peek_head(w, agx_block, index)
#define agx_worklist_pop_head(w)         u_worklist_pop_head(w, agx_block, index)
#define agx_worklist_peek_tail(w)        u_worklist_peek_tail(w, agx_block, index)
#define agx_worklist_pop_tail(w)         u_worklist_pop_tail(w, agx_block, index)

/* Like in NIR, for use with the builder */

enum agx_cursor_option {
   agx_cursor_after_block,
   agx_cursor_before_instr,
   agx_cursor_after_instr
};

typedef struct {
   enum agx_cursor_option option;

   union {
      agx_block *block;
      agx_instr *instr;
   };
} agx_cursor;

static inline agx_cursor
agx_after_block(agx_block *block)
{
   return (agx_cursor){
      .option = agx_cursor_after_block,
      .block = block,
   };
}

static inline agx_cursor
agx_before_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_before_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_after_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_after_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_before_nonempty_block(agx_block *block)
{
   agx_instr *I = list_first_entry(&block->instructions, agx_instr, link);
   assert(I != NULL);

   return agx_before_instr(I);
}

static inline agx_cursor
agx_before_block(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return agx_after_block(block);
   else
      return agx_before_nonempty_block(block);
}

static inline bool
instr_after_logical_end(const agx_instr *I)
{
   switch (I->op) {
   case AGX_OPCODE_JMP_EXEC_ANY:
   case AGX_OPCODE_JMP_EXEC_NONE:
   case AGX_OPCODE_POP_EXEC:
   case AGX_OPCODE_BREAK:
   case AGX_OPCODE_IF_ICMP:
   case AGX_OPCODE_WHILE_ICMP:
   case AGX_OPCODE_IF_FCMP:
   case AGX_OPCODE_WHILE_FCMP:
   case AGX_OPCODE_STOP:
      return true;
   default:
      return false;
   }
}

/*
 * Get a cursor inserting at the logical end of the block. In particular, this
 * is before branches or control flow instructions, which occur after the
 * logical end but before the physical end.
 */
static inline agx_cursor
agx_after_block_logical(agx_block *block)
{
   /* Search for the first instruction that's not past the logical end */
   agx_foreach_instr_in_block_rev(block, I) {
      if (!instr_after_logical_end(I))
         return agx_after_instr(I);
   }

   /* If we got here, the block is either empty or entirely control flow */
   return agx_before_block(block);
}

/* IR builder in terms of cursor infrastructure */

typedef struct {
   agx_context *shader;
   agx_cursor cursor;
} agx_builder;

static inline agx_builder
agx_init_builder(agx_context *ctx, agx_cursor cursor)
{
   return (agx_builder){
      .shader = ctx,
      .cursor = cursor,
   };
}
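
/* Illustrative sketch: passes construct a builder at a cursor and then emit
 * instructions through it. The generated opcode helpers (e.g. agx_mov_to in
 * agx_builder.h) are an assumption here, not defined in this header.
 */
static inline void
agx_example_builder_at_logical_end(agx_context *ctx, agx_block *block)
{
   agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));
   (void)b; /* e.g. agx_mov_to(&b, dst, src) would insert before any branch */
}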

/* Insert an instruction at the cursor and move the cursor */

static inline void
agx_builder_insert(agx_cursor *cursor, agx_instr *I)
{
   switch (cursor->option) {
   case agx_cursor_after_instr:
      list_add(&I->link, &cursor->instr->link);
      cursor->instr = I;
      return;

   case agx_cursor_after_block:
      list_addtail(&I->link, &cursor->block->instructions);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;

   case agx_cursor_before_instr:
      list_addtail(&I->link, &cursor->instr->link);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;
   }

   unreachable("Invalid cursor option");
}

/* Routines defined for AIR */

void agx_print_index(agx_index index, bool is_float, FILE *fp);
void agx_print_instr(const agx_instr *I, FILE *fp);
void agx_print_block(const agx_block *block, FILE *fp);
void agx_print_shader(const agx_context *ctx, FILE *fp);
void agx_optimizer(agx_context *ctx);
void agx_lower_pseudo(agx_context *ctx);
void agx_lower_spill(agx_context *ctx);
void agx_lower_uniform_sources(agx_context *ctx);
void agx_opt_cse(agx_context *ctx);
void agx_dce(agx_context *ctx, bool partial);
void agx_pressure_schedule(agx_context *ctx);
void agx_ra(agx_context *ctx);
void agx_lower_64bit_postra(agx_context *ctx);
void agx_insert_waits(agx_context *ctx);
void agx_opt_empty_else(agx_context *ctx);
void agx_opt_break_if(agx_context *ctx);
void agx_opt_jmp_none(agx_context *ctx);
void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);

#ifndef NDEBUG
void agx_validate(agx_context *ctx, const char *after_str);
#else
static inline void
agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str)
{
   return;
}
#endif

enum agx_size agx_split_width(const agx_instr *I);
bool agx_allows_16bit_immediate(agx_instr *I);

struct agx_copy {
   /* Base register destination of the copy */
   unsigned dest;

   /* Destination is memory */
   bool dest_mem;

   /* Source of the copy */
   agx_index src;

   /* Whether the copy has been handled. Callers must initialize this to
    * false. */
   bool done;
};

void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
                              unsigned n);
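
/* Illustrative sketch: parallel copies are expressed as an array of
 * struct agx_copy and sequenced by the lowering even when sources and
 * destinations overlap. Assuming dest is in half-words like agx_register,
 * this hypothetical helper would swap r0l and r1l (half-word indices 0
 * and 2).
 */
static inline void
agx_example_swap_r0l_r1l(agx_builder *b)
{
   struct agx_copy copies[] = {
      {.dest = 0, .src = agx_register(2, AGX_SIZE_16)},
      {.dest = 2, .src = agx_register(0, AGX_SIZE_16)},
   };

   /* .done is zero-initialized (false), as required */
   agx_emit_parallel_copies(b, copies, 2);
}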

void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);

bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
bool agx_nir_lower_load_mask(nir_shader *shader);
bool agx_nir_lower_address(nir_shader *shader);
bool agx_nir_lower_ubo(nir_shader *shader);
bool agx_nir_lower_shared_bitsize(nir_shader *shader);
bool agx_nir_lower_frag_sidefx(nir_shader *s);

extern int agx_compiler_debug;

#ifdef __cplusplus
} /* extern C */
#endif