/*
 * Copyright 2021 Alyssa Rosenzweig
 * Copyright 2020 Collabora Ltd.
 * SPDX-License-Identifier: MIT
 */

#pragma once

#include "compiler/nir/nir.h"
#include "util/half_float.h"
#include "util/u_dynarray.h"
#include "util/u_math.h"
#include "util/u_worklist.h"
#include "agx_compile.h"
#include "agx_minifloat.h"
#include "agx_opcodes.h"

#ifdef __cplusplus
extern "C" {
#endif

/* r0-r127 inclusive, as pairs of 16-bits, gives 256 registers */
#define AGX_NUM_REGS (256)

/* u0-u255 inclusive, as pairs of 16-bits */
#define AGX_NUM_UNIFORMS (512)

/* Semi-arbitrary limit for spill slot allocation */
#define AGX_NUM_MODELED_REGS_LOG2 (11)
#define AGX_NUM_MODELED_REGS      (1 << AGX_NUM_MODELED_REGS_LOG2)

/* Limit on number of sources for non-phi instructions */
#define AGX_MAX_NORMAL_SOURCES (16)

enum agx_index_type {
   AGX_INDEX_NULL = 0,
   AGX_INDEX_NORMAL = 1,
   AGX_INDEX_IMMEDIATE = 2,
   AGX_INDEX_UNIFORM = 3,
   AGX_INDEX_REGISTER = 4,
   AGX_INDEX_UNDEF = 5,
};

enum agx_size { AGX_SIZE_16 = 0, AGX_SIZE_32 = 1, AGX_SIZE_64 = 2 };

static inline unsigned
agx_size_align_16(enum agx_size size)
{
   switch (size) {
   case AGX_SIZE_16:
      return 1;
   case AGX_SIZE_32:
      return 2;
   case AGX_SIZE_64:
      return 4;
   }

   unreachable("Invalid size");
}

/* Keep synced with hash_index */
typedef struct {
   /* Sufficient for as many SSA values, immediates, and uniforms as we need. */
   uint32_t value;

   /* Indicates that this source kills the referenced value (because it is the
    * last use in a block and the source is not live after the block). Set by
    * liveness analysis.
    */
   bool kill : 1;

   /* Cache hints */
   bool cache   : 1;
   bool discard : 1;

   /* src - float modifiers */
   bool abs : 1;
   bool neg : 1;

   /* Register class */
   bool memory : 1;

   unsigned channels_m1     : 3;
   enum agx_size size       : 2;
   enum agx_index_type type : 3;

   /* If has_reg is set (during register allocation), the register assigned to
    * this SSA value. This is used with NORMAL. Contrast REGISTER, which uses
    * value instead.
    *
    * TODO: Unify.
    */
   unsigned reg : AGX_NUM_MODELED_REGS_LOG2;
   bool has_reg     : 1;
   unsigned padding : 6;
} agx_index;
static_assert(sizeof(agx_index) == 8, "packed");

static inline unsigned
agx_channels(agx_index idx)
{
   return idx.channels_m1 + 1;
}

static inline unsigned
agx_index_size_16(agx_index idx)
{
   return agx_size_align_16(idx.size) * agx_channels(idx);
}

static inline agx_index
agx_get_vec_index(unsigned value, enum agx_size size, unsigned channels)
{
   return (agx_index){
      .value = value,
      .channels_m1 = channels - 1,
      .size = size,
      .type = AGX_INDEX_NORMAL,
   };
}
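
/* Illustrative sketch (not part of the header): a 3-channel 32-bit SSA value
 * occupies agx_size_align_16(AGX_SIZE_32) * 3 = 6 half-word (16-bit) register
 * units, which is what agx_index_size_16() reports:
 *
 *    agx_index vec = agx_get_vec_index(0, AGX_SIZE_32, 3);
 *    assert(agx_index_size_16(vec) == 6);
 */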

static inline agx_index
agx_get_index(unsigned value, enum agx_size size)
{
   return agx_get_vec_index(value, size, 1);
}

static inline agx_index
agx_immediate(uint32_t imm)
{
   assert(imm < (1 << 16) && "overflowed immediate");

   return (agx_index){
      .value = imm,
      .size = AGX_SIZE_16,
      .type = AGX_INDEX_IMMEDIATE,
   };
}

static inline agx_index
agx_immediate_f(float f)
{
   assert(agx_minifloat_exact(f));
   return agx_immediate(agx_minifloat_encode(f));
}

/* In half-words: specify r0h as 1, r1 as 2, ... */
static inline agx_index
agx_register(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_REGS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}
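
/* Illustrative sketch (not part of the header): because register indices are
 * in half-words, r0l is index 0, r0h is index 1, and r1 starts at index 2, so
 * a 32-bit view of r1 would be built as:
 *
 *    agx_index r1 = agx_register(2, AGX_SIZE_32);
 */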

static inline agx_index
agx_memory_register(uint32_t imm, enum agx_size size)
{
   return (agx_index){
      .value = imm,
      .memory = true,
      .size = size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_register_like(uint32_t imm, agx_index like)
{
   return (agx_index){
      .value = imm,
      .memory = like.memory,
      .channels_m1 = like.channels_m1,
      .size = like.size,
      .type = AGX_INDEX_REGISTER,
   };
}

static inline agx_index
agx_as_register(agx_index x)
{
   assert(x.has_reg);
   return agx_register_like(x.reg, x);
}

static inline agx_index
agx_undef(enum agx_size size)
{
   return (agx_index){
      .size = size,
      .type = AGX_INDEX_UNDEF,
   };
}

/* Also in half-words */
static inline agx_index
agx_uniform(uint32_t imm, enum agx_size size)
{
   assert(imm < AGX_NUM_UNIFORMS);

   return (agx_index){
      .value = imm,
      .size = size,
      .type = AGX_INDEX_UNIFORM,
   };
}

static inline agx_index
agx_null()
{
   return (agx_index){.type = AGX_INDEX_NULL};
}

static inline agx_index
agx_zero()
{
   return agx_immediate(0);
}

/* IEEE 754 additive identity -0.0, stored as an 8-bit AGX minifloat: mantissa
 * = exponent = 0, sign bit set */

static inline agx_index
agx_negzero()
{
   return agx_immediate(0x80);
}

static inline agx_index
agx_abs(agx_index idx)
{
   idx.abs = true;
   idx.neg = false;
   return idx;
}

static inline agx_index
agx_neg(agx_index idx)
{
   idx.neg ^= true;
   return idx;
}

/* Replaces an index, preserving any modifiers */

static inline agx_index
agx_replace_index(agx_index old, agx_index replacement)
{
   replacement.abs = old.abs;
   replacement.neg = old.neg;
   return replacement;
}
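
/* Illustrative sketch (not part of the header): the float modifiers compose,
 * so -|x| on a hypothetical 32-bit SSA value `ssa_idx` would be:
 *
 *    agx_index src = agx_neg(agx_abs(agx_get_index(ssa_idx, AGX_SIZE_32)));
 *
 * and agx_replace_index(src, other) carries the abs/neg bits over to `other`.
 */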

static inline bool
agx_is_null(agx_index idx)
{
   return idx.type == AGX_INDEX_NULL;
}

/* Compares equivalence as references */

static inline bool
agx_is_equiv(agx_index left, agx_index right)
{
   return (left.type == right.type) && (left.value == right.value);
}

enum ra_class {
   /* General purpose register */
   RA_GPR,

   /* Memory, used to assign stack slots */
   RA_MEM,

   /* Keep last */
   RA_CLASSES,
};

static inline enum ra_class
ra_class_for_index(agx_index idx)
{
   return idx.memory ? RA_MEM : RA_GPR;
}
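
/* For instance (illustrative, not part of the header), spill slots are built
 * with agx_memory_register() and therefore land in the memory class:
 *
 *    ra_class_for_index(agx_memory_register(4, AGX_SIZE_16)) == RA_MEM
 *    ra_class_for_index(agx_register(4, AGX_SIZE_16))        == RA_GPR
 */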

enum agx_icond {
   AGX_ICOND_UEQ = 0,
   AGX_ICOND_ULT = 1,
   AGX_ICOND_UGT = 2,
   /* unknown */
   AGX_ICOND_SEQ = 4,
   AGX_ICOND_SLT = 5,
   AGX_ICOND_SGT = 6,
   /* unknown */
};

enum agx_fcond {
   AGX_FCOND_EQ = 0,
   AGX_FCOND_LT = 1,
   AGX_FCOND_GT = 2,
   AGX_FCOND_LTN = 3,
   /* unknown */
   AGX_FCOND_GE = 5,
   AGX_FCOND_LE = 6,
   AGX_FCOND_GTN = 7,
};

enum agx_round {
   AGX_ROUND_RTZ = 0,
   AGX_ROUND_RTE = 1,
};

enum agx_convert {
   AGX_CONVERT_U8_TO_F = 0,
   AGX_CONVERT_S8_TO_F = 1,
   AGX_CONVERT_F_TO_U16 = 4,
   AGX_CONVERT_F_TO_S16 = 5,
   AGX_CONVERT_U16_TO_F = 6,
   AGX_CONVERT_S16_TO_F = 7,
   AGX_CONVERT_F_TO_U32 = 8,
   AGX_CONVERT_F_TO_S32 = 9,
   AGX_CONVERT_U32_TO_F = 10,
   AGX_CONVERT_S32_TO_F = 11
};

enum agx_lod_mode {
   AGX_LOD_MODE_AUTO_LOD = 0,
   AGX_LOD_MODE_AUTO_LOD_BIAS_UNIFORM = 1,
   AGX_LOD_MODE_LOD_MIN_UNIFORM = 2,
   AGX_LOD_MODE_AUTO_LOD_BIAS = 5,
   AGX_LOD_MODE_LOD_GRAD = 4,
   AGX_LOD_MODE_LOD_MIN = 6,
   AGX_LOD_MODE_AUTO_LOD_BIAS_MIN_UNIFORM = 9,
   AGX_LOD_MODE_LOD_GRAD_MIN = 12,
   AGX_LOD_MODE_AUTO_LOD_BIAS_MIN = 13,
};

/* Forward declare for branch target */
struct agx_block;

/* Keep synced with hash_instr */
typedef struct {
   /* Must be first */
   struct list_head link;

   /* The sources list. */
   agx_index *src;

   /* Data flow */
   agx_index *dest;

   enum agx_opcode op;

   uint8_t nr_dests;
   uint8_t nr_srcs;

   /* TODO: More efficient */
   union {
      enum agx_icond icond;
      enum agx_fcond fcond;
   };

   union {
      uint64_t imm;
      uint32_t writeout;
      uint32_t truth_table;
      uint32_t component;
      uint32_t channels;
      uint32_t bfi_mask;
      uint16_t pixel_offset;
      uint16_t zs;
      int16_t stack_size;
      enum agx_sr sr;
      enum agx_round round;
      enum agx_atomic_opc atomic_opc;
      enum agx_lod_mode lod_mode;
      enum agx_simd_op simd_op;
      struct agx_block *target;

      /* As a special case to work around ordering issues when translating
       * phis, if nr_srcs == 0 and the opcode is PHI, points to the NIR phi.
       */
      nir_phi_instr *phi;
   };

   /* For local access */
   enum agx_format format;

   /* Number of nested control flow layers to jump by. TODO: Optimize */
   uint32_t nest;

   /* Invert icond/fcond */
   bool invert_cond : 1;

   /* TODO: Handle tex ops more efficiently */
   enum agx_dim dim       : 4;
   bool offset            : 1;
   bool shadow            : 1;
   bool query_lod         : 1;
   enum agx_gather gather : 3;

   /* TODO: Handle tilebuffer ops more efficiently */
   bool explicit_coords : 1;

   /* TODO: Handle iter ops more efficiently */
   enum agx_interpolation interpolation : 2;

   /* TODO: Handle loads more efficiently */
   bool coherent : 1;

   /* Final st_vary op */
   bool last : 1;

   /* Shift for a bitwise or memory op (conflicts with format for memory ops) */
   unsigned shift : 4;

   /* Scoreboard index, 0 or 1. Leave as 0 for instructions that do not require
    * scoreboarding (everything but memory load/store and texturing). */
   unsigned scoreboard : 1;

   /* Output modifiers */
   bool saturate : 1;
   unsigned mask : 4;

   unsigned padding : 8;
} agx_instr;

static inline void
agx_replace_src(agx_instr *I, unsigned src_index, agx_index replacement)
{
   I->src[src_index] = agx_replace_index(I->src[src_index], replacement);
}

struct agx_block;

typedef struct agx_block {
   /* Link to next block. Must be first */
   struct list_head link;

   /* List of instructions emitted for the current block */
   struct list_head instructions;

   /* Index of the block in source order */
   unsigned index;

   /* Control flow graph */
   struct agx_block *successors[2];
   struct util_dynarray predecessors;
   bool unconditional_jumps;

   /* Liveness analysis results */
   BITSET_WORD *live_in;
   BITSET_WORD *live_out;

   /* For visited blocks during register assignment and live-out registers, the
    * mapping of registers to SSA names at the end of the block. This is dense,
    * unlike its inverse.
    */
   uint32_t *reg_to_ssa_out[2];

   /* Is this block a loop header? If not, all of its predecessors precede it in
    * source order.
    */
   bool loop_header;

   /* Offset of the block in the emitted binary */
   off_t offset, last_offset;

   /** Available for passes to use for metadata */
   uint8_t pass_flags;
} agx_block;

typedef struct {
   nir_shader *nir;
   gl_shader_stage stage;
   bool is_preamble;
   unsigned scratch_size_B;

   struct list_head blocks; /* list of agx_block */
   struct agx_shader_info *out;
   struct agx_shader_key *key;

   /* Maximum block index */
   unsigned num_blocks;

   /* For creating temporaries */
   unsigned alloc;

   /* Does the shader statically use scratch memory? */
   bool any_scratch;

   /* Mask of pixel fences we've definitely already waited for. */
   uint16_t already_pixel_waited;

   /* Has r0l been zeroed yet due to control flow? */
   bool any_cf;

   /* Do we need r0h zero throughout the program to handle quad-divergent
    * shuffle?
    */
   bool any_quad_divergent_shuffle;

   /* Number of nested control flow structures within the innermost loop. Since
    * NIR is just loop and if-else, this is the number of nested if-else
    * statements in the loop.
    */
   unsigned loop_nesting;

   /* Total nesting across all loops, to determine if we need push_exec */
   unsigned total_nesting;

   /* Whether the loop being emitted used any `continue` jumps */
   bool loop_continues;

   /* During instruction selection, for inserting control flow */
   agx_block *current_block;
   agx_block *continue_block;
   agx_block *break_block;
   agx_block *after_block;
   agx_block **indexed_nir_blocks;

   /* During instruction selection, map from vector agx_index to its scalar
    * components, populated by a split. */
   struct hash_table_u64 *allocated_vec;

   /* During instruction selection, preloaded values, or null indices for
    * registers that have not been preloaded.
    */
   agx_index preloaded[AGX_NUM_REGS];

   /* Beginning of our stack allocation used for spilling; below that is
    * NIR-level scratch.
    */
   unsigned spill_base_B;

   /* Beginning of stack allocation used for parallel copy lowering */
   bool has_spill_pcopy_reserved;
   unsigned spill_pcopy_base;

   /* Stats for shader-db */
   unsigned loop_count;
   unsigned max_reg;

   /* Promoted constants. These will be appended to the binary at the end. */
   uint16_t rodata[512];
} agx_context;

static inline void
agx_remove_instruction(agx_instr *ins)
{
   list_del(&ins->link);
}

static inline agx_index
agx_vec_temp(agx_context *ctx, enum agx_size size, unsigned channels)
{
   return agx_get_vec_index(ctx->alloc++, size, channels);
}

static inline agx_index
agx_temp(agx_context *ctx, enum agx_size size)
{
   return agx_get_index(ctx->alloc++, size);
}

static inline agx_index
agx_temp_like(agx_context *ctx, agx_index idx)
{
   idx.value = ctx->alloc++;
   return idx;
}
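
/* Illustrative sketch (not part of the header): fresh SSA temporaries come
 * from the ctx->alloc counter, e.g. a 2-channel 16-bit vector destination
 * during instruction selection:
 *
 *    agx_index dst = agx_vec_temp(ctx, AGX_SIZE_16, 2);
 */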

static enum agx_size
agx_size_for_bits(unsigned bits)
{
   switch (bits) {
   case 1:
   case 8:
   case 16:
      return AGX_SIZE_16;
   case 32:
      return AGX_SIZE_32;
   case 64:
      return AGX_SIZE_64;
   default:
      unreachable("Invalid bitsize");
   }
}

static inline agx_index
agx_def_index(nir_def *ssa)
{
   return agx_get_vec_index(ssa->index, agx_size_for_bits(ssa->bit_size),
                            ssa->num_components);
}

static inline agx_index
agx_src_index(nir_src *src)
{
   return agx_def_index(src->ssa);
}

static inline agx_index
agx_vec_for_def(agx_context *ctx, nir_def *def)
{
   return agx_vec_temp(ctx, agx_size_for_bits(def->bit_size),
                       def->num_components);
}

static inline agx_index
agx_vec_for_intr(agx_context *ctx, nir_intrinsic_instr *instr)
{
   return agx_vec_for_def(ctx, &instr->def);
}

static inline unsigned
agx_num_predecessors(agx_block *block)
{
   return util_dynarray_num_elements(&block->predecessors, agx_block *);
}

static inline unsigned
agx_num_successors(agx_block *block)
{
   STATIC_ASSERT(ARRAY_SIZE(block->successors) == 2);
   return (block->successors[0] ? 1 : 0) + (block->successors[1] ? 1 : 0);
}

static inline agx_block *
agx_start_block(agx_context *ctx)
{
   agx_block *first = list_first_entry(&ctx->blocks, agx_block, link);
   assert(agx_num_predecessors(first) == 0);
   return first;
}

static inline agx_block *
agx_end_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(agx_num_successors(last) == 0);
   return last;
}

void agx_block_add_successor(agx_block *block, agx_block *successor);

/* Iterators for AGX IR */

#define agx_foreach_block(ctx, v)                                              \
   list_for_each_entry(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_safe(ctx, v)                                         \
   list_for_each_entry_safe(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_rev(ctx, v)                                          \
   list_for_each_entry_rev(agx_block, v, &ctx->blocks, link)

#define agx_foreach_block_from(ctx, from, v)                                   \
   list_for_each_entry_from(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_block_from_rev(ctx, from, v)                               \
   list_for_each_entry_from_rev(agx_block, v, from, &ctx->blocks, link)

#define agx_foreach_instr_in_block(block, v)                                   \
   list_for_each_entry(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_rev(block, v)                               \
   list_for_each_entry_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe(block, v)                              \
   list_for_each_entry_safe(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_safe_rev(block, v)                          \
   list_for_each_entry_safe_rev(agx_instr, v, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from(block, v, from)                        \
   list_for_each_entry_from(agx_instr, v, from, &(block)->instructions, link)

#define agx_foreach_instr_in_block_from_rev(block, v, from)                    \
   list_for_each_entry_from_rev(agx_instr, v, from, &(block)->instructions,    \
                                link)

#define agx_foreach_instr_global(ctx, v)                                       \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block(v_block, v)

#define agx_foreach_instr_global_rev(ctx, v)                                   \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_rev(v_block, v)

#define agx_foreach_instr_global_safe(ctx, v)                                  \
   agx_foreach_block(ctx, v_block)                                             \
      agx_foreach_instr_in_block_safe(v_block, v)

#define agx_foreach_instr_global_safe_rev(ctx, v)                              \
   agx_foreach_block_rev(ctx, v_block)                                         \
      agx_foreach_instr_in_block_safe_rev(v_block, v)

/* Based on set_foreach, expanded with automatic type casts */

#define agx_foreach_successor(blk, v)                                          \
   agx_block *v;                                                               \
   agx_block **_v;                                                             \
   for (_v = (agx_block **)&blk->successors[0], v = *_v;                       \
        v != NULL && _v < (agx_block **)&blk->successors[2]; _v++, v = *_v)

#define agx_foreach_predecessor(blk, v)                                        \
   util_dynarray_foreach(&blk->predecessors, agx_block *, v)

#define agx_foreach_src(ins, v) for (unsigned v = 0; v < ins->nr_srcs; ++v)

#define agx_foreach_src_rev(ins, v)                                            \
   for (signed v = ins->nr_srcs - 1; v >= 0; --v)

#define agx_foreach_dest(ins, v) for (unsigned v = 0; v < ins->nr_dests; ++v)

#define agx_foreach_dest_rev(ins, v)                                           \
   for (signed v = ins->nr_dests - 1; v >= 0; --v)

#define agx_foreach_ssa_src(ins, v)                                            \
   agx_foreach_src(ins, v)                                                     \
      if (ins->src[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_src_rev(ins, v)                                        \
   agx_foreach_src_rev(ins, v)                                                 \
      if (ins->src[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_dest(ins, v)                                           \
   agx_foreach_dest(ins, v)                                                    \
      if (ins->dest[v].type == AGX_INDEX_NORMAL)

#define agx_foreach_ssa_dest_rev(ins, v)                                       \
   agx_foreach_dest_rev(ins, v)                                                \
      if (ins->dest[v].type == AGX_INDEX_NORMAL)
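
/* Illustrative sketch (not part of the header): a pass visiting every SSA
 * source in the shader, e.g. to find the highest SSA index currently in use:
 *
 *    unsigned max_ssa = 0;
 *
 *    agx_foreach_instr_global(ctx, I) {
 *       agx_foreach_ssa_src(I, s)
 *          max_ssa = MAX2(max_ssa, I->src[s].value);
 *    }
 */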

/* Phis only come at the start (after else instructions), so we stop as soon
 * as we hit a non-phi.
 */
#define agx_foreach_phi_in_block(block, v)                                     \
   agx_foreach_instr_in_block(block, v)                                        \
      if (v->op == AGX_OPCODE_ELSE_ICMP || v->op == AGX_OPCODE_ELSE_FCMP)      \
         continue;                                                             \
      else if (v->op != AGX_OPCODE_PHI)                                        \
         break;                                                                \
      else

#define agx_foreach_phi_in_block_safe(block, v)                                \
   agx_foreach_instr_in_block_safe(block, v)                                   \
      if (v->op == AGX_OPCODE_ELSE_ICMP || v->op == AGX_OPCODE_ELSE_FCMP)      \
         continue;                                                             \
      else if (v->op != AGX_OPCODE_PHI)                                        \
         break;                                                                \
      else

/*
 * Find the index of a predecessor, used as the implicit order of phi sources.
 */
static inline unsigned
agx_predecessor_index(agx_block *succ, agx_block *pred)
{
   unsigned index = 0;

   agx_foreach_predecessor(succ, x) {
      if (*x == pred)
         return index;

      index++;
   }

   unreachable("Invalid predecessor");
}

static inline agx_block *
agx_prev_block(agx_block *ins)
{
   return list_last_entry(&(ins->link), agx_block, link);
}

static inline agx_instr *
agx_prev_op(agx_instr *ins)
{
   return list_last_entry(&(ins->link), agx_instr, link);
}

static inline agx_instr *
agx_first_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_first_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_last_instr(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return NULL;
   else
      return list_last_entry(&block->instructions, agx_instr, link);
}

static inline agx_instr *
agx_next_op(agx_instr *ins)
{
   return list_first_entry(&(ins->link), agx_instr, link);
}

static inline agx_block *
agx_next_block(agx_block *block)
{
   return list_first_entry(&(block->link), agx_block, link);
}

static inline agx_block *
agx_exit_block(agx_context *ctx)
{
   agx_block *last = list_last_entry(&ctx->blocks, agx_block, link);
   assert(!last->successors[0] && !last->successors[1]);
   return last;
}

#define agx_worklist_init(ctx, w)        u_worklist_init(w, ctx->num_blocks, ctx)
#define agx_worklist_push_head(w, block) u_worklist_push_head(w, block, index)
#define agx_worklist_push_tail(w, block) u_worklist_push_tail(w, block, index)
#define agx_worklist_peek_head(w)        u_worklist_peek_head(w, agx_block, index)
#define agx_worklist_pop_head(w)         u_worklist_pop_head(w, agx_block, index)
#define agx_worklist_peek_tail(w)        u_worklist_peek_tail(w, agx_block, index)
#define agx_worklist_pop_tail(w)         u_worklist_pop_tail(w, agx_block, index)
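
/* Illustrative sketch (not part of the header): a backward dataflow fixed
 * point over blocks using the worklist wrappers, in the style of
 * agx_compute_liveness(); the u_worklist calls are assumed from
 * util/u_worklist.h:
 *
 *    u_worklist worklist;
 *    agx_worklist_init(ctx, &worklist);
 *
 *    agx_foreach_block(ctx, block)
 *       agx_worklist_push_head(&worklist, block);
 *
 *    while (!u_worklist_is_empty(&worklist)) {
 *       agx_block *blk = agx_worklist_pop_head(&worklist);
 *
 *       // ... recompute blk's sets, push predecessors whose sets changed ...
 *    }
 *
 *    u_worklist_fini(&worklist);
 */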

/* Like in NIR, for use with the builder */

enum agx_cursor_option {
   agx_cursor_after_block,
   agx_cursor_before_instr,
   agx_cursor_after_instr
};

typedef struct {
   enum agx_cursor_option option;

   union {
      agx_block *block;
      agx_instr *instr;
   };
} agx_cursor;

static inline bool
agx_cursors_equal(agx_cursor a, agx_cursor b)
{
   if (a.option != b.option)
      return false;

   if (a.option == agx_cursor_after_block)
      return a.block == b.block;
   else
      return a.instr == b.instr;
}

static inline agx_cursor
agx_after_block(agx_block *block)
{
   return (agx_cursor){
      .option = agx_cursor_after_block,
      .block = block,
   };
}

static inline agx_cursor
agx_before_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_before_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_after_instr(agx_instr *instr)
{
   return (agx_cursor){
      .option = agx_cursor_after_instr,
      .instr = instr,
   };
}

static inline agx_cursor
agx_before_nonempty_block(agx_block *block)
{
   agx_instr *I = list_first_entry(&block->instructions, agx_instr, link);
   assert(I != NULL);

   return agx_before_instr(I);
}

static inline agx_cursor
agx_before_block(agx_block *block)
{
   if (list_is_empty(&block->instructions))
      return agx_after_block(block);
   else
      return agx_before_nonempty_block(block);
}

static inline bool
instr_after_logical_end(const agx_instr *I)
{
   switch (I->op) {
   case AGX_OPCODE_JMP_EXEC_ANY:
   case AGX_OPCODE_JMP_EXEC_NONE:
   case AGX_OPCODE_POP_EXEC:
   case AGX_OPCODE_BREAK:
   case AGX_OPCODE_IF_ICMP:
   case AGX_OPCODE_WHILE_ICMP:
   case AGX_OPCODE_IF_FCMP:
   case AGX_OPCODE_WHILE_FCMP:
   case AGX_OPCODE_STOP:
   case AGX_OPCODE_EXPORT:
      return true;
   default:
      return false;
   }
}

/*
 * Get a cursor inserting at the logical end of the block. In particular, this
 * is before branches or control flow instructions, which occur after the
 * logical end but before the physical end.
 */
static inline agx_cursor
agx_after_block_logical(agx_block *block)
{
   /* Search for the first instruction that's not past the logical end */
   agx_foreach_instr_in_block_rev(block, I) {
      if (!instr_after_logical_end(I))
         return agx_after_instr(I);
   }

   /* If we got here, the block is either empty or entirely control flow */
   return agx_before_block(block);
}

/* Get a cursor at the start of a function, after any preloads */
static inline agx_cursor
agx_before_function(agx_context *ctx)
{
   agx_block *block = agx_start_block(ctx);

   agx_foreach_instr_in_block(block, I) {
      if (I->op != AGX_OPCODE_PRELOAD)
         return agx_before_instr(I);
   }

   /* The whole block is preloads, so insert at the end */
   return agx_after_block(block);
}

/* IR builder in terms of cursor infrastructure */

typedef struct {
   agx_context *shader;
   agx_cursor cursor;
} agx_builder;

static inline agx_builder
agx_init_builder(agx_context *ctx, agx_cursor cursor)
{
   return (agx_builder){
      .shader = ctx,
      .cursor = cursor,
   };
}

/* Insert an instruction at the cursor and move the cursor */

static inline void
agx_builder_insert(agx_cursor *cursor, agx_instr *I)
{
   switch (cursor->option) {
   case agx_cursor_after_instr:
      list_add(&I->link, &cursor->instr->link);
      cursor->instr = I;
      return;

   case agx_cursor_after_block:
      list_addtail(&I->link, &cursor->block->instructions);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;

   case agx_cursor_before_instr:
      list_addtail(&I->link, &cursor->instr->link);
      cursor->option = agx_cursor_after_instr;
      cursor->instr = I;
      return;
   }

   unreachable("Invalid cursor option");
}
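
/* Illustrative sketch (not part of the header): passes construct a builder
 * positioned by a cursor helper and then emit through the generated
 * agx_*_to() builder functions (from the generated opcode headers, assumed
 * here), e.g. appending a move at the logical end of a block:
 *
 *    agx_builder b = agx_init_builder(ctx, agx_after_block_logical(block));
 *    agx_mov_to(&b, dst, src);
 */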

bool agx_instr_accepts_uniform(enum agx_opcode op, unsigned src_index,
                               unsigned value, enum agx_size size);

/* Routines defined for AIR */
void agx_print_index(agx_index index, bool is_float, FILE *fp);
void agx_print_instr(const agx_instr *I, FILE *fp);
void agx_print_block(const agx_block *block, FILE *fp);
void agx_print_shader(const agx_context *ctx, FILE *fp);
void agx_optimizer_forward(agx_context *ctx);
void agx_optimizer_backward(agx_context *ctx);
void agx_lower_divergent_shuffle(agx_context *ctx);
void agx_lower_pseudo(agx_context *ctx);
void agx_lower_spill(agx_context *ctx);
void agx_lower_uniform_sources(agx_context *ctx);
void agx_opt_cse(agx_context *ctx);
void agx_opt_compact_constants(agx_context *ctx);
void agx_opt_promote_constants(agx_context *ctx);
void agx_dce(agx_context *ctx, bool partial);
void agx_pressure_schedule(agx_context *ctx);
void agx_spill(agx_context *ctx, unsigned k);
void agx_repair_ssa(agx_context *ctx);
void agx_reindex_ssa(agx_context *ctx);
void agx_ra(agx_context *ctx);
void agx_lower_64bit_postra(agx_context *ctx);
void agx_insert_waits(agx_context *ctx);
void agx_opt_empty_else(agx_context *ctx);
void agx_opt_break_if(agx_context *ctx);
void agx_opt_jmp_none(agx_context *ctx);
void agx_pack_binary(agx_context *ctx, struct util_dynarray *emission);

#ifndef NDEBUG
void agx_validate(agx_context *ctx, const char *after_str);
void agx_validate_ra(agx_context *ctx);
#else
static inline void
agx_validate(UNUSED agx_context *ctx, UNUSED const char *after_str)
{
}

static inline void
agx_validate_ra(UNUSED agx_context *ctx)
{
}
#endif

enum agx_size agx_split_width(const agx_instr *I);
bool agx_allows_16bit_immediate(agx_instr *I);
unsigned agx_negate_src_index(agx_instr *I);

static inline bool
agx_is_float_src(const agx_instr *I, unsigned s)
{
   struct agx_opcode_info info = agx_opcodes_info[I->op];
   bool fcmp = (I->op == AGX_OPCODE_FCMPSEL || I->op == AGX_OPCODE_FCMP);

   /* fcmp takes its first two sources as floats but returns an integer */
   return info.is_float || (s < 2 && fcmp);
}

struct agx_copy {
   /* Base register destination of the copy */
   unsigned dest;

   /* Destination is memory */
   bool dest_mem;

   /* Source of the copy */
   agx_index src;

   /* Whether the copy has been handled. Callers must leave this as false. */
   bool done;
};

void agx_emit_parallel_copies(agx_builder *b, struct agx_copy *copies,
                              unsigned n);
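
/* Illustrative sketch (not part of the header): swapping the 16-bit registers
 * r0l and r0h by describing both moves and letting the lowering sequence them
 * (a swap is resolved without clobbering either source):
 *
 *    struct agx_copy copies[] = {
 *       {.dest = 0, .src = agx_register(1, AGX_SIZE_16)},
 *       {.dest = 1, .src = agx_register(0, AGX_SIZE_16)},
 *    };
 *
 *    agx_emit_parallel_copies(&b, copies, ARRAY_SIZE(copies));
 */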

void agx_compute_liveness(agx_context *ctx);
void agx_liveness_ins_update(BITSET_WORD *live, agx_instr *I);

bool agx_nir_opt_preamble(nir_shader *s, unsigned *preamble_size);
bool agx_nir_lower_load_mask(nir_shader *shader);
bool agx_nir_lower_ubo(nir_shader *shader);
bool agx_nir_lower_shared_bitsize(nir_shader *shader);
bool agx_nir_lower_frag_sidefx(nir_shader *s);

struct agx_cycle_estimate {
   /* ALU throughput */
   unsigned alu;

   /* Floating point and SCIB (select, conditional, integer, and boolean)
    * throughput.
    */
   unsigned f_scib;

   /* IC (integer and complex) throughput */
   unsigned ic;
};

struct agx_cycle_estimate agx_estimate_cycles(agx_context *ctx);

extern int agx_compiler_debug;

#ifdef __cplusplus
} /* extern C */
#endif