1 /*
2  * Copyright (c) 2017 Lima Project
3  * Copyright (c) 2013 Connor Abbott
4  *
5  * Permission is hereby granted, free of charge, to any person obtaining a copy
6  * of this software and associated documentation files (the "Software"), to deal
7  * in the Software without restriction, including without limitation the rights
8  * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9  * copies of the Software, and to permit persons to whom the Software is
10  * furnished to do so, subject to the following conditions:
11  *
12  * The above copyright notice and this permission notice shall be included in
13  * all copies or substantial portions of the Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18  * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21  * THE SOFTWARE.
22  *
23  */
24 
25 #ifndef LIMA_IR_GP_GPIR_H
26 #define LIMA_IR_GP_GPIR_H
27 
28 #include "util/list.h"
29 #include "util/u_math.h"
30 
31 #include "ir/lima_ir.h"
32 
33 /* list of operations that a node can do. */
34 typedef enum {
35    gpir_op_unsupported = 0,
36    gpir_op_mov,
37 
38    /* mul ops */
39    gpir_op_mul,
40    gpir_op_select,
41    gpir_op_complex1,
42    gpir_op_complex2,
43 
44    /* add ops */
45    gpir_op_add,
46    gpir_op_floor,
47    gpir_op_sign,
48    gpir_op_ge,
49    gpir_op_lt,
50    gpir_op_min,
51    gpir_op_max,
52    gpir_op_abs,
53    gpir_op_not,
54 
55    /* mul/add ops */
56    gpir_op_neg,
57 
58    /* passthrough ops */
59    gpir_op_clamp_const,
60    gpir_op_preexp2,
61    gpir_op_postlog2,
62 
63    /* complex ops */
64    gpir_op_exp2_impl,
65    gpir_op_log2_impl,
66    gpir_op_rcp_impl,
67    gpir_op_rsqrt_impl,
68 
69    /* load/store ops */
70    gpir_op_load_uniform,
71    gpir_op_load_temp,
72    gpir_op_load_attribute,
73    gpir_op_load_reg,
74    gpir_op_store_temp,
75    gpir_op_store_reg,
76    gpir_op_store_varying,
77    gpir_op_store_temp_load_off0,
78    gpir_op_store_temp_load_off1,
79    gpir_op_store_temp_load_off2,
80 
81    /* branch */
82    gpir_op_branch_cond,
83 
84    /* const (emulated) */
85    gpir_op_const,
86 
87    /* emulated ops */
88    gpir_op_exp2,
89    gpir_op_log2,
90    gpir_op_rcp,
91    gpir_op_rsqrt,
92    gpir_op_ceil,
93    gpir_op_exp,
94    gpir_op_log,
95    gpir_op_sin,
96    gpir_op_cos,
97    gpir_op_tan,
98    gpir_op_branch_uncond,
99    gpir_op_eq,
100    gpir_op_ne,
101 
102    /* auxiliary ops */
103    gpir_op_dummy_f,
104    gpir_op_dummy_m,
105 
106    gpir_op_num,
107 } gpir_op;
108 
109 typedef enum {
110    gpir_node_type_alu,
111    gpir_node_type_const,
112    gpir_node_type_load,
113    gpir_node_type_store,
114    gpir_node_type_branch,
115 } gpir_node_type;
116 
117 typedef struct {
118    char *name;
119    bool dest_neg;
120    bool src_neg[4];
121    int *slots;
122    gpir_node_type type;
123    bool spillless;
124    bool schedule_first;
125    bool may_consume_two_slots;
126 } gpir_op_info;
127 
128 extern const gpir_op_info gpir_op_infos[];
129 
130 typedef struct {
131    enum {
132       GPIR_DEP_INPUT,     /* def is the input of use */
133       GPIR_DEP_OFFSET,    /* def is the offset of use (i.e. temp store) */
134       GPIR_DEP_READ_AFTER_WRITE,
135       GPIR_DEP_WRITE_AFTER_READ,
136    } type;
137 
138    /* node execute before succ */
139    struct gpir_node *pred;
140    /* node execute after pred */
141    struct gpir_node *succ;
142 
143    /* for node pred_list */
144    struct list_head pred_link;
145    /* for node succ_list */
146    struct list_head succ_link;
147 } gpir_dep;
148 
149 struct gpir_instr;
150 struct gpir_store_node;
151 
152 typedef struct gpir_node {
153    struct list_head list;
154    gpir_op op;
155    gpir_node_type type;
156    int index;
157    char name[16];
158    bool printed;
159    struct gpir_block *block;
160 
161    /* for nodes relationship */
162    /* for node who uses this node (successor) */
163    struct list_head succ_list;
164    /* for node this node uses (predecessor) */
165    struct list_head pred_list;
166 
167    /* for scheduler and regalloc */
168    int value_reg;
169    union {
170       struct {
171          struct gpir_instr *instr;
172          struct gpir_store_node *physreg_store;
173          int pos;
174          int dist;
175          int index;
176          bool ready;
177          bool inserted;
178          bool max_node, next_max_node;
179          bool complex_allowed;
180       } sched;
181       struct {
182          int parent_index;
183          float reg_pressure;
184          int est;
185          bool scheduled;
186       } rsched;
187       struct {
188          float index;
189          struct gpir_node *last;
190       } vreg;
191       struct {
192          int index;
193       } preg;
194    };
195 } gpir_node;
196 
197 typedef struct {
198    gpir_node node;
199 
200    gpir_node *children[3];
201    bool children_negate[3];
202    int num_child;
203 
204    bool dest_negate;
205 } gpir_alu_node;
206 
207 typedef struct {
208    gpir_node node;
209    union fi value;
210 } gpir_const_node;
211 
212 typedef struct {
213    int index;
214    struct list_head list;
215 } gpir_reg;
216 
217 typedef struct {
218    gpir_node node;
219 
220    unsigned index;
221    unsigned component;
222 
223    gpir_reg *reg;
224    struct list_head reg_link;
225 } gpir_load_node;
226 
227 typedef struct gpir_store_node {
228    gpir_node node;
229 
230    unsigned index;
231    unsigned component;
232    gpir_node *child;
233 
234    gpir_reg *reg;
235 } gpir_store_node;
236 
237 enum gpir_instr_slot {
238    GPIR_INSTR_SLOT_MUL0,
239    GPIR_INSTR_SLOT_MUL1,
240    GPIR_INSTR_SLOT_ADD0,
241    GPIR_INSTR_SLOT_ADD1,
242    GPIR_INSTR_SLOT_PASS,
243    GPIR_INSTR_SLOT_COMPLEX,
244    GPIR_INSTR_SLOT_REG0_LOAD0,
245    GPIR_INSTR_SLOT_REG0_LOAD1,
246    GPIR_INSTR_SLOT_REG0_LOAD2,
247    GPIR_INSTR_SLOT_REG0_LOAD3,
248    GPIR_INSTR_SLOT_REG1_LOAD0,
249    GPIR_INSTR_SLOT_REG1_LOAD1,
250    GPIR_INSTR_SLOT_REG1_LOAD2,
251    GPIR_INSTR_SLOT_REG1_LOAD3,
252    GPIR_INSTR_SLOT_MEM_LOAD0,
253    GPIR_INSTR_SLOT_MEM_LOAD1,
254    GPIR_INSTR_SLOT_MEM_LOAD2,
255    GPIR_INSTR_SLOT_MEM_LOAD3,
256    GPIR_INSTR_SLOT_STORE0,
257    GPIR_INSTR_SLOT_STORE1,
258    GPIR_INSTR_SLOT_STORE2,
259    GPIR_INSTR_SLOT_STORE3,
260    GPIR_INSTR_SLOT_NUM,
261    GPIR_INSTR_SLOT_END,
262    GPIR_INSTR_SLOT_ALU_BEGIN      = GPIR_INSTR_SLOT_MUL0,
263    GPIR_INSTR_SLOT_ALU_END        = GPIR_INSTR_SLOT_COMPLEX,
264    GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0,
265    GPIR_INSTR_SLOT_DIST_TWO_END   = GPIR_INSTR_SLOT_PASS,
266 };
267 
268 typedef struct gpir_instr {
269    int index;
270    struct list_head list;
271 
272    gpir_node *slots[GPIR_INSTR_SLOT_NUM];
273 
274    /* The number of ALU slots free for moves. */
275    int alu_num_slot_free;
276 
277    /* The number of ALU slots free for moves, except for the complex slot. */
278    int alu_non_cplx_slot_free;
279 
280    /* We need to make sure that we can insert moves in the following cases:
281     * (1) There was a use of a value two cycles ago.
282     * (2) There were more than 5 uses of a value 1 cycle ago (or else we can't
283     *     possibly satisfy (1) for the next cycle).
284     * (3) There is a store instruction scheduled, but not its child.
285     *
286     * The complex slot cannot be used for a move in case (1), since it only
287     * has a FIFO depth of 1, but it can be used for (2) as well as (3) as long
288     * as the uses aren't in certain slots. It turns out that we don't have to
289     * worry about nodes that can't use the complex slot for (2), since there
290     * are at most 4 uses 1 cycle ago that can't use the complex slot, but we
291     * do have to worry about (3). This means tracking stores whose children
292     * cannot be in the complex slot. In order to ensure that we have enough
293     * space for all three, we maintain the following invariants:
294     *
295     * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
296     *       alu_num_slot_needed_by_max +
297     *       max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
298     * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
299     *       alu_num_slot_needed_by_non_cplx_store
300     *
301     * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
302     * nodes for the next instruction) but when there is a complex1 node in
303     * this instruction it reduces to 4 to reserve a slot for complex2 in the
304     * next instruction.
305     */
306    int alu_num_slot_needed_by_store;
307    int alu_num_slot_needed_by_non_cplx_store;
308    int alu_num_slot_needed_by_max;
309    int alu_num_unscheduled_next_max;
310    int alu_max_allowed_next_max;
311 
312    /* Used to communicate to the scheduler how many slots need to be cleared
313     * up in order to satisfy the invariants.
314     */
315    int slot_difference;
316    int non_cplx_slot_difference;
317 
318    int reg0_use_count;
319    bool reg0_is_attr;
320    int reg0_index;
321 
322    int reg1_use_count;
323    int reg1_index;
324 
325    int mem_use_count;
326    bool mem_is_temp;
327    int mem_index;
328 
329    enum {
330       GPIR_INSTR_STORE_NONE,
331       GPIR_INSTR_STORE_VARYING,
332       GPIR_INSTR_STORE_REG,
333       GPIR_INSTR_STORE_TEMP,
334    } store_content[2];
335    int store_index[2];
336 } gpir_instr;
337 
338 typedef struct gpir_block {
339    struct list_head list;
340    struct list_head node_list;
341    struct list_head instr_list;
342    struct gpir_compiler *comp;
343 
344    struct gpir_block *successors[2];
345    struct list_head predecessors;
346    struct list_head predecessors_node;
347 
348    /* for regalloc */
349 
350    /* The set of live registers, i.e. registers whose value may be used
351     * eventually, at the beginning of the block.
352     */
353    BITSET_WORD *live_in;
354 
355    /* Set of live registers at the end of the block. */
356    BITSET_WORD *live_out;
357 
358    /* Set of registers that may have a value defined at the end of the
359     * block.
360     */
361    BITSET_WORD *def_out;
362 
363    /* After register allocation, the set of live physical registers at the end
364     * of the block. Needed for scheduling.
365     */
366    uint64_t live_out_phys;
367 
368    /* For codegen, the offset in the final program. */
369    unsigned instr_offset;
370 
371    /* for scheduler */
372    union {
373       struct {
374          int instr_index;
375       } sched;
376       struct {
377          int node_index;
378       } rsched;
379    };
380 } gpir_block;
381 
382 typedef struct {
383    gpir_node node;
384    gpir_block *dest;
385    gpir_node *cond;
386 } gpir_branch_node;
387 
388 struct lima_vs_compiled_shader;
389 
390 #define GPIR_VECTOR_SSA_VIEWPORT_SCALE  0
391 #define GPIR_VECTOR_SSA_VIEWPORT_OFFSET 1
392 #define GPIR_VECTOR_SSA_NUM 2
393 
394 typedef struct gpir_compiler {
395    struct list_head block_list;
396    int cur_index;
397 
398    /* Find the gpir node for a given NIR SSA def. */
399    gpir_node **node_for_ssa;
400 
401    /* Find the gpir register for a given NIR SSA def. */
402    gpir_reg **reg_for_ssa;
403 
404    /* gpir block for NIR block. */
405    gpir_block **blocks;
406 
407    /* for physical reg */
408    struct list_head reg_list;
409    int cur_reg;
410 
411    /* lookup for vector ssa */
412    struct {
413       int ssa;
414       gpir_node *nodes[4];
415    } vector_ssa[GPIR_VECTOR_SSA_NUM];
416 
417    struct lima_vs_compiled_shader *prog;
418    int constant_base;
419 
420    /* shaderdb */
421    int num_instr;
422    int num_loops;
423    int num_spills;
424    int num_fills;
425 } gpir_compiler;
426 
427 #define GPIR_VALUE_REG_NUM 11
428 #define GPIR_PHYSICAL_REG_NUM 64
429 
430 void *gpir_node_create(gpir_block *block, gpir_op op);
431 gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type);
432 void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred);
433 void gpir_node_replace_succ(gpir_node *dst, gpir_node *src);
434 void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred);
435 void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child);
436 void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child);
437 void gpir_node_delete(gpir_node *node);
438 void gpir_node_print_prog_dep(gpir_compiler *comp);
439 void gpir_node_print_prog_seq(gpir_compiler *comp);
440 
441 #define gpir_node_foreach_succ(node, dep) \
442    list_for_each_entry(gpir_dep, dep, &node->succ_list, succ_link)
443 #define gpir_node_foreach_succ_safe(node, dep) \
444    list_for_each_entry_safe(gpir_dep, dep, &node->succ_list, succ_link)
445 #define gpir_node_foreach_pred(node, dep) \
446    list_for_each_entry(gpir_dep, dep, &node->pred_list, pred_link)
447 #define gpir_node_foreach_pred_safe(node, dep) \
448    list_for_each_entry_safe(gpir_dep, dep, &node->pred_list, pred_link)
449 
gpir_node_is_root(gpir_node * node)450 static inline bool gpir_node_is_root(gpir_node *node)
451 {
452    return list_is_empty(&node->succ_list);
453 }
454 
gpir_node_is_leaf(gpir_node * node)455 static inline bool gpir_node_is_leaf(gpir_node *node)
456 {
457    return list_is_empty(&node->pred_list);
458 }
459 
460 #define gpir_node_to_alu(node) ((gpir_alu_node *)(node))
461 #define gpir_node_to_const(node) ((gpir_const_node *)(node))
462 #define gpir_node_to_load(node) ((gpir_load_node *)(node))
463 #define gpir_node_to_store(node) ((gpir_store_node *)(node))
464 #define gpir_node_to_branch(node) ((gpir_branch_node *)(node))
465 
466 gpir_instr *gpir_instr_create(gpir_block *block);
467 bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
468 void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
469 void gpir_instr_print_prog(gpir_compiler *comp);
470 
471 bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);
472 
473 bool gpir_optimize(gpir_compiler *comp);
474 bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
475 bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
476 bool gpir_regalloc_prog(gpir_compiler *comp);
477 bool gpir_schedule_prog(gpir_compiler *comp);
478 bool gpir_codegen_prog(gpir_compiler *comp);
479 
480 gpir_reg *gpir_create_reg(gpir_compiler *comp);
481 
482 #endif
483