1 /*
2 * Copyright (c) 2017 Lima Project
3 * Copyright (c) 2013 Connor Abbott
4 *
5 * Permission is hereby granted, free of charge, to any person obtaining a copy
6 * of this software and associated documentation files (the "Software"), to deal
7 * in the Software without restriction, including without limitation the rights
8 * to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 * copies of the Software, and to permit persons to whom the Software is
10 * furnished to do so, subject to the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be included in
13 * all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
21 * THE SOFTWARE.
22 *
23 */
24
25 #ifndef LIMA_IR_GP_GPIR_H
26 #define LIMA_IR_GP_GPIR_H
27
28 #include "util/list.h"
29 #include "util/u_math.h"
30
31 #include "ir/lima_ir.h"
32
33 /* list of operations that a node can do. */
34 typedef enum {
35 gpir_op_unsupported = 0,
36 gpir_op_mov,
37
38 /* mul ops */
39 gpir_op_mul,
40 gpir_op_select,
41 gpir_op_complex1,
42 gpir_op_complex2,
43
44 /* add ops */
45 gpir_op_add,
46 gpir_op_floor,
47 gpir_op_sign,
48 gpir_op_ge,
49 gpir_op_lt,
50 gpir_op_min,
51 gpir_op_max,
52 gpir_op_abs,
53 gpir_op_not,
54
55 /* mul/add ops */
56 gpir_op_neg,
57
58 /* passthrough ops */
59 gpir_op_clamp_const,
60 gpir_op_preexp2,
61 gpir_op_postlog2,
62
63 /* complex ops */
64 gpir_op_exp2_impl,
65 gpir_op_log2_impl,
66 gpir_op_rcp_impl,
67 gpir_op_rsqrt_impl,
68
69 /* load/store ops */
70 gpir_op_load_uniform,
71 gpir_op_load_temp,
72 gpir_op_load_attribute,
73 gpir_op_load_reg,
74 gpir_op_store_temp,
75 gpir_op_store_reg,
76 gpir_op_store_varying,
77 gpir_op_store_temp_load_off0,
78 gpir_op_store_temp_load_off1,
79 gpir_op_store_temp_load_off2,
80
81 /* branch */
82 gpir_op_branch_cond,
83
84 /* const (emulated) */
85 gpir_op_const,
86
87 /* emulated ops */
88 gpir_op_exp2,
89 gpir_op_log2,
90 gpir_op_rcp,
91 gpir_op_rsqrt,
92 gpir_op_ceil,
93 gpir_op_exp,
94 gpir_op_log,
95 gpir_op_sin,
96 gpir_op_cos,
97 gpir_op_tan,
98 gpir_op_branch_uncond,
99 gpir_op_eq,
100 gpir_op_ne,
101
102 /* auxiliary ops */
103 gpir_op_dummy_f,
104 gpir_op_dummy_m,
105
106 gpir_op_num,
107 } gpir_op;
108
109 typedef enum {
110 gpir_node_type_alu,
111 gpir_node_type_const,
112 gpir_node_type_load,
113 gpir_node_type_store,
114 gpir_node_type_branch,
115 } gpir_node_type;
116
117 typedef struct {
118 char *name;
119 bool dest_neg;
120 bool src_neg[4];
121 int *slots;
122 gpir_node_type type;
123 bool spillless;
124 bool schedule_first;
125 bool may_consume_two_slots;
126 } gpir_op_info;
127
128 extern const gpir_op_info gpir_op_infos[];
129
130 typedef struct {
131 enum {
132 GPIR_DEP_INPUT, /* def is the input of use */
133 GPIR_DEP_OFFSET, /* def is the offset of use (i.e. temp store) */
134 GPIR_DEP_READ_AFTER_WRITE,
135 GPIR_DEP_WRITE_AFTER_READ,
136 } type;
137
138 /* node execute before succ */
139 struct gpir_node *pred;
140 /* node execute after pred */
141 struct gpir_node *succ;
142
143 /* for node pred_list */
144 struct list_head pred_link;
145 /* for ndoe succ_list */
146 struct list_head succ_link;
147 } gpir_dep;
148
149 struct gpir_instr;
150 struct gpir_store_node;
151
152 typedef struct gpir_node {
153 struct list_head list;
154 gpir_op op;
155 gpir_node_type type;
156 int index;
157 char name[16];
158 bool printed;
159 struct gpir_block *block;
160
161 /* for nodes relationship */
162 /* for node who uses this node (successor) */
163 struct list_head succ_list;
164 /* for node this node uses (predecessor) */
165 struct list_head pred_list;
166
167 /* for scheduler and regalloc */
168 int value_reg;
169 union {
170 struct {
171 struct gpir_instr *instr;
172 struct gpir_store_node *physreg_store;
173 int pos;
174 int dist;
175 int index;
176 bool ready;
177 bool inserted;
178 bool max_node, next_max_node;
179 bool complex_allowed;
180 } sched;
181 struct {
182 int parent_index;
183 float reg_pressure;
184 int est;
185 bool scheduled;
186 } rsched;
187 struct {
188 float index;
189 struct gpir_node *last;
190 } vreg;
191 struct {
192 int index;
193 } preg;
194 };
195 } gpir_node;
196
197 typedef struct {
198 gpir_node node;
199
200 gpir_node *children[3];
201 bool children_negate[3];
202 int num_child;
203
204 bool dest_negate;
205 } gpir_alu_node;
206
207 typedef struct {
208 gpir_node node;
209 union fi value;
210 } gpir_const_node;
211
212 typedef struct {
213 int index;
214 struct list_head list;
215 } gpir_reg;
216
217 typedef struct {
218 gpir_node node;
219
220 unsigned index;
221 unsigned component;
222
223 gpir_reg *reg;
224 struct list_head reg_link;
225 } gpir_load_node;
226
227 typedef struct gpir_store_node {
228 gpir_node node;
229
230 unsigned index;
231 unsigned component;
232 gpir_node *child;
233
234 gpir_reg *reg;
235 } gpir_store_node;
236
237 enum gpir_instr_slot {
238 GPIR_INSTR_SLOT_MUL0,
239 GPIR_INSTR_SLOT_MUL1,
240 GPIR_INSTR_SLOT_ADD0,
241 GPIR_INSTR_SLOT_ADD1,
242 GPIR_INSTR_SLOT_PASS,
243 GPIR_INSTR_SLOT_COMPLEX,
244 GPIR_INSTR_SLOT_REG0_LOAD0,
245 GPIR_INSTR_SLOT_REG0_LOAD1,
246 GPIR_INSTR_SLOT_REG0_LOAD2,
247 GPIR_INSTR_SLOT_REG0_LOAD3,
248 GPIR_INSTR_SLOT_REG1_LOAD0,
249 GPIR_INSTR_SLOT_REG1_LOAD1,
250 GPIR_INSTR_SLOT_REG1_LOAD2,
251 GPIR_INSTR_SLOT_REG1_LOAD3,
252 GPIR_INSTR_SLOT_MEM_LOAD0,
253 GPIR_INSTR_SLOT_MEM_LOAD1,
254 GPIR_INSTR_SLOT_MEM_LOAD2,
255 GPIR_INSTR_SLOT_MEM_LOAD3,
256 GPIR_INSTR_SLOT_STORE0,
257 GPIR_INSTR_SLOT_STORE1,
258 GPIR_INSTR_SLOT_STORE2,
259 GPIR_INSTR_SLOT_STORE3,
260 GPIR_INSTR_SLOT_NUM,
261 GPIR_INSTR_SLOT_END,
262 GPIR_INSTR_SLOT_ALU_BEGIN = GPIR_INSTR_SLOT_MUL0,
263 GPIR_INSTR_SLOT_ALU_END = GPIR_INSTR_SLOT_COMPLEX,
264 GPIR_INSTR_SLOT_DIST_TWO_BEGIN = GPIR_INSTR_SLOT_MUL0,
265 GPIR_INSTR_SLOT_DIST_TWO_END = GPIR_INSTR_SLOT_PASS,
266 };
267
268 typedef struct gpir_instr {
269 int index;
270 struct list_head list;
271
272 gpir_node *slots[GPIR_INSTR_SLOT_NUM];
273
274 /* The number of ALU slots free for moves. */
275 int alu_num_slot_free;
276
277 /* The number of ALU slots free for moves, except for the complex slot. */
278 int alu_non_cplx_slot_free;
279
280 /* We need to make sure that we can insert moves in the following cases:
281 * (1) There was a use of a value two cycles ago.
282 * (2) There were more than 5 uses of a value 1 cycle ago (or else we can't
283 * possibly satisfy (1) for the next cycle).
284 * (3) There is a store instruction scheduled, but not its child.
285 *
286 * The complex slot cannot be used for a move in case (1), since it only
287 * has a FIFO depth of 1, but it can be used for (2) as well as (3) as long
288 * as the uses aren't in certain slots. It turns out that we don't have to
289 * worry about nodes that can't use the complex slot for (2), since there
290 * are at most 4 uses 1 cycle ago that can't use the complex slot, but we
291 * do have to worry about (3). This means tracking stores whose children
292 * cannot be in the complex slot. In order to ensure that we have enough
293 * space for all three, we maintain the following invariants:
294 *
295 * (1) alu_num_slot_free >= alu_num_slot_needed_by_store +
296 * alu_num_slot_needed_by_max +
297 * max(alu_num_unscheduled_next_max - alu_max_allowed_next_max, 0)
298 * (2) alu_non_cplx_slot_free >= alu_num_slot_needed_by_max +
299 * alu_num_slot_needed_by_non_cplx_store
300 *
301 * alu_max_allowed_next_max is normally 5 (since there can be at most 5 max
302 * nodes for the next instruction) but when there is a complex1 node in
303 * this instruction it reduces to 4 to reserve a slot for complex2 in the
304 * next instruction.
305 */
306 int alu_num_slot_needed_by_store;
307 int alu_num_slot_needed_by_non_cplx_store;
308 int alu_num_slot_needed_by_max;
309 int alu_num_unscheduled_next_max;
310 int alu_max_allowed_next_max;
311
312 /* Used to communicate to the scheduler how many slots need to be cleared
313 * up in order to satisfy the invariants.
314 */
315 int slot_difference;
316 int non_cplx_slot_difference;
317
318 int reg0_use_count;
319 bool reg0_is_attr;
320 int reg0_index;
321
322 int reg1_use_count;
323 int reg1_index;
324
325 int mem_use_count;
326 bool mem_is_temp;
327 int mem_index;
328
329 enum {
330 GPIR_INSTR_STORE_NONE,
331 GPIR_INSTR_STORE_VARYING,
332 GPIR_INSTR_STORE_REG,
333 GPIR_INSTR_STORE_TEMP,
334 } store_content[2];
335 int store_index[2];
336 } gpir_instr;
337
338 typedef struct gpir_block {
339 struct list_head list;
340 struct list_head node_list;
341 struct list_head instr_list;
342 struct gpir_compiler *comp;
343
344 struct gpir_block *successors[2];
345 struct list_head predecessors;
346 struct list_head predecessors_node;
347
348 /* for regalloc */
349
350 /* The set of live registers, i.e. registers whose value may be used
351 * eventually, at the beginning of the block.
352 */
353 BITSET_WORD *live_in;
354
355 /* Set of live registers at the end of the block. */
356 BITSET_WORD *live_out;
357
358 /* Set of registers that may have a value defined at the end of the
359 * block.
360 */
361 BITSET_WORD *def_out;
362
363 /* After register allocation, the set of live physical registers at the end
364 * of the block. Needed for scheduling.
365 */
366 uint64_t live_out_phys;
367
368 /* For codegen, the offset in the final program. */
369 unsigned instr_offset;
370
371 /* for scheduler */
372 union {
373 struct {
374 int instr_index;
375 } sched;
376 struct {
377 int node_index;
378 } rsched;
379 };
380 } gpir_block;
381
382 typedef struct {
383 gpir_node node;
384 gpir_block *dest;
385 gpir_node *cond;
386 } gpir_branch_node;
387
388 struct lima_vs_compiled_shader;
389
390 #define GPIR_VECTOR_SSA_VIEWPORT_SCALE 0
391 #define GPIR_VECTOR_SSA_VIEWPORT_OFFSET 1
392 #define GPIR_VECTOR_SSA_NUM 2
393
394 typedef struct gpir_compiler {
395 struct list_head block_list;
396 int cur_index;
397
398 /* Find the gpir node for a given NIR SSA def. */
399 gpir_node **node_for_ssa;
400
401 /* Find the gpir node for a given NIR register. */
402 gpir_node **node_for_reg;
403
404 /* Find the gpir register for a given NIR SSA def. */
405 gpir_reg **reg_for_ssa;
406
407 /* Find the gpir register for a given NIR register. */
408 gpir_reg **reg_for_reg;
409
410 /* gpir block for NIR block. */
411 gpir_block **blocks;
412
413 /* for physical reg */
414 struct list_head reg_list;
415 int cur_reg;
416
417 /* lookup for vector ssa */
418 struct {
419 int ssa;
420 gpir_node *nodes[4];
421 } vector_ssa[GPIR_VECTOR_SSA_NUM];
422
423 struct lima_vs_compiled_shader *prog;
424 int constant_base;
425
426 /* shaderdb */
427 int num_instr;
428 int num_loops;
429 int num_spills;
430 int num_fills;
431 } gpir_compiler;
432
433 #define GPIR_VALUE_REG_NUM 11
434 #define GPIR_PHYSICAL_REG_NUM 64
435
436 void *gpir_node_create(gpir_block *block, gpir_op op);
437 gpir_dep *gpir_node_add_dep(gpir_node *succ, gpir_node *pred, int type);
438 void gpir_node_remove_dep(gpir_node *succ, gpir_node *pred);
439 void gpir_node_replace_succ(gpir_node *dst, gpir_node *src);
440 void gpir_node_replace_pred(gpir_dep *dep, gpir_node *new_pred);
441 void gpir_node_replace_child(gpir_node *parent, gpir_node *old_child, gpir_node *new_child);
442 void gpir_node_insert_child(gpir_node *parent, gpir_node *child, gpir_node *insert_child);
443 void gpir_node_delete(gpir_node *node);
444 void gpir_node_print_prog_dep(gpir_compiler *comp);
445 void gpir_node_print_prog_seq(gpir_compiler *comp);
446
447 #define gpir_node_foreach_succ(node, dep) \
448 list_for_each_entry(gpir_dep, dep, &node->succ_list, succ_link)
449 #define gpir_node_foreach_succ_safe(node, dep) \
450 list_for_each_entry_safe(gpir_dep, dep, &node->succ_list, succ_link)
451 #define gpir_node_foreach_pred(node, dep) \
452 list_for_each_entry(gpir_dep, dep, &node->pred_list, pred_link)
453 #define gpir_node_foreach_pred_safe(node, dep) \
454 list_for_each_entry_safe(gpir_dep, dep, &node->pred_list, pred_link)
455
gpir_node_is_root(gpir_node * node)456 static inline bool gpir_node_is_root(gpir_node *node)
457 {
458 return list_is_empty(&node->succ_list);
459 }
460
gpir_node_is_leaf(gpir_node * node)461 static inline bool gpir_node_is_leaf(gpir_node *node)
462 {
463 return list_is_empty(&node->pred_list);
464 }
465
466 #define gpir_node_to_alu(node) ((gpir_alu_node *)(node))
467 #define gpir_node_to_const(node) ((gpir_const_node *)(node))
468 #define gpir_node_to_load(node) ((gpir_load_node *)(node))
469 #define gpir_node_to_store(node) ((gpir_store_node *)(node))
470 #define gpir_node_to_branch(node) ((gpir_branch_node *)(node))
471
472 gpir_instr *gpir_instr_create(gpir_block *block);
473 bool gpir_instr_try_insert_node(gpir_instr *instr, gpir_node *node);
474 void gpir_instr_remove_node(gpir_instr *instr, gpir_node *node);
475 void gpir_instr_print_prog(gpir_compiler *comp);
476
477 bool gpir_codegen_acc_same_op(gpir_op op1, gpir_op op2);
478
479 bool gpir_optimize(gpir_compiler *comp);
480 bool gpir_pre_rsched_lower_prog(gpir_compiler *comp);
481 bool gpir_reduce_reg_pressure_schedule_prog(gpir_compiler *comp);
482 bool gpir_regalloc_prog(gpir_compiler *comp);
483 bool gpir_schedule_prog(gpir_compiler *comp);
484 bool gpir_codegen_prog(gpir_compiler *comp);
485
486 gpir_reg *gpir_create_reg(gpir_compiler *comp);
487
488 #endif
489