• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (c) 2013 Rob Clark <robdclark@gmail.com>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  */
23 
24 #ifndef IR3_H_
25 #define IR3_H_
26 
27 #include <stdint.h>
28 #include <stdbool.h>
29 
30 #include "util/u_debug.h"
31 #include "util/list.h"
32 
33 #include "instr-a3xx.h"
34 #include "disasm.h"  /* TODO move 'enum shader_t' somewhere else.. */
35 
36 /* low level intermediate representation of an adreno shader program */
37 
/* Forward declarations: these types are referenced by pointer before
 * (or without) their full definition in this header.
 */
struct ir3_compiler;
struct ir3;
struct ir3_instruction;
struct ir3_block;

/* Summary information about a shader; a pointer to one of these is
 * handed to ir3_assemble() together with the gpu_id.
 */
struct ir3_info {
	uint32_t gpu_id;
	uint16_t sizedwords;
	uint16_t instrs_count;   /* expanded to account for rpt's */

	/* NOTE: the max_* fields only cover registers the shader itself
	 * touches.  Registers that are merely fetched (ie. vertex data
	 * fetched via VFD_DECODE) but never touched by the shader are
	 * not counted.
	 */
	int8_t   max_reg;        /* highest GPR # used by shader */
	int8_t   max_half_reg;
	int16_t  max_const;
};
55 
/* A single register operand of an ir3_instruction. */
struct ir3_register {
	enum {
		IR3_REG_CONST   = 1 << 0,
		IR3_REG_IMMED   = 1 << 1,
		IR3_REG_HALF    = 1 << 2,
		IR3_REG_RELATIV = 1 << 3,
		IR3_REG_R       = 1 << 4,
		/* Most instructions appear to support float abs/neg but not
		 * the integer equivalent, and the CP pass has to know which
		 * one (int or float) is meant in order to do the right thing.
		 * Hence separate float (F*) and signed-int (S*) variants of
		 * the abs/neg flags.  For .b (bitwise) operations the
		 * "negate" is really a bitwise not, which gets its own flag
		 * to make that explicit.
		 */
		IR3_REG_FNEG    = 1 << 5,
		IR3_REG_FABS    = 1 << 6,
		IR3_REG_SNEG    = 1 << 7,
		IR3_REG_SABS    = 1 << 8,
		IR3_REG_BNOT    = 1 << 9,
		IR3_REG_EVEN    = 1 << 10,
		IR3_REG_POS_INF = 1 << 11,
		/* (ei) flag, end-input?  Set on the last bary, presumably to
		 * signal that the shader needs no more input:
		 */
		IR3_REG_EI      = 1 << 12,
		/* meta-flags, only meaningful in the intermediate stages of
		 * the IR, ie. before register assignment is done:
		 */
		IR3_REG_SSA     = 1 << 13,   /* 'instr' is ptr to assigning instr */
		IR3_REG_ARRAY   = 1 << 14,
		IR3_REG_PHI_SRC = 1 << 15,   /* phi src, regs[0]->instr points to phi */

	} flags;

	/* Operand payload; which member is meaningful depends on 'flags': */
	union {
		/* normal registers:
		 * the component lives in the low two bits of the reg #, ie.
		 * rN.x is encoded as: (N << 2) | x
		 */
		int   num;
		/* immediate: */
		int32_t  iim_val;
		uint32_t uim_val;
		float    fim_val;
		/* relative: */
		struct {
			uint16_t id;
			int16_t offset;
		} array;
	};

	/* With IR3_REG_SSA, a src register points back at the instruction
	 * that assigns it.
	 *
	 * With IR3_REG_ARRAY, this points back at the last dependent array
	 * access.  The net effect is the same: it refers to an earlier
	 * instruction which this one depends on.
	 */
	struct ir3_instruction *instr;

	union {
		/* Used for cat5 instructions, and also for internal/IR level
		 * tracking of which registers an instruction reads/writes.
		 * The name is somewhat misleading, since it describes both
		 * src and dst operands that span multiple adjacent registers.
		 */
		unsigned wrmask;
		/* For relative addressing, a 32 bit mask is too small to
		 * describe an array, but there are no disjoint sets to deal
		 * with either, so a plain size (number of scalar components)
		 * is used instead.
		 */
		unsigned size;
	};
};
130 
131 struct ir3_instruction {
132 	struct ir3_block *block;
133 	opc_t opc;
134 	enum {
135 		/* (sy) flag is set on first instruction, and after sample
136 		 * instructions (probably just on RAW hazard).
137 		 */
138 		IR3_INSTR_SY    = 0x001,
139 		/* (ss) flag is set on first instruction, and first instruction
140 		 * to depend on the result of "long" instructions (RAW hazard):
141 		 *
142 		 *   rcp, rsq, log2, exp2, sin, cos, sqrt
143 		 *
144 		 * It seems to synchronize until all in-flight instructions are
145 		 * completed, for example:
146 		 *
147 		 *   rsq hr1.w, hr1.w
148 		 *   add.f hr2.z, (neg)hr2.z, hc0.y
149 		 *   mul.f hr2.w, (neg)hr2.y, (neg)hr2.y
150 		 *   rsq hr2.x, hr2.x
151 		 *   (rpt1)nop
152 		 *   mad.f16 hr2.w, hr2.z, hr2.z, hr2.w
153 		 *   nop
154 		 *   mad.f16 hr2.w, (neg)hr0.w, (neg)hr0.w, hr2.w
155 		 *   (ss)(rpt2)mul.f hr1.x, (r)hr1.x, hr1.w
156 		 *   (rpt2)mul.f hr0.x, (neg)(r)hr0.x, hr2.x
157 		 *
158 		 * The last mul.f does not have (ss) set, presumably because the
159 		 * (ss) on the previous instruction does the job.
160 		 *
161 		 * The blob driver also seems to set it on WAR hazards, although
162 		 * not really clear if this is needed or just blob compiler being
163 		 * sloppy.  So far I haven't found a case where removing the (ss)
164 		 * causes problems for WAR hazard, but I could just be getting
165 		 * lucky:
166 		 *
167 		 *   rcp r1.y, r3.y
168 		 *   (ss)(rpt2)mad.f32 r3.y, (r)c9.x, r1.x, (r)r3.z
169 		 *
170 		 */
171 		IR3_INSTR_SS    = 0x002,
172 		/* (jp) flag is set on jump targets:
173 		 */
174 		IR3_INSTR_JP    = 0x004,
175 		IR3_INSTR_UL    = 0x008,
176 		IR3_INSTR_3D    = 0x010,
177 		IR3_INSTR_A     = 0x020,
178 		IR3_INSTR_O     = 0x040,
179 		IR3_INSTR_P     = 0x080,
180 		IR3_INSTR_S     = 0x100,
181 		IR3_INSTR_S2EN  = 0x200,
182 		IR3_INSTR_G     = 0x400,
183 		/* meta-flags, for intermediate stages of IR, ie.
184 		 * before register assignment is done:
185 		 */
186 		IR3_INSTR_MARK  = 0x1000,
187 		IR3_INSTR_UNUSED= 0x2000,
188 	} flags;
189 	int repeat;
190 #ifdef DEBUG
191 	unsigned regs_max;
192 #endif
193 	unsigned regs_count;
194 	struct ir3_register **regs;
195 	union {
196 		struct {
197 			char inv;
198 			char comp;
199 			int  immed;
200 			struct ir3_block *target;
201 		} cat0;
202 		struct {
203 			type_t src_type, dst_type;
204 		} cat1;
205 		struct {
206 			enum {
207 				IR3_COND_LT = 0,
208 				IR3_COND_LE = 1,
209 				IR3_COND_GT = 2,
210 				IR3_COND_GE = 3,
211 				IR3_COND_EQ = 4,
212 				IR3_COND_NE = 5,
213 			} condition;
214 		} cat2;
215 		struct {
216 			unsigned samp, tex;
217 			type_t type;
218 		} cat5;
219 		struct {
220 			type_t type;
221 			int src_offset;
222 			int dst_offset;
223 			int iim_val;
224 		} cat6;
225 		/* for meta-instructions, just used to hold extra data
226 		 * before instruction scheduling, etc
227 		 */
228 		struct {
229 			int off;              /* component/offset */
230 		} fo;
231 		struct {
232 			/* used to temporarily hold reference to nir_phi_instr
233 			 * until we resolve the phi srcs
234 			 */
235 			void *nphi;
236 		} phi;
237 		struct {
238 			struct ir3_block *block;
239 		} inout;
240 	};
241 
242 	/* transient values used during various algorithms: */
243 	union {
244 		/* The instruction depth is the max dependency distance to output.
245 		 *
246 		 * You can also think of it as the "cost", if we did any sort of
247 		 * optimization for register footprint.  Ie. a value that is  just
248 		 * result of moving a const to a reg would have a low cost,  so to
249 		 * it could make sense to duplicate the instruction at various
250 		 * points where the result is needed to reduce register footprint.
251 		 */
252 		unsigned depth;
253 		/* When we get to the RA stage, we no longer need depth, but
254 		 * we do need instruction's position/name:
255 		 */
256 		struct {
257 			uint16_t ip;
258 			uint16_t name;
259 		};
260 	};
261 
262 	/* used for per-pass extra instruction data.
263 	 */
264 	void *data;
265 
266 	/* Used during CP and RA stages.  For fanin and shader inputs/
267 	 * outputs where we need a sequence of consecutive registers,
268 	 * keep track of each src instructions left (ie 'n-1') and right
269 	 * (ie 'n+1') neighbor.  The front-end must insert enough mov's
270 	 * to ensure that each instruction has at most one left and at
271 	 * most one right neighbor.  During the copy-propagation pass,
272 	 * we only remove mov's when we can preserve this constraint.
273 	 * And during the RA stage, we use the neighbor information to
274 	 * allocate a block of registers in one shot.
275 	 *
276 	 * TODO: maybe just add something like:
277 	 *   struct ir3_instruction_ref {
278 	 *       struct ir3_instruction *instr;
279 	 *       unsigned cnt;
280 	 *   }
281 	 *
282 	 * Or can we get away without the refcnt stuff?  It seems like
283 	 * it should be overkill..  the problem is if, potentially after
284 	 * already eliminating some mov's, if you have a single mov that
285 	 * needs to be grouped with it's neighbors in two different
286 	 * places (ex. shader output and a fanin).
287 	 */
288 	struct {
289 		struct ir3_instruction *left, *right;
290 		uint16_t left_cnt, right_cnt;
291 	} cp;
292 
293 	/* an instruction can reference at most one address register amongst
294 	 * it's src/dst registers.  Beyond that, you need to insert mov's.
295 	 *
296 	 * NOTE: do not write this directly, use ir3_instr_set_address()
297 	 */
298 	struct ir3_instruction *address;
299 
300 	/* Entry in ir3_block's instruction list: */
301 	struct list_head node;
302 
303 #ifdef DEBUG
304 	uint32_t serialno;
305 #endif
306 };
307 
308 static inline struct ir3_instruction *
ir3_neighbor_first(struct ir3_instruction * instr)309 ir3_neighbor_first(struct ir3_instruction *instr)
310 {
311 	int cnt = 0;
312 	while (instr->cp.left) {
313 		instr = instr->cp.left;
314 		if (++cnt > 0xffff) {
315 			debug_assert(0);
316 			break;
317 		}
318 	}
319 	return instr;
320 }
321 
ir3_neighbor_count(struct ir3_instruction * instr)322 static inline int ir3_neighbor_count(struct ir3_instruction *instr)
323 {
324 	int num = 1;
325 
326 	debug_assert(!instr->cp.left);
327 
328 	while (instr->cp.right) {
329 		num++;
330 		instr = instr->cp.right;
331 		if (num > 0xffff) {
332 			debug_assert(0);
333 			break;
334 		}
335 	}
336 
337 	return num;
338 }
339 
340 struct ir3 {
341 	struct ir3_compiler *compiler;
342 
343 	unsigned ninputs, noutputs;
344 	struct ir3_instruction **inputs;
345 	struct ir3_instruction **outputs;
346 
347 	/* Track bary.f (and ldlv) instructions.. this is needed in
348 	 * scheduling to ensure that all varying fetches happen before
349 	 * any potential kill instructions.  The hw gets grumpy if all
350 	 * threads in a group are killed before the last bary.f gets
351 	 * a chance to signal end of input (ei).
352 	 */
353 	unsigned baryfs_count, baryfs_sz;
354 	struct ir3_instruction **baryfs;
355 
356 	/* Track all indirect instructions (read and write).  To avoid
357 	 * deadlock scenario where an address register gets scheduled,
358 	 * but other dependent src instructions cannot be scheduled due
359 	 * to dependency on a *different* address register value, the
360 	 * scheduler needs to ensure that all dependencies other than
361 	 * the instruction other than the address register are scheduled
362 	 * before the one that writes the address register.  Having a
363 	 * convenient list of instructions that reference some address
364 	 * register simplifies this.
365 	 */
366 	unsigned indirects_count, indirects_sz;
367 	struct ir3_instruction **indirects;
368 	/* and same for instructions that consume predicate register: */
369 	unsigned predicates_count, predicates_sz;
370 	struct ir3_instruction **predicates;
371 
372 	/* Track instructions which do not write a register but other-
373 	 * wise must not be discarded (such as kill, stg, etc)
374 	 */
375 	unsigned keeps_count, keeps_sz;
376 	struct ir3_instruction **keeps;
377 
378 	/* Track texture sample instructions which need texture state
379 	 * patched in (for astc-srgb workaround):
380 	 */
381 	unsigned astc_srgb_count, astc_srgb_sz;
382 	struct ir3_instruction **astc_srgb;
383 
384 	/* List of blocks: */
385 	struct list_head block_list;
386 
387 	/* List of ir3_array's: */
388 	struct list_head array_list;
389 };
390 
391 typedef struct nir_variable nir_variable;
392 
393 struct ir3_array {
394 	struct list_head node;
395 	unsigned length;
396 	unsigned id;
397 
398 	nir_variable *var;
399 
400 	/* We track the last write and last access (read or write) to
401 	 * setup dependencies on instructions that read or write the
402 	 * array.  Reads can be re-ordered wrt. other reads, but should
403 	 * not be re-ordered wrt. to writes.  Writes cannot be reordered
404 	 * wrt. any other access to the array.
405 	 *
406 	 * So array reads depend on last write, and array writes depend
407 	 * on the last access.
408 	 */
409 	struct ir3_instruction *last_write, *last_access;
410 
411 	/* extra stuff used in RA pass: */
412 	unsigned base;      /* base vreg name */
413 	unsigned reg;       /* base physical reg */
414 	uint16_t start_ip, end_ip;
415 };
416 
417 struct ir3_array * ir3_lookup_array(struct ir3 *ir, unsigned id);
418 
419 typedef struct nir_block nir_block;
420 
421 struct ir3_block {
422 	struct list_head node;
423 	struct ir3 *shader;
424 
425 	nir_block *nblock;
426 
427 	struct list_head instr_list;  /* list of ir3_instruction */
428 
429 	/* each block has either one or two successors.. in case of
430 	 * two successors, 'condition' decides which one to follow.
431 	 * A block preceding an if/else has two successors.
432 	 */
433 	struct ir3_instruction *condition;
434 	struct ir3_block *successors[2];
435 
436 	uint16_t start_ip, end_ip;
437 
438 	/* used for per-pass extra block data.  Mainly used right
439 	 * now in RA step to track livein/liveout.
440 	 */
441 	void *data;
442 
443 #ifdef DEBUG
444 	uint32_t serialno;
445 #endif
446 };
447 
/* Return a 32-bit identifier for 'block'.
 *
 * DEBUG builds return the block's serialno.  Other builds derive the
 * id from the block's address, keeping only the low 32 bits.  The
 * pointer is converted through uintptr_t (rather than unsigned long,
 * which is narrower than a pointer on LLP64 targets) so that the
 * pointer-to-integer conversion itself is always well defined and the
 * only truncation is the explicit cast to uint32_t.
 */
static inline uint32_t
block_id(struct ir3_block *block)
{
#ifdef DEBUG
	return block->serialno;
#else
	return (uint32_t)(uintptr_t)block;
#endif
}
457 
458 struct ir3 * ir3_create(struct ir3_compiler *compiler,
459 		unsigned nin, unsigned nout);
460 void ir3_destroy(struct ir3 *shader);
461 void * ir3_assemble(struct ir3 *shader,
462 		struct ir3_info *info, uint32_t gpu_id);
463 void * ir3_alloc(struct ir3 *shader, int sz);
464 
465 struct ir3_block * ir3_block_create(struct ir3 *shader);
466 
467 struct ir3_instruction * ir3_instr_create(struct ir3_block *block, opc_t opc);
468 struct ir3_instruction * ir3_instr_create2(struct ir3_block *block,
469 		opc_t opc, int nreg);
470 struct ir3_instruction * ir3_instr_clone(struct ir3_instruction *instr);
471 const char *ir3_instr_name(struct ir3_instruction *instr);
472 
473 struct ir3_register * ir3_reg_create(struct ir3_instruction *instr,
474 		int num, int flags);
475 struct ir3_register * ir3_reg_clone(struct ir3 *shader,
476 		struct ir3_register *reg);
477 
478 void ir3_instr_set_address(struct ir3_instruction *instr,
479 		struct ir3_instruction *addr);
480 
ir3_instr_check_mark(struct ir3_instruction * instr)481 static inline bool ir3_instr_check_mark(struct ir3_instruction *instr)
482 {
483 	if (instr->flags & IR3_INSTR_MARK)
484 		return true;  /* already visited */
485 	instr->flags |= IR3_INSTR_MARK;
486 	return false;
487 }
488 
/* Mark clearing, per block and for a whole shader (implemented outside
 * this header):
 */
void ir3_block_clear_mark(struct ir3_block *block);
void ir3_clear_mark(struct ir3 *shader);

/* Instruction count for 'ir' (implemented outside this header): */
unsigned ir3_count_instructions(struct ir3 *ir);
493 
ir3_instr_regno(struct ir3_instruction * instr,struct ir3_register * reg)494 static inline int ir3_instr_regno(struct ir3_instruction *instr,
495 		struct ir3_register *reg)
496 {
497 	unsigned i;
498 	for (i = 0; i < instr->regs_count; i++)
499 		if (reg == instr->regs[i])
500 			return i;
501 	return -1;
502 }
503 
504 
#define MAX_ARRAYS 16

/* Pack a register number and a component into a register id, with the
 * component in the low two bits: (num << 2) | comp.
 *
 * comp:
 *   0 - x
 *   1 - y
 *   2 - z
 *   3 - w
 *
 * Only the low two bits of 'comp' are used.  The shift is carried out
 * in uint32_t, because left-shifting a negative signed int is undefined
 * behaviour in C; the result for non-negative 'num' is unchanged.
 */
static inline uint32_t regid(int num, int comp)
{
	return ((uint32_t)num << 2) | ((uint32_t)comp & 0x3u);
}
517 
reg_num(struct ir3_register * reg)518 static inline uint32_t reg_num(struct ir3_register *reg)
519 {
520 	return reg->num >> 2;
521 }
522 
reg_comp(struct ir3_register * reg)523 static inline uint32_t reg_comp(struct ir3_register *reg)
524 {
525 	return reg->num & 0x3;
526 }
527 
is_flow(struct ir3_instruction * instr)528 static inline bool is_flow(struct ir3_instruction *instr)
529 {
530 	return (opc_cat(instr->opc) == 0);
531 }
532 
is_kill(struct ir3_instruction * instr)533 static inline bool is_kill(struct ir3_instruction *instr)
534 {
535 	return instr->opc == OPC_KILL;
536 }
537 
is_nop(struct ir3_instruction * instr)538 static inline bool is_nop(struct ir3_instruction *instr)
539 {
540 	return instr->opc == OPC_NOP;
541 }
542 
543 /* Is it a non-transformative (ie. not type changing) mov?  This can
544  * also include absneg.s/absneg.f, which for the most part can be
545  * treated as a mov (single src argument).
546  */
is_same_type_mov(struct ir3_instruction * instr)547 static inline bool is_same_type_mov(struct ir3_instruction *instr)
548 {
549 	struct ir3_register *dst = instr->regs[0];
550 
551 	/* mov's that write to a0.x or p0.x are special: */
552 	if (dst->num == regid(REG_P0, 0))
553 		return false;
554 	if (dst->num == regid(REG_A0, 0))
555 		return false;
556 
557 	if (dst->flags & (IR3_REG_RELATIV | IR3_REG_ARRAY))
558 		return false;
559 
560 	switch (instr->opc) {
561 	case OPC_MOV:
562 		return instr->cat1.src_type == instr->cat1.dst_type;
563 	case OPC_ABSNEG_F:
564 	case OPC_ABSNEG_S:
565 		return true;
566 	default:
567 		return false;
568 	}
569 }
570 
is_alu(struct ir3_instruction * instr)571 static inline bool is_alu(struct ir3_instruction *instr)
572 {
573 	return (1 <= opc_cat(instr->opc)) && (opc_cat(instr->opc) <= 3);
574 }
575 
is_sfu(struct ir3_instruction * instr)576 static inline bool is_sfu(struct ir3_instruction *instr)
577 {
578 	return (opc_cat(instr->opc) == 4);
579 }
580 
is_tex(struct ir3_instruction * instr)581 static inline bool is_tex(struct ir3_instruction *instr)
582 {
583 	return (opc_cat(instr->opc) == 5);
584 }
585 
is_mem(struct ir3_instruction * instr)586 static inline bool is_mem(struct ir3_instruction *instr)
587 {
588 	return (opc_cat(instr->opc) == 6);
589 }
590 
591 static inline bool
is_store(struct ir3_instruction * instr)592 is_store(struct ir3_instruction *instr)
593 {
594 	/* these instructions, the "destination" register is
595 	 * actually a source, the address to store to.
596 	 */
597 	switch (instr->opc) {
598 	case OPC_STG:
599 	case OPC_STP:
600 	case OPC_STL:
601 	case OPC_STLW:
602 	case OPC_L2G:
603 	case OPC_G2L:
604 		return true;
605 	default:
606 		return false;
607 	}
608 }
609 
is_load(struct ir3_instruction * instr)610 static inline bool is_load(struct ir3_instruction *instr)
611 {
612 	switch (instr->opc) {
613 	case OPC_LDG:
614 	case OPC_LDL:
615 	case OPC_LDP:
616 	case OPC_L2G:
617 	case OPC_LDLW:
618 	case OPC_LDC_4:
619 	case OPC_LDLV:
620 		/* probably some others too.. */
621 		return true;
622 	default:
623 		return false;
624 	}
625 }
626 
is_input(struct ir3_instruction * instr)627 static inline bool is_input(struct ir3_instruction *instr)
628 {
629 	/* in some cases, ldlv is used to fetch varying without
630 	 * interpolation.. fortunately inloc is the first src
631 	 * register in either case
632 	 */
633 	switch (instr->opc) {
634 	case OPC_LDLV:
635 	case OPC_BARY_F:
636 		return true;
637 	default:
638 		return false;
639 	}
640 }
641 
is_bool(struct ir3_instruction * instr)642 static inline bool is_bool(struct ir3_instruction *instr)
643 {
644 	switch (instr->opc) {
645 	case OPC_CMPS_F:
646 	case OPC_CMPS_S:
647 	case OPC_CMPS_U:
648 		return true;
649 	default:
650 		return false;
651 	}
652 }
653 
is_meta(struct ir3_instruction * instr)654 static inline bool is_meta(struct ir3_instruction *instr)
655 {
656 	/* TODO how should we count PHI (and maybe fan-in/out) which
657 	 * might actually contribute some instructions to the final
658 	 * result?
659 	 */
660 	return (opc_cat(instr->opc) == -1);
661 }
662 
writes_addr(struct ir3_instruction * instr)663 static inline bool writes_addr(struct ir3_instruction *instr)
664 {
665 	if (instr->regs_count > 0) {
666 		struct ir3_register *dst = instr->regs[0];
667 		return reg_num(dst) == REG_A0;
668 	}
669 	return false;
670 }
671 
writes_pred(struct ir3_instruction * instr)672 static inline bool writes_pred(struct ir3_instruction *instr)
673 {
674 	if (instr->regs_count > 0) {
675 		struct ir3_register *dst = instr->regs[0];
676 		return reg_num(dst) == REG_P0;
677 	}
678 	return false;
679 }
680 
681 /* returns defining instruction for reg */
682 /* TODO better name */
ssa(struct ir3_register * reg)683 static inline struct ir3_instruction *ssa(struct ir3_register *reg)
684 {
685 	if (reg->flags & (IR3_REG_SSA | IR3_REG_ARRAY)) {
686 		debug_assert(!(reg->instr && (reg->instr->flags & IR3_INSTR_UNUSED)));
687 		return reg->instr;
688 	}
689 	return NULL;
690 }
691 
/* Two instructions conflict when both are non-NULL and they are not
 * the same instruction.
 */
static inline bool conflicts(struct ir3_instruction *a,
		struct ir3_instruction *b)
{
	if (!a || !b)
		return false;

	return a != b;
}
697 
reg_gpr(struct ir3_register * r)698 static inline bool reg_gpr(struct ir3_register *r)
699 {
700 	if (r->flags & (IR3_REG_CONST | IR3_REG_IMMED))
701 		return false;
702 	if ((reg_num(r) == REG_A0) || (reg_num(r) == REG_P0))
703 		return false;
704 	return true;
705 }
706 
/* Map a 32-bit type to its 16-bit counterpart (F32->F16, U32->U16,
 * S32->S16).  The 16-bit types map to themselves.  Any other type trips
 * assert(0) and, if asserts are compiled out, yields ~0.
 */
static inline type_t half_type(type_t type)
{
	type_t half;

	switch (type) {
	case TYPE_F32:
		half = TYPE_F16;
		break;
	case TYPE_U32:
		half = TYPE_U16;
		break;
	case TYPE_S32:
		half = TYPE_S16;
		break;
	case TYPE_F16:
	case TYPE_U16:
	case TYPE_S16:
		half = type;
		break;
	default:
		assert(0);
		half = ~0;
		break;
	}

	return half;
}
722 
723 /* some cat2 instructions (ie. those which are not float) can embed an
724  * immediate:
725  */
ir3_cat2_int(opc_t opc)726 static inline bool ir3_cat2_int(opc_t opc)
727 {
728 	switch (opc) {
729 	case OPC_ADD_U:
730 	case OPC_ADD_S:
731 	case OPC_SUB_U:
732 	case OPC_SUB_S:
733 	case OPC_CMPS_U:
734 	case OPC_CMPS_S:
735 	case OPC_MIN_U:
736 	case OPC_MIN_S:
737 	case OPC_MAX_U:
738 	case OPC_MAX_S:
739 	case OPC_CMPV_U:
740 	case OPC_CMPV_S:
741 	case OPC_MUL_U:
742 	case OPC_MUL_S:
743 	case OPC_MULL_U:
744 	case OPC_CLZ_S:
745 	case OPC_ABSNEG_S:
746 	case OPC_AND_B:
747 	case OPC_OR_B:
748 	case OPC_NOT_B:
749 	case OPC_XOR_B:
750 	case OPC_BFREV_B:
751 	case OPC_CLZ_B:
752 	case OPC_SHL_B:
753 	case OPC_SHR_B:
754 	case OPC_ASHR_B:
755 	case OPC_MGEN_B:
756 	case OPC_GETBIT_B:
757 	case OPC_CBITS_B:
758 	case OPC_BARY_F:
759 		return true;
760 
761 	default:
762 		return false;
763 	}
764 }
765 
766 
767 /* map cat2 instruction to valid abs/neg flags: */
ir3_cat2_absneg(opc_t opc)768 static inline unsigned ir3_cat2_absneg(opc_t opc)
769 {
770 	switch (opc) {
771 	case OPC_ADD_F:
772 	case OPC_MIN_F:
773 	case OPC_MAX_F:
774 	case OPC_MUL_F:
775 	case OPC_SIGN_F:
776 	case OPC_CMPS_F:
777 	case OPC_ABSNEG_F:
778 	case OPC_CMPV_F:
779 	case OPC_FLOOR_F:
780 	case OPC_CEIL_F:
781 	case OPC_RNDNE_F:
782 	case OPC_RNDAZ_F:
783 	case OPC_TRUNC_F:
784 	case OPC_BARY_F:
785 		return IR3_REG_FABS | IR3_REG_FNEG;
786 
787 	case OPC_ADD_U:
788 	case OPC_ADD_S:
789 	case OPC_SUB_U:
790 	case OPC_SUB_S:
791 	case OPC_CMPS_U:
792 	case OPC_CMPS_S:
793 	case OPC_MIN_U:
794 	case OPC_MIN_S:
795 	case OPC_MAX_U:
796 	case OPC_MAX_S:
797 	case OPC_CMPV_U:
798 	case OPC_CMPV_S:
799 	case OPC_MUL_U:
800 	case OPC_MUL_S:
801 	case OPC_MULL_U:
802 	case OPC_CLZ_S:
803 		return 0;
804 
805 	case OPC_ABSNEG_S:
806 		return IR3_REG_SABS | IR3_REG_SNEG;
807 
808 	case OPC_AND_B:
809 	case OPC_OR_B:
810 	case OPC_NOT_B:
811 	case OPC_XOR_B:
812 	case OPC_BFREV_B:
813 	case OPC_CLZ_B:
814 	case OPC_SHL_B:
815 	case OPC_SHR_B:
816 	case OPC_ASHR_B:
817 	case OPC_MGEN_B:
818 	case OPC_GETBIT_B:
819 	case OPC_CBITS_B:
820 		return IR3_REG_BNOT;
821 
822 	default:
823 		return 0;
824 	}
825 }
826 
827 /* map cat3 instructions to valid abs/neg flags: */
ir3_cat3_absneg(opc_t opc)828 static inline unsigned ir3_cat3_absneg(opc_t opc)
829 {
830 	switch (opc) {
831 	case OPC_MAD_F16:
832 	case OPC_MAD_F32:
833 	case OPC_SEL_F16:
834 	case OPC_SEL_F32:
835 		return IR3_REG_FNEG;
836 
837 	case OPC_MAD_U16:
838 	case OPC_MADSH_U16:
839 	case OPC_MAD_S16:
840 	case OPC_MADSH_M16:
841 	case OPC_MAD_U24:
842 	case OPC_MAD_S24:
843 	case OPC_SEL_S16:
844 	case OPC_SEL_S32:
845 	case OPC_SAD_S16:
846 	case OPC_SAD_S32:
847 		/* neg *may* work on 3rd src.. */
848 
849 	case OPC_SEL_B16:
850 	case OPC_SEL_B32:
851 
852 	default:
853 		return 0;
854 	}
855 }
856 
/* Append 'val' to the growable array 'arr'.
 *
 * The macro relies on two companion lvalues named <arr>_count (number
 * of used entries) and <arr>_sz (allocated entries).  When the array is
 * full its capacity is doubled, with a minimum of 16 entries, using
 * realloc().
 *
 * NOTE(review): the realloc() result is assigned straight back to 'arr'
 * and is not checked, so an allocation failure loses the old buffer and
 * the store that follows dereferences NULL.
 */
#define array_insert(arr, val) \
	do { \
		if (arr ## _count == arr ## _sz) { \
			arr ## _sz = MAX2(2 * arr ## _sz, 16); \
			arr = realloc(arr, arr ## _sz * sizeof(arr[0])); \
		} \
		arr[arr ## _count++] = val; \
	} while (0)
864 
/* Iterate over an instruction's source registers, ie. regs[1] up to
 * regs[regs_count - 1].  Each pass assigns the register to __srcreg and
 * its zero-based source index to __n (declared by the macro itself).
 * NULL entries are skipped, and an instruction without any registers
 * is not iterated at all.
 *
 * The expansion ends in an 'if', so take care when following the loop
 * body with an 'else'.
 */
#define foreach_src_n(__srcreg, __n, __instr) \
	if ((__instr)->regs_count) \
		for (unsigned __cnt = (__instr)->regs_count - 1, __n = 0; \
				__n < __cnt; __n++) \
			if ((__srcreg = (__instr)->regs[__n + 1]))

/* Same iteration, for callers that do not need the source index: */
#define foreach_src(__srcreg, __instr) \
	foreach_src_n(__srcreg, __i, __instr)
874 
__ssa_src_cnt(struct ir3_instruction * instr)875 static inline unsigned __ssa_src_cnt(struct ir3_instruction *instr)
876 {
877 	if (instr->address)
878 		return instr->regs_count + 1;
879 	return instr->regs_count;
880 }
881 
__ssa_src_n(struct ir3_instruction * instr,unsigned n)882 static inline struct ir3_instruction * __ssa_src_n(struct ir3_instruction *instr, unsigned n)
883 {
884 	if (n == (instr->regs_count + 0))
885 		return instr->address;
886 	return ssa(instr->regs[n]);
887 }
888 
/* regs_count when the instruction has an address, regs_count - 1
 * otherwise:
 */
#define __src_cnt(__instr) \
	((__instr)->address ? (__instr)->regs_count : (__instr)->regs_count - 1)

/* Iterate over an instruction's SSA sources (as instructions).  Each
 * pass assigns the source instruction to __srcinst and the slot number
 * to __n (declared by the macro itself).  Slots for which
 * __ssa_src_n() returns NULL are skipped, and an instruction without
 * any registers is not iterated at all.
 */
#define foreach_ssa_src_n(__srcinst, __n, __instr) \
	if ((__instr)->regs_count) \
		for (unsigned __cnt = __ssa_src_cnt(__instr), __n = 0; \
				__n < __cnt; __n++) \
			if ((__srcinst = __ssa_src_n(__instr, __n)))

/* Same iteration, for callers that do not need the slot number: */
#define foreach_ssa_src(__srcinst, __instr) \
	foreach_ssa_src_n(__srcinst, __i, __instr)
900 
901 
/* The passes and utilities below are implemented outside this header. */

/* dump: */
void ir3_print(struct ir3 *ir);
void ir3_print_instr(struct ir3_instruction *instr);

/* depth calculation: */
int ir3_delayslots(struct ir3_instruction *assigner,
		struct ir3_instruction *consumer, unsigned n);
void ir3_insert_by_depth(struct ir3_instruction *instr,
		struct list_head *list);
void ir3_depth(struct ir3 *ir);

/* copy-propagate: */
struct ir3_shader_variant;
void ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so);

/* group neighbors and insert mov's to resolve conflicts: */
void ir3_group(struct ir3 *ir);

/* scheduling: */
int ir3_sched(struct ir3 *ir);

/* register assignment: */
struct ir3_ra_reg_set *ir3_ra_alloc_reg_set(void *memctx);
int ir3_ra(struct ir3 *ir3, enum shader_t type,
		bool frag_coord, bool frag_face);

/* legalize: */
void ir3_legalize(struct ir3 *ir, bool *has_samp, int *max_bary);
929 
930 /* ************************************************************************* */
931 /* instruction helpers */
932 
933 static inline struct ir3_instruction *
ir3_MOV(struct ir3_block * block,struct ir3_instruction * src,type_t type)934 ir3_MOV(struct ir3_block *block, struct ir3_instruction *src, type_t type)
935 {
936 	struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
937 	ir3_reg_create(instr, 0, 0);   /* dst */
938 	if (src->regs[0]->flags & IR3_REG_ARRAY) {
939 		struct ir3_register *src_reg =
940 			ir3_reg_create(instr, 0, IR3_REG_ARRAY);
941 		src_reg->array = src->regs[0]->array;
942 		src_reg->instr = src;
943 	} else {
944 		ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
945 	}
946 	debug_assert(!(src->regs[0]->flags & IR3_REG_RELATIV));
947 	instr->cat1.src_type = type;
948 	instr->cat1.dst_type = type;
949 	return instr;
950 }
951 
952 static inline struct ir3_instruction *
ir3_COV(struct ir3_block * block,struct ir3_instruction * src,type_t src_type,type_t dst_type)953 ir3_COV(struct ir3_block *block, struct ir3_instruction *src,
954 		type_t src_type, type_t dst_type)
955 {
956 	struct ir3_instruction *instr = ir3_instr_create(block, OPC_MOV);
957 	ir3_reg_create(instr, 0, 0);   /* dst */
958 	ir3_reg_create(instr, 0, IR3_REG_SSA)->instr = src;
959 	instr->cat1.src_type = src_type;
960 	instr->cat1.dst_type = dst_type;
961 	debug_assert(!(src->regs[0]->flags & IR3_REG_ARRAY));
962 	return instr;
963 }
964 
965 static inline struct ir3_instruction *
ir3_NOP(struct ir3_block * block)966 ir3_NOP(struct ir3_block *block)
967 {
968 	return ir3_instr_create(block, OPC_NOP);
969 }
970 
971 #define INSTR0(name)                                                     \
972 static inline struct ir3_instruction *                                   \
973 ir3_##name(struct ir3_block *block)                                      \
974 {                                                                        \
975 	struct ir3_instruction *instr =                                      \
976 		ir3_instr_create(block, OPC_##name);                             \
977 	return instr;                                                        \
978 }
979 
980 #define INSTR1(name)                                                     \
981 static inline struct ir3_instruction *                                   \
982 ir3_##name(struct ir3_block *block,                                      \
983 		struct ir3_instruction *a, unsigned aflags)                      \
984 {                                                                        \
985 	struct ir3_instruction *instr =                                      \
986 		ir3_instr_create(block, OPC_##name);                             \
987 	ir3_reg_create(instr, 0, 0);   /* dst */                             \
988 	ir3_reg_create(instr, 0, IR3_REG_SSA | aflags)->instr = a;           \
989 	return instr;                                                        \
990 }
991 
/*
 * INSTR2(name): define ir3_<name>(block, a, aflags, b, bflags), a
 * builder for an OPC_<name> instruction with two sources.
 *
 * Registers are created in the order dst, a, b.  Each source register
 * is flagged IR3_REG_SSA plus the caller supplied flags, and its
 * 'instr' pointer is set to the corresponding source instruction.
 */
#define INSTR2(name)                                                     \
static inline struct ir3_instruction *                                   \
ir3_##name(struct ir3_block *block,                                      \
		struct ir3_instruction *a, unsigned aflags,                      \
		struct ir3_instruction *b, unsigned bflags)                      \
{                                                                        \
	struct ir3_instruction *insn = ir3_instr_create(block, OPC_##name);  \
	struct ir3_register *src;                                            \
	ir3_reg_create(insn, 0, 0);   /* dst */                              \
	src = ir3_reg_create(insn, 0, IR3_REG_SSA | aflags);                 \
	src->instr = a;                                                      \
	src = ir3_reg_create(insn, 0, IR3_REG_SSA | bflags);                 \
	src->instr = b;                                                      \
	return insn;                                                         \
}
1005 
/*
 * INSTR3(name): define ir3_<name>(block, a, aflags, b, bflags, c, cflags),
 * a builder for an OPC_<name> instruction with three sources.
 *
 * Registers are created in the order dst, a, b, c.  Each source
 * register is flagged IR3_REG_SSA plus the caller supplied flags, and
 * its 'instr' pointer is set to the corresponding source instruction.
 */
#define INSTR3(name)                                                     \
static inline struct ir3_instruction *                                   \
ir3_##name(struct ir3_block *block,                                      \
		struct ir3_instruction *a, unsigned aflags,                      \
		struct ir3_instruction *b, unsigned bflags,                      \
		struct ir3_instruction *c, unsigned cflags)                      \
{                                                                        \
	struct ir3_instruction *insn = ir3_instr_create(block, OPC_##name);  \
	struct ir3_register *src;                                            \
	ir3_reg_create(insn, 0, 0);   /* dst */                              \
	src = ir3_reg_create(insn, 0, IR3_REG_SSA | aflags);                 \
	src->instr = a;                                                      \
	src = ir3_reg_create(insn, 0, IR3_REG_SSA | bflags);                 \
	src->instr = b;                                                      \
	src = ir3_reg_create(insn, 0, IR3_REG_SSA | cflags);                 \
	src->instr = c;                                                      \
	return insn;                                                         \
}
1021 
1022 /* cat0 instructions: */
1023 INSTR0(BR);
1024 INSTR0(JUMP);
1025 INSTR1(KILL);
1026 INSTR0(END);
1027 
1028 /* cat2 instructions, most 2 src but some 1 src: */
1029 INSTR2(ADD_F)
INSTR2(MIN_F)1030 INSTR2(MIN_F)
1031 INSTR2(MAX_F)
1032 INSTR2(MUL_F)
1033 INSTR1(SIGN_F)
1034 INSTR2(CMPS_F)
1035 INSTR1(ABSNEG_F)
1036 INSTR2(CMPV_F)
1037 INSTR1(FLOOR_F)
1038 INSTR1(CEIL_F)
1039 INSTR1(RNDNE_F)
1040 INSTR1(RNDAZ_F)
1041 INSTR1(TRUNC_F)
1042 INSTR2(ADD_U)
1043 INSTR2(ADD_S)
1044 INSTR2(SUB_U)
1045 INSTR2(SUB_S)
1046 INSTR2(CMPS_U)
1047 INSTR2(CMPS_S)
1048 INSTR2(MIN_U)
1049 INSTR2(MIN_S)
1050 INSTR2(MAX_U)
1051 INSTR2(MAX_S)
1052 INSTR1(ABSNEG_S)
1053 INSTR2(AND_B)
1054 INSTR2(OR_B)
1055 INSTR1(NOT_B)
1056 INSTR2(XOR_B)
1057 INSTR2(CMPV_U)
1058 INSTR2(CMPV_S)
1059 INSTR2(MUL_U)
1060 INSTR2(MUL_S)
1061 INSTR2(MULL_U)
1062 INSTR1(BFREV_B)
1063 INSTR1(CLZ_S)
1064 INSTR1(CLZ_B)
1065 INSTR2(SHL_B)
1066 INSTR2(SHR_B)
1067 INSTR2(ASHR_B)
1068 INSTR2(BARY_F)
1069 INSTR2(MGEN_B)
1070 INSTR2(GETBIT_B)
1071 INSTR1(SETRM)
1072 INSTR1(CBITS_B)
1073 INSTR2(SHB)
1074 INSTR2(MSAD)
1075 
1076 /* cat3 instructions: */
1077 INSTR3(MAD_U16)
1078 INSTR3(MADSH_U16)
1079 INSTR3(MAD_S16)
1080 INSTR3(MADSH_M16)
1081 INSTR3(MAD_U24)
1082 INSTR3(MAD_S24)
1083 INSTR3(MAD_F16)
1084 INSTR3(MAD_F32)
1085 INSTR3(SEL_B16)
1086 INSTR3(SEL_B32)
1087 INSTR3(SEL_S16)
1088 INSTR3(SEL_S32)
1089 INSTR3(SEL_F16)
1090 INSTR3(SEL_F32)
1091 INSTR3(SAD_S16)
1092 INSTR3(SAD_S32)
1093 
1094 /* cat4 instructions: */
1095 INSTR1(RCP)
1096 INSTR1(RSQ)
1097 INSTR1(LOG2)
1098 INSTR1(EXP2)
1099 INSTR1(SIN)
1100 INSTR1(COS)
1101 INSTR1(SQRT)
1102 
1103 /* cat5 instructions: */
1104 INSTR1(DSX)
1105 INSTR1(DSY)
1106 
1107 static inline struct ir3_instruction *
1108 ir3_SAM(struct ir3_block *block, opc_t opc, type_t type,
1109 		unsigned wrmask, unsigned flags, unsigned samp, unsigned tex,
1110 		struct ir3_instruction *src0, struct ir3_instruction *src1)
1111 {
1112 	struct ir3_instruction *sam;
1113 	struct ir3_register *reg;
1114 
1115 	sam = ir3_instr_create(block, opc);
1116 	sam->flags |= flags;
1117 	ir3_reg_create(sam, 0, 0)->wrmask = wrmask;
1118 	if (src0) {
1119 		reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
1120 		reg->wrmask = (1 << (src0->regs_count - 1)) - 1;
1121 		reg->instr = src0;
1122 	}
1123 	if (src1) {
1124 		reg = ir3_reg_create(sam, 0, IR3_REG_SSA);
1125 		reg->instr = src1;
1126 		reg->wrmask = (1 << (src1->regs_count - 1)) - 1;
1127 	}
1128 	sam->cat5.samp = samp;
1129 	sam->cat5.tex  = tex;
1130 	sam->cat5.type  = type;
1131 
1132 	return sam;
1133 }
1134 
1135 /* cat6 instructions: */
1136 INSTR2(LDLV)
1137 INSTR2(LDG)
1138 INSTR3(STG)
1139 
1140 /* ************************************************************************* */
/* TODO: split this out, or use an existing helper instead (e.g. main/bitset.h). */
1142 
1143 #include <string.h>
1144 
/* Upper bound (exclusive) on the register index tracked by a regmask. */
#define MAX_REG 256

/*
 * Bitmask with one bit per register, stored as an array of bytes.  It
 * holds 2 * MAX_REG bits in total; see regmask_idx() for how a register
 * maps to a bit position.
 */
typedef uint8_t regmask_t[(2 * MAX_REG) / 8];
1148 
regmask_idx(struct ir3_register * reg)1149 static inline unsigned regmask_idx(struct ir3_register *reg)
1150 {
1151 	unsigned num = (reg->flags & IR3_REG_RELATIV) ? reg->array.offset : reg->num;
1152 	debug_assert(num < MAX_REG);
1153 	if (reg->flags & IR3_REG_HALF)
1154 		num += MAX_REG;
1155 	return num;
1156 }
1157 
/* Clear every bit of 'regmask'. */
static inline void regmask_init(regmask_t *regmask)
{
	memset(*regmask, 0, sizeof(regmask_t));
}
1162 
/*
 * Mark the register(s) described by 'reg' in 'regmask'.
 *
 * For a relative register (IR3_REG_RELATIV), reg->size consecutive
 * bits are set, starting at regmask_idx(reg).  Otherwise bit N of
 * reg->wrmask selects the bit at regmask_idx(reg) + N.
 */
static inline void regmask_set(regmask_t *regmask, struct ir3_register *reg)
{
	uint8_t *bits = *regmask;
	unsigned pos = regmask_idx(reg);

	if (reg->flags & IR3_REG_RELATIV) {
		unsigned n;
		for (n = 0; n < reg->size; n++, pos++)
			bits[pos / 8] |= 1 << (pos % 8);
		return;
	}

	{
		unsigned pending = reg->wrmask;
		while (pending) {
			if (pending & 1)
				bits[pos / 8] |= 1 << (pos % 8);
			pending >>= 1;
			pos++;
		}
	}
}
1177 
/* Store the bitwise OR of masks 'a' and 'b' into 'dst', byte by byte. */
static inline void regmask_or(regmask_t *dst, regmask_t *a, regmask_t *b)
{
	uint8_t *out = *dst;
	uint8_t *lhs = *a;
	uint8_t *rhs = *b;
	unsigned n;

	for (n = 0; n < ARRAY_SIZE(*dst); n++)
		out[n] = lhs[n] | rhs[n];
}
1184 
1185 /* set bits in a if not set in b, conceptually:
1186  *   a |= (reg & ~b)
1187  */
regmask_set_if_not(regmask_t * a,struct ir3_register * reg,regmask_t * b)1188 static inline void regmask_set_if_not(regmask_t *a,
1189 		struct ir3_register *reg, regmask_t *b)
1190 {
1191 	unsigned idx = regmask_idx(reg);
1192 	if (reg->flags & IR3_REG_RELATIV) {
1193 		unsigned i;
1194 		for (i = 0; i < reg->size; i++, idx++)
1195 			if (!((*b)[idx / 8] & (1 << (idx % 8))))
1196 				(*a)[idx / 8] |= 1 << (idx % 8);
1197 	} else {
1198 		unsigned mask;
1199 		for (mask = reg->wrmask; mask; mask >>= 1, idx++)
1200 			if (mask & 1)
1201 				if (!((*b)[idx / 8] & (1 << (idx % 8))))
1202 					(*a)[idx / 8] |= 1 << (idx % 8);
1203 	}
1204 }
1205 
/*
 * Return true if any bit belonging to 'reg' is set in 'regmask'.
 *
 * The bits belonging to 'reg' are reg->size consecutive bits for a
 * relative register (IR3_REG_RELATIV), otherwise one bit per set bit
 * of reg->wrmask, in both cases starting at regmask_idx(reg).
 */
static inline bool regmask_get(regmask_t *regmask,
		struct ir3_register *reg)
{
	uint8_t *bits = *regmask;
	unsigned pos = regmask_idx(reg);

	if (reg->flags & IR3_REG_RELATIV) {
		unsigned n;
		for (n = 0; n < reg->size; n++, pos++) {
			if (bits[pos / 8] & (1 << (pos % 8)))
				return true;
		}
		return false;
	}

	{
		unsigned pending = reg->wrmask;
		while (pending) {
			if ((pending & 1) && (bits[pos / 8] & (1 << (pos % 8))))
				return true;
			pending >>= 1;
			pos++;
		}
	}

	return false;
}
1224 
1225 /* ************************************************************************* */
1226 
1227 #endif /* IR3_H_ */
1228