/*
 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Jonathan Marek <jonathan@marek.ca>
 */
26 
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include "ir2.h"
#include "fd2_program.h"
#include "ir2/instr-a2xx.h"
36 
/* Kind of value an ir2_src refers to; it selects how ir2_src.num is read. */
enum ir2_src_type {
	IR2_SRC_SSA,   /* num: index of the producing instruction */
	IR2_SRC_REG,   /* num: index in the ctx->reg array */
	IR2_SRC_INPUT, /* num: index in the ctx->input array */
	IR2_SRC_CONST, /* num: constant index (C0, C1, etc) */
};
43 
44 struct ir2_src {
45 	/* num can mean different things
46 	 *   ssa: index of instruction
47 	 *   reg: index in ctx->reg array
48 	 *   input: index in ctx->input array
49 	 *   const: constant index (C0, C1, etc)
50 	 */
51 	uint16_t num;
52 	uint8_t swizzle;
53 	enum ir2_src_type type : 2;
54 	uint8_t abs : 1;
55 	uint8_t negate : 1;
56 	uint8_t : 4;
57 };
58 
/* Per-component register allocation state of an ir2_reg. */
struct ir2_reg_component {
	uint8_t c : 3; /* assigned x/y/z/w (7=dont write, for fetch instr) */
	bool alloc : 1; /* is it currently allocated */
	uint8_t ref_count; /* for ra */
};
64 
65 struct ir2_reg {
66 	uint8_t idx; /* assigned hardware register */
67 	uint8_t ncomp;
68 
69 	uint8_t loop_depth;
70 	bool initialized;
71 	/* block_idx to free on (-1 = free on ref_count==0) */
72 	int block_idx_free;
73 	struct ir2_reg_component comp[4];
74 };
75 
76 struct ir2_instr {
77 	unsigned idx;
78 
79 	unsigned block_idx;
80 
81 	enum {
82 		IR2_NONE,
83 		IR2_FETCH,
84 		IR2_ALU,
85 		IR2_CF,
86 	} type : 2;
87 
88 	/* instruction needs to be emitted (for scheduling) */
89 	bool need_emit : 1;
90 
91 	/* predicate value - (usually) same for entire block */
92 	uint8_t pred : 2;
93 
94 	/* src */
95 	uint8_t src_count;
96 	struct ir2_src src[4];
97 
98 	/* dst */
99 	bool is_ssa;
100 	union {
101 		struct ir2_reg ssa;
102 		struct ir2_reg *reg;
103 	};
104 
105 	/* type-specific */
106 	union {
107 		struct {
108 			instr_fetch_opc_t opc : 5;
109 			union {
110 				struct {
111 					uint8_t const_idx;
112 					uint8_t const_idx_sel;
113 				} vtx;
114 				struct {
115 					bool is_cube : 1;
116 					bool is_rect : 1;
117 					uint8_t samp_id;
118 				} tex;
119 			};
120 		} fetch;
121 		struct {
122 			/* store possible opcs, then we can choose vector/scalar instr */
123 			instr_scalar_opc_t scalar_opc : 6;
124 			instr_vector_opc_t vector_opc : 5;
125 			/* same as nir */
126 			uint8_t write_mask : 4;
127 			bool saturate : 1;
128 
129 			/* export idx (-1 no export) */
130 			int8_t export;
131 
132 			/* for scalarized 2 src instruction */
133 			uint8_t src1_swizzle;
134 		} alu;
135 		struct {
136 			/* jmp dst block_idx */
137 			uint8_t block_idx;
138 		} cf;
139 	};
140 };
141 
/* One entry of ctx->instr_sched: up to two instruction pointers plus a
 * register-state bitmask with the same size as ctx->reg_state (8 * 32 bits).
 * NOTE(review): instr / instr_s look like the vector and scalar halves of a
 * co-issued pair - confirm against the scheduler.
 */
struct ir2_sched_instr {
	uint32_t reg_state[8];
	struct ir2_instr *instr, *instr_s;
};
146 
147 struct ir2_context {
148 	struct fd2_shader_stateobj *so;
149 
150 	unsigned block_idx, pred_idx;
151 	uint8_t pred;
152 	bool block_has_jump[64];
153 
154 	unsigned loop_last_block[64];
155 	unsigned loop_depth;
156 
157 	nir_shader *nir;
158 
159 	/* ssa index of position output */
160 	struct ir2_src position;
161 
162 	/* to translate SSA ids to instruction ids */
163 	int16_t ssa_map[1024];
164 
165 	struct ir2_shader_info *info;
166 	struct ir2_frag_linkage *f;
167 
168 	int prev_export;
169 
170 	/* RA state */
171 	struct ir2_reg* live_regs[64];
172 	uint32_t reg_state[256/32]; /* 64*4 bits */
173 
174 	/* inputs */
175 	struct ir2_reg input[16 + 1]; /* 16 + param */
176 
177 	/* non-ssa regs */
178 	struct ir2_reg reg[64];
179 	unsigned reg_count;
180 
181 	struct ir2_instr instr[0x300];
182 	unsigned instr_count;
183 
184 	struct ir2_sched_instr instr_sched[0x180];
185 	unsigned instr_sched_count;
186 };
187 
188 void assemble(struct ir2_context *ctx, bool binning);
189 
190 void ir2_nir_compile(struct ir2_context *ctx, bool binning);
191 bool ir2_nir_lower_scalar(nir_shader * shader);
192 
193 void ra_count_refs(struct ir2_context *ctx);
194 void ra_reg(struct ir2_context *ctx, struct ir2_reg *reg, int force_idx,
195 	bool export, uint8_t export_writemask);
196 void ra_src_free(struct ir2_context *ctx, struct ir2_instr *instr);
197 void ra_block_free(struct ir2_context *ctx, unsigned block);
198 
199 void cp_src(struct ir2_context *ctx);
200 void cp_export(struct ir2_context *ctx);
201 
/* utils */

/* Predefined swizzles.
 *
 * Each channel i occupies bits [2*i, 2*i+1] and stores the selected
 * component relative to i, i.e. ((component - i) & 3), which is the encoding
 * produced by swiz_set() and decoded by swiz_get().  A value of 0 in a
 * channel therefore means "identity" for that channel.
 */
enum {
	IR2_SWIZZLE_Y = (1 << 0),
	IR2_SWIZZLE_Z = (2 << 0),
	IR2_SWIZZLE_W = (3 << 0),

	IR2_SWIZZLE_ZW = (2 << 0) | (2 << 2),

	IR2_SWIZZLE_YXW = (1 << 0) | (3 << 2) | (1 << 4),

	IR2_SWIZZLE_XXXX = (0 << 0) | (3 << 2) | (2 << 4) | (1 << 6),
	IR2_SWIZZLE_YYYY = (1 << 0) | (0 << 2) | (3 << 4) | (2 << 6),
	IR2_SWIZZLE_ZZZZ = (2 << 0) | (1 << 2) | (0 << 4) | (3 << 6),
	IR2_SWIZZLE_WWWW = (3 << 0) | (2 << 2) | (1 << 4) | (0 << 6),
	IR2_SWIZZLE_WYWW = (3 << 0) | (0 << 2) | (1 << 4) | (0 << 6),
	IR2_SWIZZLE_XYXY = (0 << 0) | (0 << 2) | (2 << 4) | (2 << 6),
	IR2_SWIZZLE_ZZXY = (2 << 0) | (1 << 2) | (2 << 4) | (2 << 6),
	IR2_SWIZZLE_YXZZ = (1 << 0) | (3 << 2) | (0 << 4) | (3 << 6),
};
221 
/* Report a compile error: printf() the message to stdout, then assert(0).
 *
 * ctx is accepted but not evaluated.  The remaining arguments are passed to
 * printf() unchanged, so the first one must be a format string.  When NDEBUG
 * is defined the assert expands to nothing and execution continues after
 * the message has been printed.
 */
#define compile_error(ctx, ...) ({ \
	printf(__VA_ARGS__); \
	assert(0); \
})
226 
227 static inline struct ir2_src
ir2_src(uint16_t num,uint8_t swizzle,enum ir2_src_type type)228 ir2_src(uint16_t num, uint8_t swizzle, enum ir2_src_type type)
229 {
230 	return (struct ir2_src) {
231 		.num = num,
232 		.swizzle = swizzle,
233 		.type = type
234 	};
235 }
236 
/* ir2_assemble uses it .. (defined in another file) */
struct ir2_src ir2_zero(struct ir2_context *ctx);
239 
/* Iterate `it` over the first ctx->instr_count entries of ctx->instr,
 * skipping entries whose type is IR2_NONE.  The skip runs inside the loop
 * condition (a GNU statement expression), so it is applied before the first
 * iteration and again after every it++.
 */
#define ir2_foreach_instr(it, ctx) \
	for (struct ir2_instr *it = (ctx)->instr; ({ \
		while (it != &(ctx)->instr[(ctx)->instr_count] && it->type == IR2_NONE) it++; \
		 it != &(ctx)->instr[(ctx)->instr_count]; }); it++)
244 
/* Iterate `it` over every non-NULL entry of ctx->live_regs[0..63].
 *
 * __ptr is the real cursor; `it` is reloaded from *__ptr each time the
 * condition is evaluated.  The step expression therefore has to advance
 * __ptr: stepping `it` instead leaves the cursor on the same slot, and the
 * loop can only terminate if the body clears that slot.  The body may still
 * set the current slot to NULL; the cursor moves past it either way.
 */
#define ir2_foreach_live_reg(it, ctx) \
	for (struct ir2_reg **__ptr = (ctx)->live_regs, *it; ({ \
		while (__ptr != &(ctx)->live_regs[64] && *__ptr == NULL) __ptr++; \
		 __ptr != &(ctx)->live_regs[64] ? (it=*__ptr) : NULL; }); __ptr++)
249 
/* Iterate `it` over avail[0 .. avail_count-1].  `avail` (an array of
 * struct ir2_instr pointers) and `avail_count` must be in scope at the use
 * site.  The bound is checked before *__instrp is loaded, so the element
 * at avail[avail_count] is never read.
 */
#define ir2_foreach_avail(it) \
	for (struct ir2_instr **__instrp = avail, *it; \
		__instrp != &avail[avail_count] && ((it = *__instrp), 1); __instrp++)
253 
/* Iterate `it` over the first instr->src_count entries of instr->src.
 * The instr argument is parenthesized so that any pointer expression
 * (e.g. &local) can be passed.
 */
#define ir2_foreach_src(it, instr) \
	for (struct ir2_src *it = (instr)->src; \
		 it != &(instr)->src[(instr)->src_count]; it++)
257 
/* mask for register allocation
 * 64 registers with 4 components each = 256 bits
 */
/* typedef struct {
	uint64_t data[4];
} regmask_t; */

/* Test bit `num` of a bitmask stored as an array of 32-bit words.
 * The shift uses an unsigned 1 so that bit 31 does not overflow a signed int.
 */
static inline bool mask_isset(const uint32_t *mask, unsigned num)
{
	return (mask[num / 32] & (UINT32_C(1) << (num % 32))) != 0;
}
269 
/* Set bit `num` of a bitmask stored as an array of 32-bit words.
 * The shift uses an unsigned 1 so that bit 31 does not overflow a signed int.
 */
static inline void mask_set(uint32_t *mask, unsigned num)
{
	mask[num / 32] |= UINT32_C(1) << (num % 32);
}
274 
/* Clear bit `num` of a bitmask stored as an array of 32-bit words.
 * The shift uses an unsigned 1 so that bit 31 does not overflow a signed int.
 */
static inline void mask_unset(uint32_t *mask, unsigned num)
{
	mask[num / 32] &= ~(UINT32_C(1) << (num % 32));
}
279 
/* Return the 4-bit group number `num` of the bitmask: each 32-bit word
 * holds eight groups, group 0 in the lowest nibble.
 */
static inline unsigned mask_reg(const uint32_t *mask, unsigned num)
{
	return (mask[num / 8] >> ((num % 8) * 4)) & 0xf;
}
284 
is_export(struct ir2_instr * instr)285 static inline bool is_export(struct ir2_instr *instr)
286 {
287 	return instr->type == IR2_ALU && instr->alu.export >= 0;
288 }
289 
export_buf(unsigned num)290 static inline instr_alloc_type_t export_buf(unsigned num)
291 {
292 	return num < 32 ? SQ_PARAMETER_PIXEL :
293 		num >= 62 ? SQ_POSITION : SQ_MEMORY;
294 }
295 
/* component c for channel i
 *
 * Returns the swizzle bits for channel i selecting component c: the
 * component is stored relative to the channel, ((c - i) & 3), in bits
 * [2*i, 2*i+1].  Results for different channels can be OR-ed together.
 */
static inline unsigned swiz_set(unsigned c, unsigned i)
{
	return ((c - i) & 3) << (i * 2);
}
301 
/* get swizzle in channel i
 *
 * Returns the absolute component (0..3) that swiz selects for channel i,
 * undoing the channel-relative encoding produced by swiz_set().
 */
static inline unsigned swiz_get(unsigned swiz, unsigned i)
{
	return ((swiz >> (i * 2)) + i) & 3;
}
307 
/* Compose two swizzles: channel i of the result selects the component that
 * swiz0 selects for channel swiz_get(swiz1, i).
 */
static inline unsigned swiz_merge(unsigned swiz0, unsigned swiz1)
{
	unsigned swiz = 0;
	for (unsigned i = 0; i < 4; i++)
		swiz |= swiz_set(swiz_get(swiz0, swiz_get(swiz1, i)), i);
	return swiz;
}
315 
/* In-place variant of swiz_merge(): replaces *swiz0 with the composition of
 * *swiz0 and swiz1.  The result always fits in 8 bits (four 2-bit channels).
 */
static inline void swiz_merge_p(uint8_t *swiz0, unsigned swiz1)
{
	*swiz0 = swiz_merge(*swiz0, swiz1);
}
323 
get_reg(struct ir2_instr * instr)324 static inline struct ir2_reg * get_reg(struct ir2_instr *instr)
325 {
326 	return instr->is_ssa ? &instr->ssa : instr->reg;
327 }
328 
329 static inline struct ir2_reg *
get_reg_src(struct ir2_context * ctx,struct ir2_src * src)330 get_reg_src(struct ir2_context *ctx, struct ir2_src *src)
331 {
332 	switch (src->type) {
333 	case IR2_SRC_INPUT:
334 		return &ctx->input[src->num];
335 	case IR2_SRC_SSA:
336 		return &ctx->instr[src->num].ssa;
337 	case IR2_SRC_REG:
338 		return &ctx->reg[src->num];
339 	default:
340 		return NULL;
341 	}
342 }
343 
344 /* gets a ncomp value for the dst */
dst_ncomp(struct ir2_instr * instr)345 static inline unsigned dst_ncomp(struct ir2_instr *instr)
346 {
347 	if (instr->is_ssa)
348 		return instr->ssa.ncomp;
349 
350 	if (instr->type == IR2_FETCH)
351 		return instr->reg->ncomp;
352 
353 	assert(instr->type == IR2_ALU);
354 
355 	unsigned ncomp = 0;
356 	for (int i = 0; i < instr->reg->ncomp; i++)
357 		ncomp += !!(instr->alu.write_mask & 1 << i);
358 	return ncomp;
359 }
360 
361 /* gets a ncomp value for the src registers */
src_ncomp(struct ir2_instr * instr)362 static inline unsigned src_ncomp(struct ir2_instr *instr)
363 {
364 	if (instr->type == IR2_FETCH) {
365 		switch (instr->fetch.opc) {
366 		case VTX_FETCH:
367 			return 1;
368 		case TEX_FETCH:
369 			return instr->fetch.tex.is_cube ? 3 : 2;
370 		case TEX_SET_TEX_LOD:
371 			return 1;
372 		default:
373 			assert(0);
374 		}
375 	}
376 
377 	switch (instr->alu.scalar_opc) {
378 	case PRED_SETEs ... KILLONEs:
379 		return 1;
380 	default:
381 		break;
382 	}
383 
384 	switch (instr->alu.vector_opc) {
385 	case DOT2ADDv:
386 		return 2;
387 	case DOT3v:
388 		return 3;
389 	case DOT4v:
390 	case CUBEv:
391 	case PRED_SETE_PUSHv:
392 		return 4;
393 	default:
394 		return dst_ncomp(instr);
395 	}
396 }
397