/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#ifndef IR3_RA_H_
#define IR3_RA_H_

#include <setjmp.h>

#include "util/bitset.h"

static const unsigned class_sizes[] = {
	1, 2, 3, 4,
	4 + 4, /* txd + 1d/2d */
	4 + 6, /* txd + 3d */
};
#define class_count ARRAY_SIZE(class_sizes)

static const unsigned half_class_sizes[] = {
	1, 2, 3, 4,
};
#define half_class_count  ARRAY_SIZE(half_class_sizes)

/* High regs seem to be used only by compute shaders, and vec1 and
 * vec3 seem to be sufficient (for now?):
 */
static const unsigned high_class_sizes[] = {
	1, 3,
};
#define high_class_count ARRAY_SIZE(high_class_sizes)

#define total_class_count (class_count + half_class_count + high_class_count)
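
/* With the tables above: class_count == 6 (sizes {1, 2, 3, 4, 8, 10}),
 * half_class_count == 4 and high_class_count == 2, so
 * total_class_count == 12.
 */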

/* Below a0.x are normal regs.  RA doesn't need to assign a0.x/p0.x. */
#define NUM_REGS             (4 * 48)  /* r0 to r47 */
#define NUM_HIGH_REGS        (4 * 8)   /* r48 to r55 */
#define FIRST_HIGH_REG       (4 * 48)
/* Number of virtual regs in a given class: */

static inline unsigned CLASS_REGS(unsigned i)
{
	assert(i < class_count);

	return (NUM_REGS - (class_sizes[i] - 1));
}

static inline unsigned HALF_CLASS_REGS(unsigned i)
{
	assert(i < half_class_count);

	return (NUM_REGS - (half_class_sizes[i] - 1));
}

static inline unsigned HIGH_CLASS_REGS(unsigned i)
{
	assert(i < high_class_count);

	return (NUM_HIGH_REGS - (high_class_sizes[i] - 1));
}
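
/* A vecN value occupies N consecutive scalar regs, so within a class it
 * has NUM_REGS - (N - 1) possible base registers (one vreg for each).
 * E.g. for the largest full class (4 + 6 == 10 components),
 * CLASS_REGS(5) == 192 - 9 == 183.
 */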

#define HALF_OFFSET          (class_count)
#define HIGH_OFFSET          (class_count + half_class_count)

/* register-set, created one time, used for all shaders: */
struct ir3_ra_reg_set {
	struct ra_regs *regs;
	unsigned int classes[class_count];
	unsigned int half_classes[half_class_count];
	unsigned int high_classes[high_class_count];

	/* The pre-fetched tex dst is limited, on current gens, to regs
	 * 0x3f and below.  An additional register class, with one vreg,
	 * is set up to conflict with any regs above that limit.
	 */
	unsigned prefetch_exclude_class;
	unsigned prefetch_exclude_reg;

	/* The virtual register space flattens out all the classes,
	 * starting with full, followed by half and then high, ie:
	 *
	 *   scalar full  (starting at zero)
	 *   vec2 full
	 *   vec3 full
	 *   ...
	 *   vecN full
	 *   scalar half  (starting at first_half_reg)
	 *   vec2 half
	 *   ...
	 *   vecN half
	 *   scalar high  (starting at first_high_reg)
	 *   ...
	 *   vecN high
	 */
	unsigned first_half_reg, first_high_reg;

	/* maps flat virtual register space to base gpr: */
	uint16_t *ra_reg_to_gpr;
	/* maps cls,gpr to flat virtual register space: */
	uint16_t **gpr_to_ra_reg;
};
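
/* A minimal sketch of how the two tables relate (assuming a name 'n'
 * in the flat vreg space that belongs to class 'cls'):
 *
 *    uint16_t gpr  = set->ra_reg_to_gpr[n];         (name -> base gpr)
 *    uint16_t name = set->gpr_to_ra_reg[cls][gpr];  (back to the name)
 *    assert(name == n);
 *
 * ie. gpr_to_ra_reg[cls] is the per-class inverse of ra_reg_to_gpr.
 */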

/* additional block-data (per-block) */
struct ir3_ra_block_data {
	BITSET_WORD *def;        /* variables defined before used in block */
	BITSET_WORD *use;        /* variables used before defined in block */
	BITSET_WORD *livein;     /* which defs reach entry point of block */
	BITSET_WORD *liveout;    /* which defs reach exit point of block */
};
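
/* These bitsets are the inputs/outputs of the standard iterative
 * liveness data-flow problem, ie. (sketch of the usual formulation):
 *
 *    livein(b)  = use(b) | (liveout(b) & ~def(b))
 *    liveout(b) = union of livein(s) over all successors s of b
 */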

/* additional instruction-data (per-instruction) */
struct ir3_ra_instr_data {
	/* cached instruction 'definer' info: */
	struct ir3_instruction *defn;
	int off, sz, cls;
};

/* register-assign context, per-shader */
struct ir3_ra_ctx {
	struct ir3_shader_variant *v;
	struct ir3 *ir;

	struct ir3_ra_reg_set *set;
	struct ra_graph *g;

	/* Are we in the scalar assignment pass?  In this pass, all larger-
	 * than-vec1 values have already been assigned and pre-colored, so
	 * we only consider scalar values.
	 */
	bool scalar_pass;

	unsigned alloc_count;
	unsigned r0_xyz_nodes;  /* ra node numbers for r0.[xyz] precolors */
	unsigned hr0_xyz_nodes; /* ra node numbers for hr0.[xyz] precolors */
	unsigned prefetch_exclude_node;
	/* one per class, plus one slot for arrays: */
	unsigned class_alloc_count[total_class_count + 1];
	unsigned class_base[total_class_count + 1];
	unsigned instr_cnt;
	unsigned *def, *use;     /* def/use table */
	struct ir3_ra_instr_data *instrd;

	/* Mapping vreg name back to instruction, used by the select_reg
	 * callback:
	 */
	struct hash_table *name_to_instr;

	/* Tracking for select_reg callback */
	unsigned start_search_reg;
	unsigned max_target;

	/* Temporary buffer for def/use iterators
	 *
	 * The worst case should probably be an array w/ relative access (ie.
	 * all elements are def'd or use'd), and that can't be larger than
	 * the number of registers.
	 *
	 * NOTE we could declare this on the stack if needed, but I don't
	 * think there is a need for nested iterators.
	 */
	unsigned namebuf[NUM_REGS];
	unsigned namecnt, nameidx;

	/* Error handling: */
	jmp_buf jmp_env;
};

#define ra_assert(ctx, expr) do { \
		if (!(expr)) { \
			_debug_printf("RA: %s:%u: %s: Assertion `%s' failed.\n", __FILE__, __LINE__, __func__, #expr); \
			longjmp((ctx)->jmp_env, -1); \
		} \
	} while (0)
#define ra_unreachable(ctx, str) ra_assert(ctx, !str)
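
/* ra_assert() reports failure by longjmp()ing back to the jmp_env in
 * the context, so the top-level RA entry point is expected to arm it
 * with setjmp() before running any passes, roughly (sketch):
 *
 *    if (setjmp(ctx->jmp_env))
 *       return -1;      (an ra_assert() below fired)
 *
 *    ..run the allocation passes..
 */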

static inline int
ra_name(struct ir3_ra_ctx *ctx, struct ir3_ra_instr_data *id)
{
	unsigned name;
	debug_assert(id->cls >= 0);
	debug_assert(id->cls < total_class_count);  /* we shouldn't get arrays here.. */
	name = ctx->class_base[id->cls] + id->defn->name;
	debug_assert(name < ctx->alloc_count);
	return name;
}

/* Get the scalar name of the n'th component of an instruction dst: */
static inline int
scalar_name(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr, unsigned n)
{
	if (ctx->scalar_pass) {
		if (instr->opc == OPC_META_SPLIT) {
			debug_assert(n == 0);     /* split results in a scalar */
			struct ir3_instruction *src = instr->regs[1]->instr;
			return scalar_name(ctx, src, instr->split.off);
		} else if (instr->opc == OPC_META_COLLECT) {
			debug_assert(n < (instr->regs_count + 1));
			struct ir3_instruction *src = instr->regs[n + 1]->instr;
			return scalar_name(ctx, src, 0);
		}
	} else {
		debug_assert(n == 0);
	}

	return ra_name(ctx, &ctx->instrd[instr->ip]) + n;
}
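
/* For example (illustrative): in the scalar pass, asking for component
 * 2 of a collect's dst chases instr->regs[3] back to the instruction
 * supplying that component and returns the scalar name of that source's
 * dst, so the collect itself never needs scalar names of its own.
 */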

#define NO_NAME ~0

/*
 * Iterators to iterate the vreg names of an instruction's defs and uses
 */

static inline unsigned
__ra_name_cnt(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	if (!instr)
		return 0;

	/* Filter special cases, ie. writes to a0.x or p0.x, or non-ssa: */
	if (!writes_gpr(instr) || (instr->regs[0]->flags & IR3_REG_ARRAY))
		return 0;

	/* In the scalar pass, we aren't considering virtual register classes,
	 * ie. if an instruction writes a vec2, then it defines two different
	 * scalar register names.
	 */
	if (ctx->scalar_pass)
		return dest_regs(instr);

	return 1;
}

#define foreach_name_n(__name, __n, __ctx, __instr) \
	for (unsigned __cnt = __ra_name_cnt(__ctx, __instr), __n = 0, __name; \
	     (__n < __cnt) && ({__name = scalar_name(__ctx, __instr, __n); 1;}); __n++)

#define foreach_name(__name, __ctx, __instr) \
	foreach_name_n(__name, __n, __ctx, __instr)
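
/* Typical use (sketch), eg. to add interference between each scalar
 * name an instruction defines and some other ra node:
 *
 *    foreach_name (name, ctx, instr) {
 *       ra_add_node_interference(ctx->g, name, other);
 *    }
 */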

static inline unsigned
__ra_itr_pop(struct ir3_ra_ctx *ctx)
{
	if (ctx->nameidx < ctx->namecnt)
		return ctx->namebuf[ctx->nameidx++];
	return NO_NAME;
}

static inline void
__ra_itr_push(struct ir3_ra_ctx *ctx, unsigned name)
{
	assert(ctx->namecnt < ARRAY_SIZE(ctx->namebuf));
	ctx->namebuf[ctx->namecnt++] = name;
}

static inline unsigned
__ra_init_def_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	/* nested use is not supported: */
	assert(ctx->namecnt == ctx->nameidx);

	ctx->namecnt = ctx->nameidx = 0;

	if (!writes_gpr(instr))
		return NO_NAME;

	struct ir3_ra_instr_data *id = &ctx->instrd[instr->ip];
	struct ir3_register *dst = instr->regs[0];

	if (dst->flags & IR3_REG_ARRAY) {
		struct ir3_array *arr = ir3_lookup_array(ctx->ir, dst->array.id);

		/* indirect write is treated like a write to all array
		 * elements, since we don't know which one is actually
		 * written:
		 */
		if (dst->flags & IR3_REG_RELATIV) {
			for (unsigned i = 0; i < arr->length; i++) {
				__ra_itr_push(ctx, arr->base + i);
			}
		} else {
			__ra_itr_push(ctx, arr->base + dst->array.offset);
			debug_assert(dst->array.offset < arr->length);
		}
	} else if (id->defn == instr) {
		foreach_name_n (name, i, ctx, instr) {
			/* tex instructions actually have a wrmask, and
			 * don't touch masked out components.  We can't do
			 * anything useful about that in the first pass,
			 * but in the scalar pass we can realize these
			 * registers are available:
			 */
			if (ctx->scalar_pass && is_tex_or_prefetch(instr) &&
					!(instr->regs[0]->wrmask & (1 << i)))
				continue;
			__ra_itr_push(ctx, name);
		}
	}

	return __ra_itr_pop(ctx);
}

static inline unsigned
__ra_init_use_itr(struct ir3_ra_ctx *ctx, struct ir3_instruction *instr)
{
	/* nested use is not supported: */
	assert(ctx->namecnt == ctx->nameidx);

	ctx->namecnt = ctx->nameidx = 0;

	foreach_src (reg, instr) {
		if (reg->flags & IR3_REG_ARRAY) {
			struct ir3_array *arr =
				ir3_lookup_array(ctx->ir, reg->array.id);

			/* indirect read is treated like a read from all array
			 * elements, since we don't know which one is actually
			 * read:
			 */
			if (reg->flags & IR3_REG_RELATIV) {
				for (unsigned i = 0; i < arr->length; i++) {
					__ra_itr_push(ctx, arr->base + i);
				}
			} else {
				__ra_itr_push(ctx, arr->base + reg->array.offset);
				debug_assert(reg->array.offset < arr->length);
			}
		} else {
			foreach_name_n (name, i, ctx, reg->instr) {
				/* split takes a src w/ wrmask potentially greater
				 * than 0x1, but it really only cares about a single
				 * component.  This shows up in splits coming out of
				 * a tex instruction w/ wrmask=.z, for example.
				 */
				if (ctx->scalar_pass && (instr->opc == OPC_META_SPLIT) &&
						!(i == instr->split.off))
					continue;
				__ra_itr_push(ctx, name);
			}
		}
	}

	return __ra_itr_pop(ctx);
}

#define foreach_def(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_def_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))

#define foreach_use(__name, __ctx, __instr) \
	for (unsigned __name = __ra_init_use_itr(__ctx, __instr); \
	     __name != NO_NAME; __name = __ra_itr_pop(__ctx))
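
/* Typical use (sketch): the per-block def/use computation walks each
 * instruction's uses before its defs, along the lines of:
 *
 *    foreach_instr (instr, &block->instr_list) {
 *       foreach_use (name, ctx, instr) {
 *          if (!BITSET_TEST(bd->def, name))
 *             BITSET_SET(bd->use, name);
 *       }
 *       foreach_def (name, ctx, instr)
 *          BITSET_SET(bd->def, name);
 *    }
 */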

int ra_size_to_class(unsigned sz, bool half, bool high);
int ra_class_to_size(unsigned class, bool *half, bool *high);

#endif  /* IR3_RA_H_ */