/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "freedreno_util.h"

#include "ir3.h"
#include "ir3_shader.h"

/*
 * Copy Propagate:
 */

struct ir3_cp_ctx {
	struct ir3 *shader;
	struct ir3_shader_variant *so;
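	/* number of scalar immediate components lowered into
	 * so->immediates so far (see lower_immed()):
	 */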
	unsigned immediate_idx;
};

/* is it a type preserving mov, with ok flags? */
static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
{
	if (is_same_type_mov(instr)) {
		struct ir3_register *dst = instr->regs[0];
		struct ir3_register *src = instr->regs[1];
		struct ir3_instruction *src_instr = ssa(src);

		/* only if mov src is SSA (not const/immed): */
		if (!src_instr)
			return false;

		/* no indirect: */
		if (dst->flags & IR3_REG_RELATIV)
			return false;
		if (src->flags & IR3_REG_RELATIV)
			return false;

		if (!allow_flags)
			if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
					IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
				return false;

		/* TODO: remove this hack: */
		if (src_instr->opc == OPC_META_FO)
			return false;
		/* TODO: we currently don't handle left/right neighbors
		 * very well when inserting parallel-copies into phi..
		 * to avoid problems don't eliminate a mov coming out
		 * of phi..
		 */
		if (src_instr->opc == OPC_META_PHI)
			return false;
		return true;
	}
	return false;
}

static unsigned cp_flags(unsigned flags)
{
	/* only considering these flags (at least for now): */
	flags &= (IR3_REG_CONST | IR3_REG_IMMED |
			IR3_REG_FNEG | IR3_REG_FABS |
			IR3_REG_SNEG | IR3_REG_SABS |
			IR3_REG_BNOT | IR3_REG_RELATIV);
	return flags;
}

static bool valid_flags(struct ir3_instruction *instr, unsigned n,
		unsigned flags)
{
	unsigned valid_flags;
	flags = cp_flags(flags);

	/* If destination is indirect, then source cannot be.. at least
	 * I don't think so..
	 */
	if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
			(flags & IR3_REG_RELATIV))
		return false;

	/* TODO it seems to *mostly* work to cp RELATIV, except we get some
	 * intermittent piglit variable-indexing fails.  Newer blob driver
	 * doesn't seem to cp these.  Possibly this is a hw workaround?  Not
	 * sure, but until that is understood better, let's just switch off
	 * cp for indirect src's:
	 */
	if (flags & IR3_REG_RELATIV)
		return false;

	switch (opc_cat(instr->opc)) {
	case 1:
		valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
		if (flags & ~valid_flags)
			return false;
		break;
	case 2:
		valid_flags = ir3_cat2_absneg(instr->opc) |
				IR3_REG_CONST | IR3_REG_RELATIV;

		if (ir3_cat2_int(instr->opc))
			valid_flags |= IR3_REG_IMMED;

		if (flags & ~valid_flags)
			return false;

		if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
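			/* regs[0] is the dst, so the other src of this (two src)
			 * instruction is at regs[(n ^ 1) + 1]:
			 */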
			unsigned m = (n ^ 1) + 1;
			/* cannot deal w/ const in both srcs:
			 * (note that some cat2 actually only have a single src)
			 */
			if (m < instr->regs_count) {
				struct ir3_register *reg = instr->regs[m];
				if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
					return false;
				if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
					return false;
			}
			/* cannot be const + ABS|NEG: */
			if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
					IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
				return false;
		}
		break;
	case 3:
		valid_flags = ir3_cat3_absneg(instr->opc) |
				IR3_REG_CONST | IR3_REG_RELATIV;

		if (flags & ~valid_flags)
			return false;

		if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
			/* cannot deal w/ const/relativ in 2nd src: */
			if (n == 1)
				return false;
		}

		if (flags & IR3_REG_CONST) {
			/* cannot be const + ABS|NEG: */
			if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
					IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
				return false;
		}
		break;
	case 4:
		/* seems like blob compiler avoids const as src.. */
		/* TODO double check if this is still the case on a4xx */
		if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
			return false;
		if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
			return false;
		break;
	case 5:
		/* no flags allowed */
		if (flags)
			return false;
		break;
	case 6:
		valid_flags = IR3_REG_IMMED;
		if (flags & ~valid_flags)
			return false;

		if (flags & IR3_REG_IMMED) {
			/* doesn't seem like we can have immediate src for store
			 * instructions:
			 *
			 * TODO this restriction could also apply to load instructions,
			 * but for load instructions this arg is the address (and not
			 * really sure of any good way to test a hard-coded immed addr src)
			 */
			if (is_store(instr) && (n == 1))
				return false;

			if ((instr->opc == OPC_LDL) && (n != 1))
				return false;

			if ((instr->opc == OPC_STL) && (n != 2))
				return false;

			/* disallow CP into anything but the SSBO slot argument for
			 * atomics:
			 */
			if (is_atomic(instr->opc) && (n != 0))
				return false;

			if (is_atomic(instr->opc) && !(instr->flags & IR3_INSTR_G))
				return false;
		}

		break;
	}

	return true;
}

/* propagate register flags from src to dst.. negates need special
 * handling to cancel each other out.
 */
static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
{
	unsigned srcflags = src->regs[1]->flags;

	/* if what we are combining into already has (abs) flags,
	 * we can drop (neg) from src:
	 */
	if (*dstflags & IR3_REG_FABS)
		srcflags &= ~IR3_REG_FNEG;
	if (*dstflags & IR3_REG_SABS)
		srcflags &= ~IR3_REG_SNEG;

	if (srcflags & IR3_REG_FABS)
		*dstflags |= IR3_REG_FABS;
	if (srcflags & IR3_REG_SABS)
		*dstflags |= IR3_REG_SABS;
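	/* neg/not toggle rather than accumulate, so that two negates
	 * (or two bnots) cancel each other out:
	 */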
	if (srcflags & IR3_REG_FNEG)
		*dstflags ^= IR3_REG_FNEG;
	if (srcflags & IR3_REG_SNEG)
		*dstflags ^= IR3_REG_SNEG;
	if (srcflags & IR3_REG_BNOT)
		*dstflags ^= IR3_REG_BNOT;

	*dstflags &= ~IR3_REG_SSA;
	*dstflags |= srcflags & IR3_REG_SSA;
	*dstflags |= srcflags & IR3_REG_CONST;
	*dstflags |= srcflags & IR3_REG_IMMED;
	*dstflags |= srcflags & IR3_REG_RELATIV;
	*dstflags |= srcflags & IR3_REG_ARRAY;

	/* if src of the src is boolean we can drop the (abs) since we know
	 * the source value is already a positive integer.  This cleans
	 * up the absnegs that get inserted when converting between nir and
	 * native boolean (see ir3_b2n/n2b)
	 */
	struct ir3_instruction *srcsrc = ssa(src->regs[1]);
	if (srcsrc && is_bool(srcsrc))
		*dstflags &= ~IR3_REG_SABS;
}

static struct ir3_register *
lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags)
{
	unsigned swiz, idx, i;

	reg = ir3_reg_clone(ctx->shader, reg);

	/* in some cases, there are restrictions on (abs)/(neg) plus const..
	 * so just evaluate those and clear the flags:
	 */
	if (new_flags & IR3_REG_SABS) {
		reg->iim_val = abs(reg->iim_val);
		new_flags &= ~IR3_REG_SABS;
	}

	if (new_flags & IR3_REG_FABS) {
		reg->fim_val = fabs(reg->fim_val);
		new_flags &= ~IR3_REG_FABS;
	}

	if (new_flags & IR3_REG_SNEG) {
		reg->iim_val = -reg->iim_val;
		new_flags &= ~IR3_REG_SNEG;
	}

	if (new_flags & IR3_REG_FNEG) {
		reg->fim_val = -reg->fim_val;
		new_flags &= ~IR3_REG_FNEG;
	}

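	/* check if the immediate is already in the immediates table, so
	 * identical values can share a single const slot:
	 */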
	for (i = 0; i < ctx->immediate_idx; i++) {
		swiz = i % 4;
		idx  = i / 4;

		if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) {
			break;
		}
	}

	if (i == ctx->immediate_idx) {
		/* need to generate a new immediate: */
		swiz = i % 4;
		idx  = i / 4;
		ctx->so->immediates[idx].val[swiz] = reg->uim_val;
		ctx->so->immediates_count = idx + 1;
		ctx->immediate_idx++;
	}

	new_flags &= ~IR3_REG_IMMED;
	new_flags |= IR3_REG_CONST;
	reg->flags = new_flags;
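	/* lowered immediates live at the end of the const file, starting
	 * at constbase.immediate (a vec4 offset, hence the * 4):
	 */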
	reg->num = i + (4 * ctx->so->constbase.immediate);

	return reg;
}

/**
 * Handle cp for a given src register.  This additionally handles
 * the cases of collapsing immediate/const (which replace the src
 * register with a non-ssa src) or collapsing mov's from relative
 * src (which needs to also fixup the address src reference by the
 * instruction).
 */
static void
reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
		struct ir3_register *reg, unsigned n)
{
	struct ir3_instruction *src = ssa(reg);

	/* don't propagate copies into a PHI, since we don't know if the
	 * src block executed:
	 */
	if (instr->opc == OPC_META_PHI)
		return;

	if (is_eligible_mov(src, true)) {
		/* simple case, no immed/const/relativ, only mov's w/ ssa src: */
		struct ir3_register *src_reg = src->regs[1];
		unsigned new_flags = reg->flags;

		combine_flags(&new_flags, src);

		if (valid_flags(instr, n, new_flags)) {
			if (new_flags & IR3_REG_ARRAY) {
				debug_assert(!(reg->flags & IR3_REG_ARRAY));
				reg->array = src_reg->array;
			}
			reg->flags = new_flags;
			reg->instr = ssa(src_reg);
		}

		src = ssa(reg);      /* could be null for IR3_REG_ARRAY case */
		if (!src)
			return;
	} else if (is_same_type_mov(src) &&
			/* cannot collapse const/immed/etc into meta instrs: */
			!is_meta(instr)) {
		/* immed/const/etc cases, which require some special handling: */
		struct ir3_register *src_reg = src->regs[1];
		unsigned new_flags = reg->flags;

		combine_flags(&new_flags, src);

		if (!valid_flags(instr, n, new_flags)) {
			/* See if lowering an immediate to const would help. */
			if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
				debug_assert(new_flags & IR3_REG_IMMED);
				instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
				return;
			}

			/* special case for "normal" mad instructions, we can
			 * try swapping the first two args if that fits better.
			 *
			 * the "plain" MAD's (ie. the ones that don't shift first
			 * src prior to multiply) can swap their first two srcs if
			 * src[0] is !CONST and src[1] is CONST:
			 */
			if ((n == 1) && is_mad(instr->opc) &&
					!(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
					valid_flags(instr, 0, new_flags)) {
				/* swap src[0] and src[1]: */
				struct ir3_register *tmp;
				tmp = instr->regs[0 + 1];
				instr->regs[0 + 1] = instr->regs[1 + 1];
				instr->regs[1 + 1] = tmp;
				n = 0;
			} else {
				return;
			}
		}

		/* Here we handle the special case of mov from
		 * CONST and/or RELATIV.  These need to be handled
		 * specially, because in the case of move from CONST
		 * there is no src ir3_instruction so we need to
		 * replace the ir3_register.  And in the case of
		 * RELATIV we need to handle the address register
		 * dependency.
		 */
		if (src_reg->flags & IR3_REG_CONST) {
			/* an instruction cannot reference two different
			 * address registers:
			 */
			if ((src_reg->flags & IR3_REG_RELATIV) &&
					conflicts(instr->address, reg->instr->address))
				return;

			/* This seems to be a hw bug, or something where the timings
			 * just somehow don't work out.  This restriction may only
			 * apply if the first src is also CONST.
			 */
			if ((opc_cat(instr->opc) == 3) && (n == 2) &&
					(src_reg->flags & IR3_REG_RELATIV) &&
					(src_reg->array.offset == 0))
				return;

			src_reg = ir3_reg_clone(instr->block->shader, src_reg);
			src_reg->flags = new_flags;
			instr->regs[n+1] = src_reg;

			if (src_reg->flags & IR3_REG_RELATIV)
				ir3_instr_set_address(instr, reg->instr->address);

			return;
		}

		if ((src_reg->flags & IR3_REG_RELATIV) &&
				!conflicts(instr->address, reg->instr->address)) {
			src_reg = ir3_reg_clone(instr->block->shader, src_reg);
			src_reg->flags = new_flags;
			instr->regs[n+1] = src_reg;
			ir3_instr_set_address(instr, reg->instr->address);

			return;
		}

		/* NOTE: seems we can only do immed integers, so don't
		 * need to care about float.  But we do need to handle
		 * abs/neg *before* checking that the immediate requires
		 * few enough bits to encode:
		 *
		 * TODO: do we need to do something to avoid accidentally
		 * catching a float immed?
		 */
		if (src_reg->flags & IR3_REG_IMMED) {
			int32_t iim_val = src_reg->iim_val;

			debug_assert((opc_cat(instr->opc) == 1) ||
					(opc_cat(instr->opc) == 6) ||
					ir3_cat2_int(instr->opc));

			if (new_flags & IR3_REG_SABS)
				iim_val = abs(iim_val);

			if (new_flags & IR3_REG_SNEG)
				iim_val = -iim_val;

			if (new_flags & IR3_REG_BNOT)
				iim_val = ~iim_val;

			/* other than category 1 (mov) we can only encode up to 10 bits: */
			if ((instr->opc == OPC_MOV) ||
					!((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) {
				new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
				src_reg = ir3_reg_clone(instr->block->shader, src_reg);
				src_reg->flags = new_flags;
				src_reg->iim_val = iim_val;
				instr->regs[n+1] = src_reg;
			} else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
				/* See if lowering an immediate to const would help. */
				instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
			}

			return;
		}
	}
}

/* Handle special case of eliminating output mov, and similar cases where
 * there isn't a normal "consuming" instruction.  In this case we cannot
 * collapse flags (ie. output mov from const, or w/ abs/neg flags, cannot
 * be eliminated)
 */
static struct ir3_instruction *
eliminate_output_mov(struct ir3_instruction *instr)
{
	if (is_eligible_mov(instr, false)) {
		struct ir3_register *reg = instr->regs[1];
		if (!(reg->flags & IR3_REG_ARRAY)) {
			struct ir3_instruction *src_instr = ssa(reg);
			debug_assert(src_instr);
			return src_instr;
		}
	}
	return instr;
}

/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_register *reg;

	if (instr->regs_count == 0)
		return;

	if (ir3_instr_check_mark(instr))
		return;

	/* walk down the graph from each src: */
	foreach_src_n(reg, n, instr) {
		struct ir3_instruction *src = ssa(reg);

		if (!src)
			continue;

		instr_cp(ctx, src);

		/* TODO non-indirect access we could figure out which register
		 * we actually want and allow cp..
		 */
		if (reg->flags & IR3_REG_ARRAY)
			continue;

		reg_cp(ctx, instr, reg, n);
	}

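	/* array dst regs carry a dependency on the previous array access,
	 * so walk that too:
	 */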
	if (instr->regs[0]->flags & IR3_REG_ARRAY) {
		struct ir3_instruction *src = ssa(instr->regs[0]);
		if (src)
			instr_cp(ctx, src);
	}

	if (instr->address) {
		instr_cp(ctx, instr->address);
		ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
	}

	/* we can end up with extra cmps.s from the frontend, which uses a
	 *
	 *    cmps.s p0.x, cond, 0
	 *
	 * as a way to mov into the predicate register.  But frequently 'cond'
	 * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
	 * just re-write the instruction writing the predicate register to get
	 * rid of the double cmps.
	 */
	if ((instr->opc == OPC_CMPS_S) &&
			(instr->regs[0]->num == regid(REG_P0, 0)) &&
			ssa(instr->regs[1]) &&
			(instr->regs[2]->flags & IR3_REG_IMMED) &&
			(instr->regs[2]->iim_val == 0)) {
		struct ir3_instruction *cond = ssa(instr->regs[1]);
		switch (cond->opc) {
		case OPC_CMPS_S:
		case OPC_CMPS_F:
		case OPC_CMPS_U:
			instr->opc   = cond->opc;
			instr->flags = cond->flags;
			instr->cat2  = cond->cat2;
			instr->address = cond->address;
			instr->regs[1] = cond->regs[1];
			instr->regs[2] = cond->regs[2];
			break;
		default:
			break;
		}
	}
}

void
ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
{
	struct ir3_cp_ctx ctx = {
			.shader = ir,
			.so = so,
	};

	ir3_clear_mark(ir);

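	/* start the walk from the graph roots: shader outputs here, plus
	 * block conditions and "keep" instructions below:
	 */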
	for (unsigned i = 0; i < ir->noutputs; i++) {
		if (ir->outputs[i]) {
			instr_cp(&ctx, ir->outputs[i]);
			ir->outputs[i] = eliminate_output_mov(ir->outputs[i]);
		}
	}

	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		if (block->condition) {
			instr_cp(&ctx, block->condition);
			block->condition = eliminate_output_mov(block->condition);
		}

		for (unsigned i = 0; i < block->keeps_count; i++) {
			instr_cp(&ctx, block->keeps[i]);
			block->keeps[i] = eliminate_output_mov(block->keeps[i]);
		}
	}
}