/* -*- mode: C; c-file-style: "k&r"; tab-width 4; indent-tabs-mode: t; -*- */

/*
 * Copyright (C) 2014 Rob Clark <robclark@freedesktop.org>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 *
 * Authors:
 *    Rob Clark <robclark@freedesktop.org>
 */

#include "freedreno_util.h"

#include "ir3.h"
#include "ir3_shader.h"

/*
 * Copy Propagate:
 */

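/*
 * The pass walks the use-def graph depth-first from the shader outputs
 * (plus the other roots: "keeps" and block conditions).  Wherever a src
 * is fed by an eligible mov, the mov is collapsed into the consuming
 * instruction when the combined register flags are still encodable
 * there, leaving the mov itself dead.  Immediates that cannot be encoded
 * directly may instead be lowered into the const file (see lower_immed()).
 */
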
struct ir3_cp_ctx {
	struct ir3 *shader;
	struct ir3_shader_variant *so;
	unsigned immediate_idx;
};

/* is it a type preserving mov, with ok flags? */
static bool is_eligible_mov(struct ir3_instruction *instr, bool allow_flags)
{
	if (is_same_type_mov(instr)) {
		struct ir3_register *dst = instr->regs[0];
		struct ir3_register *src = instr->regs[1];
		struct ir3_instruction *src_instr = ssa(src);

		/* only if mov src is SSA (not const/immed): */
		if (!src_instr)
			return false;

		/* no indirect: */
		if (dst->flags & IR3_REG_RELATIV)
			return false;
		if (src->flags & IR3_REG_RELATIV)
			return false;

		if (!allow_flags)
			if (src->flags & (IR3_REG_FABS | IR3_REG_FNEG |
					IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
				return false;

		/* TODO: remove this hack: */
		if (src_instr->opc == OPC_META_FO)
			return false;
		/* TODO: we currently don't handle left/right neighbors
		 * very well when inserting parallel-copies into phi..
		 * to avoid problems don't eliminate a mov coming out
		 * of phi..
		 */
		if (src_instr->opc == OPC_META_PHI)
			return false;
		return true;
	}
	return false;
}

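/* mask a src's flags down to just the ones copy-propagation tracks when
 * deciding whether a collapse is legal:
 */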
static unsigned cp_flags(unsigned flags)
{
	/* only considering these flags (at least for now): */
	flags &= (IR3_REG_CONST | IR3_REG_IMMED |
			IR3_REG_FNEG | IR3_REG_FABS |
			IR3_REG_SNEG | IR3_REG_SABS |
			IR3_REG_BNOT | IR3_REG_RELATIV);
	return flags;
}

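/* check whether src #n of 'instr' could legally carry 'flags', based on
 * what the instruction's category is able to encode:
 */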
static bool valid_flags(struct ir3_instruction *instr, unsigned n,
		unsigned flags)
{
	unsigned valid_flags;
	flags = cp_flags(flags);

	/* If destination is indirect, then source cannot be.. at least
	 * I don't think so..
	 */
	if ((instr->regs[0]->flags & IR3_REG_RELATIV) &&
			(flags & IR3_REG_RELATIV))
		return false;

	/* TODO it seems to *mostly* work to cp RELATIV, except we get some
	 * intermittent piglit variable-indexing fails.  Newer blob driver
	 * doesn't seem to cp these.  Possibly this is a hw workaround?  Not
	 * sure, but until that is understood better, let's just switch off
	 * cp for indirect src's:
	 */
	if (flags & IR3_REG_RELATIV)
		return false;

	/* clear flags that are 'ok' */
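	/* (ir3 instruction categories, roughly: cat1=mov/cov, cat2=two-src ALU,
	 * cat3=three-src ALU such as mad/sel, cat4=SFU, cat5=tex, cat6=mem)
	 */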
	switch (opc_cat(instr->opc)) {
	case 1:
		valid_flags = IR3_REG_IMMED | IR3_REG_CONST | IR3_REG_RELATIV;
		if (flags & ~valid_flags)
			return false;
		break;
	case 5:
		/* no flags allowed */
		if (flags)
			return false;
		break;
	case 6:
		valid_flags = IR3_REG_IMMED;
		if (flags & ~valid_flags)
			return false;

		if (flags & IR3_REG_IMMED) {
			/* doesn't seem like we can have immediate src for store
			 * instructions:
			 *
			 * TODO this restriction could also apply to load instructions,
			 * but for load instructions this arg is the address (and not
			 * really sure any good way to test a hard-coded immed addr src)
			 */
			if (is_store(instr) && (n == 1))
				return false;
		}

		break;
	case 2:
		valid_flags = ir3_cat2_absneg(instr->opc) |
				IR3_REG_CONST | IR3_REG_RELATIV;

		if (ir3_cat2_int(instr->opc))
			valid_flags |= IR3_REG_IMMED;

		if (flags & ~valid_flags)
			return false;

		if (flags & (IR3_REG_CONST | IR3_REG_IMMED)) {
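			/* srcs live at regs[1..] (regs[0] is the dst), so (n ^ 1) + 1
			 * indexes the other src:
			 */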
			unsigned m = (n ^ 1) + 1;
			/* cannot deal w/ const in both srcs:
			 * (note that some cat2 actually only have a single src)
			 */
			if (m < instr->regs_count) {
				struct ir3_register *reg = instr->regs[m];
				if ((flags & IR3_REG_CONST) && (reg->flags & IR3_REG_CONST))
					return false;
				if ((flags & IR3_REG_IMMED) && (reg->flags & IR3_REG_IMMED))
					return false;
			}
			/* cannot be const + ABS|NEG: */
			if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
					IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
				return false;
		}
		break;
	case 3:
		valid_flags = ir3_cat3_absneg(instr->opc) |
				IR3_REG_CONST | IR3_REG_RELATIV;

		if (flags & ~valid_flags)
			return false;

		if (flags & (IR3_REG_CONST | IR3_REG_RELATIV)) {
			/* cannot deal w/ const/relativ in 2nd src: */
			if (n == 1)
				return false;
		}

		if (flags & IR3_REG_CONST) {
			/* cannot be const + ABS|NEG: */
			if (flags & (IR3_REG_FABS | IR3_REG_FNEG |
					IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT))
				return false;
		}
		break;
	case 4:
		/* seems like blob compiler avoids const as src.. */
		/* TODO double check if this is still the case on a4xx */
		if (flags & (IR3_REG_CONST | IR3_REG_IMMED))
			return false;
		if (flags & (IR3_REG_SABS | IR3_REG_SNEG))
			return false;
		break;
	}

	return true;
}

/* propagate register flags from src to dst.. negates need special
 * handling to cancel each other out.
 */
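/* e.g. folding a (neg) mov into a src that already carries (neg) yields
 * the original positive value again, which is why FNEG/SNEG/BNOT are
 * XORed rather than ORed below.
 */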
static void combine_flags(unsigned *dstflags, struct ir3_instruction *src)
{
	unsigned srcflags = src->regs[1]->flags;

	/* if what we are combining into already has (abs) flags,
	 * we can drop (neg) from src:
	 */
	if (*dstflags & IR3_REG_FABS)
		srcflags &= ~IR3_REG_FNEG;
	if (*dstflags & IR3_REG_SABS)
		srcflags &= ~IR3_REG_SNEG;

	if (srcflags & IR3_REG_FABS)
		*dstflags |= IR3_REG_FABS;
	if (srcflags & IR3_REG_SABS)
		*dstflags |= IR3_REG_SABS;
	if (srcflags & IR3_REG_FNEG)
		*dstflags ^= IR3_REG_FNEG;
	if (srcflags & IR3_REG_SNEG)
		*dstflags ^= IR3_REG_SNEG;
	if (srcflags & IR3_REG_BNOT)
		*dstflags ^= IR3_REG_BNOT;

	*dstflags &= ~IR3_REG_SSA;
	*dstflags |= srcflags & IR3_REG_SSA;
	*dstflags |= srcflags & IR3_REG_CONST;
	*dstflags |= srcflags & IR3_REG_IMMED;
	*dstflags |= srcflags & IR3_REG_RELATIV;
	*dstflags |= srcflags & IR3_REG_ARRAY;

	/* if src of the src is boolean we can drop the (abs) since we know
	 * the source value is already a positive integer.  This cleans
	 * up the absnegs that get inserted when converting between nir and
	 * native boolean (see ir3_b2n/n2b)
	 */
	struct ir3_instruction *srcsrc = ssa(src->regs[1]);
	if (srcsrc && is_bool(srcsrc))
		*dstflags &= ~IR3_REG_SABS;
}

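/* Turn an immediate src into a reference to the const file: fold any
 * abs/neg into the value itself, then reuse an existing slot in the
 * shader variant's immediates[] if the value is already present,
 * otherwise append a new one.  Returns a cloned register rewritten to
 * IR3_REG_CONST.
 */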
static struct ir3_register *
lower_immed(struct ir3_cp_ctx *ctx, struct ir3_register *reg, unsigned new_flags)
{
	unsigned swiz, idx, i;

	reg = ir3_reg_clone(ctx->shader, reg);

	/* in some cases, there are restrictions on (abs)/(neg) plus const..
	 * so just evaluate those and clear the flags:
	 */
	if (new_flags & IR3_REG_SABS) {
		reg->iim_val = abs(reg->iim_val);
		new_flags &= ~IR3_REG_SABS;
	}

	if (new_flags & IR3_REG_FABS) {
		reg->fim_val = fabs(reg->fim_val);
		new_flags &= ~IR3_REG_FABS;
	}

	if (new_flags & IR3_REG_SNEG) {
		reg->iim_val = -reg->iim_val;
		new_flags &= ~IR3_REG_SNEG;
	}

	if (new_flags & IR3_REG_FNEG) {
		reg->fim_val = -reg->fim_val;
		new_flags &= ~IR3_REG_FNEG;
	}

	for (i = 0; i < ctx->immediate_idx; i++) {
		swiz = i % 4;
		idx  = i / 4;

		if (ctx->so->immediates[idx].val[swiz] == reg->uim_val) {
			break;
		}
	}

	if (i == ctx->immediate_idx) {
		/* need to generate a new immediate: */
		swiz = i % 4;
		idx  = i / 4;
		ctx->so->immediates[idx].val[swiz] = reg->uim_val;
		ctx->so->immediates_count = idx + 1;
		ctx->immediate_idx++;
	}

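	/* rewrite the reg as a const-file reference; constbase.immediate is in
	 * vec4 units while reg->num addresses individual scalar components,
	 * hence the * 4:
	 */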
	new_flags &= ~IR3_REG_IMMED;
	new_flags |= IR3_REG_CONST;
	reg->flags = new_flags;
	reg->num = i + (4 * ctx->so->constbase.immediate);

	return reg;
}

/**
 * Handle cp for a given src register.  This additionally handles
 * the cases of collapsing immediate/const (which replace the src
 * register with a non-ssa src) or collapsing mov's from relative
 * src (which needs to also fixup the address src reference by the
 * instruction).
 */
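/* (note: 'reg' is instr->regs[n + 1], since regs[0] is the dst and srcs
 * start at regs[1])
 */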
static void
reg_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr,
		struct ir3_register *reg, unsigned n)
{
	struct ir3_instruction *src = ssa(reg);

	/* don't propagate copies into a PHI, since we don't know if the
	 * src block executed:
	 */
	if (instr->opc == OPC_META_PHI)
		return;

	if (is_eligible_mov(src, true)) {
		/* simple case, no immed/const/relativ, only mov's w/ ssa src: */
		struct ir3_register *src_reg = src->regs[1];
		unsigned new_flags = reg->flags;

		combine_flags(&new_flags, src);

		if (valid_flags(instr, n, new_flags)) {
			if (new_flags & IR3_REG_ARRAY) {
				debug_assert(!(reg->flags & IR3_REG_ARRAY));
				reg->array = src_reg->array;
			}
			reg->flags = new_flags;
			reg->instr = ssa(src_reg);
		}

		src = ssa(reg);      /* could be null for IR3_REG_ARRAY case */
		if (!src)
			return;
	} else if (is_same_type_mov(src) &&
			/* cannot collapse const/immed/etc into meta instrs: */
			!is_meta(instr)) {
		/* immed/const/etc cases, which require some special handling: */
		struct ir3_register *src_reg = src->regs[1];
		unsigned new_flags = reg->flags;

		combine_flags(&new_flags, src);

		if (!valid_flags(instr, n, new_flags)) {
			/* See if lowering an immediate to const would help. */
			if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
				debug_assert(new_flags & IR3_REG_IMMED);
				instr->regs[n + 1] = lower_immed(ctx, src_reg, new_flags);
				return;
			}

			/* special case for "normal" mad instructions, we can
			 * try swapping the first two args if that fits better.
			 *
			 * the "plain" MAD's (ie. the ones that don't shift first
			 * src prior to multiply) can swap their first two srcs if
			 * src[0] is !CONST and src[1] is CONST:
			 */
			if ((n == 1) && is_mad(instr->opc) &&
					!(instr->regs[0 + 1]->flags & (IR3_REG_CONST | IR3_REG_RELATIV)) &&
					valid_flags(instr, 0, new_flags)) {
				/* swap src[0] and src[1]: */
				struct ir3_register *tmp;
				tmp = instr->regs[0 + 1];
				instr->regs[0 + 1] = instr->regs[1 + 1];
				instr->regs[1 + 1] = tmp;
				n = 0;
			} else {
				return;
			}
		}

		/* Here we handle the special case of mov from
		 * CONST and/or RELATIV.  These need to be handled
		 * specially, because in the case of move from CONST
		 * there is no src ir3_instruction so we need to
		 * replace the ir3_register.  And in the case of
		 * RELATIV we need to handle the address register
		 * dependency.
		 */
		if (src_reg->flags & IR3_REG_CONST) {
			/* an instruction cannot reference two different
			 * address registers:
			 */
			if ((src_reg->flags & IR3_REG_RELATIV) &&
					conflicts(instr->address, reg->instr->address))
				return;

			/* This seems to be a hw bug, or something where the timings
			 * just somehow don't work out.  This restriction may only
			 * apply if the first src is also CONST.
			 */
			if ((opc_cat(instr->opc) == 3) && (n == 2) &&
					(src_reg->flags & IR3_REG_RELATIV) &&
					(src_reg->array.offset == 0))
				return;

			src_reg = ir3_reg_clone(instr->block->shader, src_reg);
			src_reg->flags = new_flags;
			instr->regs[n+1] = src_reg;

			if (src_reg->flags & IR3_REG_RELATIV)
				ir3_instr_set_address(instr, reg->instr->address);

			return;
		}

		if ((src_reg->flags & IR3_REG_RELATIV) &&
				!conflicts(instr->address, reg->instr->address)) {
			src_reg = ir3_reg_clone(instr->block->shader, src_reg);
			src_reg->flags = new_flags;
			instr->regs[n+1] = src_reg;
			ir3_instr_set_address(instr, reg->instr->address);

			return;
		}

		/* NOTE: seems we can only do immed integers, so don't
		 * need to care about float.  But we do need to handle
		 * abs/neg *before* checking that the immediate requires
		 * few enough bits to encode:
		 *
		 * TODO: do we need to do something to avoid accidentally
		 * catching a float immed?
		 */
		if (src_reg->flags & IR3_REG_IMMED) {
			int32_t iim_val = src_reg->iim_val;

			debug_assert((opc_cat(instr->opc) == 1) ||
					(opc_cat(instr->opc) == 6) ||
					ir3_cat2_int(instr->opc));

			if (new_flags & IR3_REG_SABS)
				iim_val = abs(iim_val);

			if (new_flags & IR3_REG_SNEG)
				iim_val = -iim_val;

			if (new_flags & IR3_REG_BNOT)
				iim_val = ~iim_val;

			/* other than category 1 (mov) we can only encode up to 10 bits: */
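			/* (~0x3ff masks everything above bit 9, so the immediate is
			 * accepted when either iim_val or -iim_val fits in the low
			 * 10 bits)
			 */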
			if ((instr->opc == OPC_MOV) ||
					!((iim_val & ~0x3ff) && (-iim_val & ~0x3ff))) {
				new_flags &= ~(IR3_REG_SABS | IR3_REG_SNEG | IR3_REG_BNOT);
				src_reg = ir3_reg_clone(instr->block->shader, src_reg);
				src_reg->flags = new_flags;
				src_reg->iim_val = iim_val;
				instr->regs[n+1] = src_reg;
			} else if (valid_flags(instr, n, (new_flags & ~IR3_REG_IMMED) | IR3_REG_CONST)) {
				/* See if lowering an immediate to const would help. */
				instr->regs[n+1] = lower_immed(ctx, src_reg, new_flags);
			}

			return;
		}
	}
}

/* Handle special case of eliminating output mov, and similar cases where
 * there isn't a normal "consuming" instruction.  In this case we cannot
 * collapse flags (ie. output mov from const, or w/ abs/neg flags, cannot
 * be eliminated)
 */
static struct ir3_instruction *
eliminate_output_mov(struct ir3_instruction *instr)
{
	if (is_eligible_mov(instr, false)) {
		struct ir3_register *reg = instr->regs[1];
		if (!(reg->flags & IR3_REG_ARRAY)) {
			struct ir3_instruction *src_instr = ssa(reg);
			debug_assert(src_instr);
			return src_instr;
		}
	}
	return instr;
}

/**
 * Find instruction src's which are mov's that can be collapsed, replacing
 * the mov dst with the mov src
 */
static void
instr_cp(struct ir3_cp_ctx *ctx, struct ir3_instruction *instr)
{
	struct ir3_register *reg;

	if (instr->regs_count == 0)
		return;

	if (ir3_instr_check_mark(instr))
		return;

	/* walk down the graph from each src: */
	foreach_src_n(reg, n, instr) {
		struct ir3_instruction *src = ssa(reg);

		if (!src)
			continue;

		instr_cp(ctx, src);

		/* TODO non-indirect access we could figure out which register
		 * we actually want and allow cp..
		 */
		if (reg->flags & IR3_REG_ARRAY)
			continue;

		reg_cp(ctx, instr, reg, n);
	}

	if (instr->regs[0]->flags & IR3_REG_ARRAY) {
		struct ir3_instruction *src = ssa(instr->regs[0]);
		if (src)
			instr_cp(ctx, src);
	}

	if (instr->address) {
		instr_cp(ctx, instr->address);
		ir3_instr_set_address(instr, eliminate_output_mov(instr->address));
	}

	/* we can end up with extra cmps.s from frontend, which uses a
	 *
	 *    cmps.s p0.x, cond, 0
	 *
	 * as a way to mov into the predicate register.  But frequently 'cond'
	 * is itself a cmps.s/cmps.f/cmps.u.  So detect this special case and
	 * just re-write the instruction writing predicate register to get rid
	 * of the double cmps.
	 */
	if ((instr->opc == OPC_CMPS_S) &&
			(instr->regs[0]->num == regid(REG_P0, 0)) &&
			ssa(instr->regs[1]) &&
			(instr->regs[2]->flags & IR3_REG_IMMED) &&
			(instr->regs[2]->iim_val == 0)) {
		struct ir3_instruction *cond = ssa(instr->regs[1]);
		switch (cond->opc) {
		case OPC_CMPS_S:
		case OPC_CMPS_F:
		case OPC_CMPS_U:
			instr->opc   = cond->opc;
			instr->flags = cond->flags;
			instr->cat2  = cond->cat2;
			instr->address = cond->address;
			instr->regs[1] = cond->regs[1];
			instr->regs[2] = cond->regs[2];
			break;
		default:
			break;
		}
	}
}

void
ir3_cp(struct ir3 *ir, struct ir3_shader_variant *so)
{
	struct ir3_cp_ctx ctx = {
			.shader = ir,
			.so = so,
	};

	ir3_clear_mark(ir);

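	/* Start from the roots that must stay live: shader outputs, "keeps"
	 * (instructions that must not be eliminated), and per-block conditions.
	 * instr_cp() marks visited instructions, so shared subtrees are only
	 * walked once.
	 */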
	for (unsigned i = 0; i < ir->noutputs; i++) {
		if (ir->outputs[i]) {
			instr_cp(&ctx, ir->outputs[i]);
			ir->outputs[i] = eliminate_output_mov(ir->outputs[i]);
		}
	}

	for (unsigned i = 0; i < ir->keeps_count; i++) {
		instr_cp(&ctx, ir->keeps[i]);
		ir->keeps[i] = eliminate_output_mov(ir->keeps[i]);
	}

	list_for_each_entry (struct ir3_block, block, &ir->block_list, node) {
		if (block->condition) {
			instr_cp(&ctx, block->condition);
			block->condition = eliminate_output_mov(block->condition);
		}
	}
}
591