• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /*
2  * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3  *
4  * Permission is hereby granted, free of charge, to any person obtaining a
5  * copy of this software and associated documentation files (the "Software"),
6  * to deal in the Software without restriction, including without limitation
7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8  * and/or sell copies of the Software, and to permit persons to whom the
9  * Software is furnished to do so, subject to the following conditions:
10  *
11  * The above copyright notice and this permission notice (including the next
12  * paragraph) shall be included in all copies or substantial portions of the
13  * Software.
14  *
15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20  * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21  * SOFTWARE.
22  *
23  * Authors:
24  *    Jonathan Marek <jonathan@marek.ca>
25  */
26 
27 #include "ir2_private.h"
28 
is_mov(struct ir2_instr * instr)29 static bool is_mov(struct ir2_instr *instr)
30 {
31 	return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
32 		instr->src_count == 1;
33 }
34 
src_combine(struct ir2_src * src,struct ir2_src b)35 static void src_combine(struct ir2_src *src, struct ir2_src b)
36 {
37 	src->num = b.num;
38 	src->type = b.type;
39 	src->swizzle = swiz_merge(b.swizzle, src->swizzle);
40 	if (!src->abs) /* if we have abs we don't care about previous negate */
41 		src->negate ^= b.negate;
42 	src->abs |= b.abs;
43 }
44 
45 /* cp_src: replace src regs when they refer to a mov instruction
46  * example:
47  *	ALU:      MAXv    R7 = C7, C7
48  *	ALU:      MULADDv R7 = R7, R10, R0.xxxx
49  * becomes:
50  *	ALU:      MULADDv R7 = C7, R10, R0.xxxx
51  */
cp_src(struct ir2_context * ctx)52 void cp_src(struct ir2_context *ctx)
53 {
54 	struct ir2_instr *p;
55 
56 	ir2_foreach_instr(instr, ctx) {
57 		ir2_foreach_src(src, instr) {
58 			/* loop to replace recursively */
59 			do {
60 				if (src->type != IR2_SRC_SSA)
61 					break;
62 
63 				p = &ctx->instr[src->num];
64 				/* don't work across blocks to avoid possible issues */
65 				if (p->block_idx != instr->block_idx)
66 					break;
67 
68 				if (!is_mov(p))
69 					break;
70 
71 				if (p->alu.saturate)
72 					break;
73 
74 				/* cant apply abs to const src, const src only for alu */
75 				if (p->src[0].type == IR2_SRC_CONST &&
76 					(src->abs || instr->type != IR2_ALU))
77 					break;
78 
79 				src_combine(src, p->src[0]);
80 			} while (1);
81 		}
82 	}
83 }
84 
85 /* cp_export: replace mov to export when possible
86  * in the cp_src pass we bypass any mov instructions related
87  * to the src registers, but for exports for need something different
88  * example:
89  *	ALU:      MAXv    R3.x___ = C9.x???, C9.x???
90  *	ALU:      MAXv    R3._y__ = R0.?x??, C8.?x??
91  *	ALU:      MAXv    export0 = R3.yyyx, R3.yyyx
92  * becomes:
93  *	ALU:      MAXv    export0.___w = C9.???x, C9.???x
94  *	ALU:      MAXv    export0.xyz_ = R0.xxx?, C8.xxx?
95  *
96  */
cp_export(struct ir2_context * ctx)97 void cp_export(struct ir2_context *ctx)
98 {
99 	struct ir2_instr *c[4], *ins[4];
100 	struct ir2_src *src;
101 	struct ir2_reg *reg;
102 	unsigned ncomp;
103 
104 	ir2_foreach_instr(instr, ctx) {
105 		if (!is_export(instr)) /* TODO */
106 			continue;
107 
108 		if (!is_mov(instr))
109 			continue;
110 
111 		src = &instr->src[0];
112 
113 		if (src->negate || src->abs) /* TODO handle these cases */
114 			continue;
115 
116 		if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
117 			continue;
118 
119 		reg = get_reg_src(ctx, src);
120 		ncomp = dst_ncomp(instr);
121 
122 		unsigned reswiz[4] = {};
123 		unsigned num_instr = 0;
124 
125 		/* fill array c with pointers to instrs that write each component */
126 		if (src->type == IR2_SRC_SSA) {
127 			struct ir2_instr *instr = &ctx->instr[src->num];
128 
129 			if (instr->type != IR2_ALU)
130 				continue;
131 
132 			for (int i = 0; i < ncomp; i++)
133 				c[i] = instr;
134 
135 			ins[num_instr++] = instr;
136 			reswiz[0] = src->swizzle;
137 		} else {
138 			bool ok = true;
139 			unsigned write_mask = 0;
140 
141 			ir2_foreach_instr(instr, ctx) {
142 				if (instr->is_ssa || instr->reg != reg)
143 					continue;
144 
145 				/* set by non-ALU */
146 				if (instr->type != IR2_ALU) {
147 					ok = false;
148 					break;
149 				}
150 
151 				/* component written more than once */
152 				if (write_mask & instr->alu.write_mask) {
153 					ok = false;
154 					break;
155 				}
156 
157 				write_mask |= instr->alu.write_mask;
158 
159 				/* src pointers for components */
160 				for (int i = 0, j = 0; i < 4; i++) {
161 					unsigned k = swiz_get(src->swizzle, i);
162 					if (instr->alu.write_mask & 1 << k) {
163 						c[i] = instr;
164 
165 						/* reswiz = compressed src->swizzle */
166 						unsigned x = 0;
167 						for (int i = 0; i < k; i++)
168 							x += !!(instr->alu.write_mask & 1 << i);
169 
170 						assert(src->swizzle || x == j);
171 						reswiz[num_instr] |= swiz_set(x, j++);
172 					}
173 				}
174 				ins[num_instr++] = instr;
175 			}
176 			if (!ok)
177 				continue;
178 		}
179 
180 		bool redirect = true;
181 
182 		/* must all be in same block */
183 		for (int i = 0; i < ncomp; i++)
184 			redirect &= (c[i]->block_idx == instr->block_idx);
185 
186 		/* no other instr using the value */
187 		ir2_foreach_instr(p, ctx) {
188 			if (p == instr)
189 				continue;
190 			ir2_foreach_src(src, p)
191 				redirect &= reg != get_reg_src(ctx, src);
192 		}
193 
194 		if (!redirect)
195 			continue;
196 
197 		/* redirect the instructions writing to the register */
198 		for (int i = 0; i < num_instr; i++) {
199 			struct ir2_instr *p = ins[i];
200 
201 			p->alu.export = instr->alu.export;
202 			p->alu.write_mask = 0;
203 			p->is_ssa = true;
204 			p->ssa.ncomp = 0;
205 			memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
206 			p->alu.saturate |= instr->alu.saturate;
207 
208 			switch (p->alu.vector_opc) {
209 			case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
210 			case DOT2ADDv:
211 			case DOT3v:
212 			case DOT4v:
213 			case CUBEv:
214 				continue;
215 			default:
216 				break;
217 			}
218 			ir2_foreach_src(s, p)
219 				swiz_merge_p(&s->swizzle, reswiz[i]);
220 		}
221 
222 		for (int i = 0; i < ncomp; i++) {
223 			c[i]->alu.write_mask |= (1 << i);
224 			c[i]->ssa.ncomp++;
225 		}
226 		instr->type = IR2_NONE;
227 		instr->need_emit = false;
228 	}
229 }
230