1 /*
2 * Copyright (C) 2018 Jonathan Marek <jonathan@marek.ca>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining a
5 * copy of this software and associated documentation files (the "Software"),
6 * to deal in the Software without restriction, including without limitation
7 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
8 * and/or sell copies of the Software, and to permit persons to whom the
9 * Software is furnished to do so, subject to the following conditions:
10 *
11 * The above copyright notice and this permission notice (including the next
12 * paragraph) shall be included in all copies or substantial portions of the
13 * Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 * SOFTWARE.
22 *
23 * Authors:
24 * Jonathan Marek <jonathan@marek.ca>
25 */
26
27 #include "ir2_private.h"
28
29 static bool
is_mov(struct ir2_instr * instr)30 is_mov(struct ir2_instr *instr)
31 {
32 return instr->type == IR2_ALU && instr->alu.vector_opc == MAXv &&
33 instr->src_count == 1;
34 }
35
36 static void
src_combine(struct ir2_src * src,struct ir2_src b)37 src_combine(struct ir2_src *src, struct ir2_src b)
38 {
39 src->num = b.num;
40 src->type = b.type;
41 src->swizzle = swiz_merge(b.swizzle, src->swizzle);
42 if (!src->abs) /* if we have abs we don't care about previous negate */
43 src->negate ^= b.negate;
44 src->abs |= b.abs;
45 }
46
47 /* cp_src: replace src regs when they refer to a mov instruction
48 * example:
49 * ALU: MAXv R7 = C7, C7
50 * ALU: MULADDv R7 = R7, R10, R0.xxxx
51 * becomes:
52 * ALU: MULADDv R7 = C7, R10, R0.xxxx
53 */
54 void
cp_src(struct ir2_context * ctx)55 cp_src(struct ir2_context *ctx)
56 {
57 struct ir2_instr *p;
58
59 ir2_foreach_instr (instr, ctx) {
60 ir2_foreach_src (src, instr) {
61 /* loop to replace recursively */
62 do {
63 if (src->type != IR2_SRC_SSA)
64 break;
65
66 p = &ctx->instr[src->num];
67 /* don't work across blocks to avoid possible issues */
68 if (p->block_idx != instr->block_idx)
69 break;
70
71 if (!is_mov(p))
72 break;
73
74 if (p->alu.saturate)
75 break;
76
77 /* cant apply abs to const src, const src only for alu */
78 if (p->src[0].type == IR2_SRC_CONST &&
79 (src->abs || instr->type != IR2_ALU))
80 break;
81
82 src_combine(src, p->src[0]);
83 } while (1);
84 }
85 }
86 }
87
88 /* cp_export: replace mov to export when possible
89 * in the cp_src pass we bypass any mov instructions related
90 * to the src registers, but for exports for need something different
91 * example:
92 * ALU: MAXv R3.x___ = C9.x???, C9.x???
93 * ALU: MAXv R3._y__ = R0.?x??, C8.?x??
94 * ALU: MAXv export0 = R3.yyyx, R3.yyyx
95 * becomes:
96 * ALU: MAXv export0.___w = C9.???x, C9.???x
97 * ALU: MAXv export0.xyz_ = R0.xxx?, C8.xxx?
98 *
99 */
100 void
cp_export(struct ir2_context * ctx)101 cp_export(struct ir2_context *ctx)
102 {
103 struct ir2_instr *c[4], *ins[4];
104 struct ir2_src *src;
105 struct ir2_reg *reg;
106 unsigned ncomp;
107
108 ir2_foreach_instr (instr, ctx) {
109 if (!is_export(instr)) /* TODO */
110 continue;
111
112 if (!is_mov(instr))
113 continue;
114
115 src = &instr->src[0];
116
117 if (src->negate || src->abs) /* TODO handle these cases */
118 continue;
119
120 if (src->type == IR2_SRC_INPUT || src->type == IR2_SRC_CONST)
121 continue;
122
123 reg = get_reg_src(ctx, src);
124 ncomp = dst_ncomp(instr);
125
126 unsigned reswiz[4] = {};
127 unsigned num_instr = 0;
128
129 /* fill array c with pointers to instrs that write each component */
130 if (src->type == IR2_SRC_SSA) {
131 struct ir2_instr *instr = &ctx->instr[src->num];
132
133 if (instr->type != IR2_ALU)
134 continue;
135
136 for (int i = 0; i < ncomp; i++)
137 c[i] = instr;
138
139 ins[num_instr++] = instr;
140 reswiz[0] = src->swizzle;
141 } else {
142 bool ok = true;
143 unsigned write_mask = 0;
144
145 ir2_foreach_instr (instr, ctx) {
146 if (instr->is_ssa || instr->reg != reg)
147 continue;
148
149 /* set by non-ALU */
150 if (instr->type != IR2_ALU) {
151 ok = false;
152 break;
153 }
154
155 /* component written more than once */
156 if (write_mask & instr->alu.write_mask) {
157 ok = false;
158 break;
159 }
160
161 write_mask |= instr->alu.write_mask;
162
163 /* src pointers for components */
164 for (int i = 0, j = 0; i < 4; i++) {
165 unsigned k = swiz_get(src->swizzle, i);
166 if (instr->alu.write_mask & 1 << k) {
167 c[i] = instr;
168
169 /* reswiz = compressed src->swizzle */
170 unsigned x = 0;
171 for (int i = 0; i < k; i++)
172 x += !!(instr->alu.write_mask & 1 << i);
173
174 assert(src->swizzle || x == j);
175 reswiz[num_instr] |= swiz_set(x, j++);
176 }
177 }
178 ins[num_instr++] = instr;
179 }
180 if (!ok)
181 continue;
182 }
183
184 bool redirect = true;
185
186 /* must all be in same block */
187 for (int i = 0; i < ncomp; i++)
188 redirect &= (c[i]->block_idx == instr->block_idx);
189
190 /* no other instr using the value */
191 ir2_foreach_instr (p, ctx) {
192 if (p == instr)
193 continue;
194 ir2_foreach_src (src, p)
195 redirect &= reg != get_reg_src(ctx, src);
196 }
197
198 if (!redirect)
199 continue;
200
201 /* redirect the instructions writing to the register */
202 for (int i = 0; i < num_instr; i++) {
203 struct ir2_instr *p = ins[i];
204
205 p->alu.export = instr->alu.export;
206 p->alu.write_mask = 0;
207 p->is_ssa = true;
208 p->ssa.ncomp = 0;
209 memset(p->ssa.comp, 0, sizeof(p->ssa.comp));
210 p->alu.saturate |= instr->alu.saturate;
211
212 switch (p->alu.vector_opc) {
213 case PRED_SETE_PUSHv ... PRED_SETGTE_PUSHv:
214 case DOT2ADDv:
215 case DOT3v:
216 case DOT4v:
217 case CUBEv:
218 continue;
219 default:
220 break;
221 }
222 ir2_foreach_src (s, p)
223 swiz_merge_p(&s->swizzle, reswiz[i]);
224 }
225
226 for (int i = 0; i < ncomp; i++) {
227 c[i]->alu.write_mask |= (1 << i);
228 c[i]->ssa.ncomp++;
229 }
230 instr->type = IR2_NONE;
231 instr->need_emit = false;
232 }
233 }
234