1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_peephole.h"
28 #include "sfn_instr_alugroup.h"
29
30 namespace r600 {
31
32 class PeepholeVisitor : public InstrVisitor {
33 public:
34 void visit(AluInstr *instr) override;
35 void visit(AluGroup *instr) override;
visit(TexInstr * instr)36 void visit(TexInstr *instr) override { (void)instr; };
visit(ExportInstr * instr)37 void visit(ExportInstr *instr) override { (void)instr; }
visit(FetchInstr * instr)38 void visit(FetchInstr *instr) override { (void)instr; }
39 void visit(Block *instr) override;
visit(ControlFlowInstr * instr)40 void visit(ControlFlowInstr *instr) override { (void)instr; }
41 void visit(IfInstr *instr) override;
visit(ScratchIOInstr * instr)42 void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)43 void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)44 void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)45 void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)46 void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)47 void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)48 void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)49 void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)50 void visit(RatInstr *instr) override { (void)instr; };
51
52 void convert_to_mov(AluInstr *alu, int src_idx);
53
54 void apply_source_mods(AluInstr *alu);
55 void apply_dest_clamp(AluInstr *alu);
56 void try_fuse_with_prev(AluInstr *alu);
57
58 bool progress{false};
59 };
60
61 bool
peephole(Shader & sh)62 peephole(Shader& sh)
63 {
64 PeepholeVisitor peephole;
65 for (auto b : sh.func())
66 b->accept(peephole);
67 return peephole.progress;
68 }
69
70 class ReplacePredicate : public AluInstrVisitor {
71 public:
ReplacePredicate(AluInstr * pred)72 ReplacePredicate(AluInstr *pred):
73 m_pred(pred)
74 {
75 }
76
77 using AluInstrVisitor::visit;
78
79 void visit(AluInstr *alu) override;
80
81 AluInstr *m_pred;
82 bool success{false};
83 };
84
85 void
visit(AluInstr * instr)86 PeepholeVisitor::visit(AluInstr *instr)
87 {
88 switch (instr->opcode()) {
89 case op1_mov:
90 if (instr->has_alu_flag(alu_dst_clamp))
91 apply_dest_clamp(instr);
92 else if (!instr->has_source_mod(0, AluInstr::mod_abs) &&
93 !instr->has_source_mod(0, AluInstr::mod_neg))
94 try_fuse_with_prev(instr);
95 break;
96 case op2_add:
97 case op2_add_int:
98 if (value_is_const_uint(instr->src(0), 0))
99 convert_to_mov(instr, 1);
100 else if (value_is_const_uint(instr->src(1), 0))
101 convert_to_mov(instr, 0);
102 break;
103 case op2_mul:
104 case op2_mul_ieee:
105 if (value_is_const_float(instr->src(0), 1.0f))
106 convert_to_mov(instr, 1);
107 else if (value_is_const_float(instr->src(1), 1.0f))
108 convert_to_mov(instr, 0);
109 break;
110 case op3_muladd:
111 case op3_muladd_ieee:
112 if (value_is_const_uint(instr->src(0), 0) || value_is_const_uint(instr->src(1), 0))
113 convert_to_mov(instr, 2);
114 break;
115 case op2_killne_int:
116 if (value_is_const_uint(instr->src(1), 0)) {
117 auto src0 = instr->psrc(0)->as_register();
118 if (src0 && src0->has_flag(Register::ssa)) {
119 auto parent = *src0->parents().begin();
120 ReplacePredicate visitor(instr);
121 parent->accept(visitor);
122 progress |= visitor.success;
123 }
124 }
125 break;
126 default:;
127 }
128
129 auto opinfo = alu_ops.at(instr->opcode());
130 if (opinfo.can_srcmod)
131 apply_source_mods(instr);
132 }
133
134 void
convert_to_mov(AluInstr * alu,int src_idx)135 PeepholeVisitor::convert_to_mov(AluInstr *alu, int src_idx)
136 {
137 AluInstr::SrcValues new_src{alu->psrc(src_idx)};
138 alu->set_sources(new_src);
139 alu->set_op(op1_mov);
140 progress = true;
141 }
142
143 void
visit(UNUSED AluGroup * instr)144 PeepholeVisitor::visit(UNUSED AluGroup *instr)
145 {
146 for (auto alu : *instr) {
147 if (!alu)
148 continue;
149 visit(alu);
150 }
151 }
152
153 void
visit(Block * instr)154 PeepholeVisitor::visit(Block *instr)
155 {
156 for (auto& i : *instr)
157 i->accept(*this);
158 }
159
160 void
visit(IfInstr * instr)161 PeepholeVisitor::visit(IfInstr *instr)
162 {
163 auto pred = instr->predicate();
164
165 auto& src1 = pred->src(1);
166 if (value_is_const_uint(src1, 0)) {
167 auto src0 = pred->src(0).as_register();
168 if (src0 && src0->has_flag(Register::ssa) && !src0->parents().empty()) {
169 assert(src0->parents().size() == 1);
170 auto parent = *src0->parents().begin();
171
172 ReplacePredicate visitor(pred);
173 parent->accept(visitor);
174 progress |= visitor.success;
175 }
176 }
177 }
178
apply_source_mods(AluInstr * alu)179 void PeepholeVisitor::apply_source_mods(AluInstr *alu)
180 {
181 bool has_abs = alu->n_sources() / alu->alu_slots() < 3;
182
183 for (unsigned i = 0; i < alu->n_sources(); ++i) {
184
185 auto reg = alu->psrc(i)->as_register();
186 if (!reg)
187 continue;
188 if (!reg->has_flag(Register::ssa))
189 continue;
190 if (reg->parents().size() != 1)
191 continue;
192
193 auto p = (*reg->parents().begin())->as_alu();
194 if (!p)
195 continue;
196
197 if (p->opcode() != op1_mov)
198 continue;
199
200 if (!has_abs && p->has_source_mod(0, AluInstr::mod_abs))
201 continue;
202
203 if (!p->has_source_mod(0, AluInstr::mod_abs) &&
204 !p->has_source_mod(0, AluInstr::mod_neg))
205 continue;
206
207 if (p->has_alu_flag(alu_dst_clamp))
208 continue;
209
210 auto new_src = p->psrc(0);
211 bool new_src_not_pinned = new_src->pin() == pin_free ||
212 new_src->pin() == pin_none;
213
214 bool old_src_not_pinned = reg->pin() == pin_free ||
215 reg->pin() == pin_none;
216
217 bool sources_equal_channel = reg->pin() == pin_chan &&
218 new_src->pin() == pin_chan &&
219 new_src->chan() == reg->chan();
220
221 if (!new_src_not_pinned &&
222 !old_src_not_pinned &&
223 !sources_equal_channel)
224 continue;
225
226 uint32_t to_set = 0;
227 AluInstr::SourceMod to_clear = AluInstr::mod_none;
228
229 if (p->has_source_mod(0, AluInstr::mod_abs))
230 to_set |= AluInstr::mod_abs;
231 if (p->has_source_mod(0, AluInstr::mod_neg)) {
232 if (!alu->has_source_mod(i, AluInstr::mod_neg))
233 to_set |= AluInstr::mod_neg;
234 else
235 to_clear = AluInstr::mod_neg;
236 }
237
238 progress |= alu->replace_src(i, new_src, to_set, to_clear);
239 }
240 }
241
try_fuse_with_prev(AluInstr * alu)242 void PeepholeVisitor::try_fuse_with_prev(AluInstr *alu)
243 {
244 if (auto reg = alu->src(0).as_register()) {
245 if (!reg->has_flag(Register::ssa) ||
246 reg->uses().size() != 1 ||
247 reg->parents().size() != 1)
248 return;
249 auto p = *reg->parents().begin();
250 auto dest = alu->dest();
251 if (!dest->has_flag(Register::ssa) &&
252 alu->block_id() != p->block_id())
253 return;
254 if (p->replace_dest(dest, alu)) {
255 dest->del_parent(alu);
256 dest->add_parent(p);
257 for (auto d : alu->dependend_instr()) {
258 d->add_required_instr(p);
259 }
260 alu->set_dead();
261 progress = true;
262 }
263 }
264 }
265
apply_dest_clamp(AluInstr * alu)266 void PeepholeVisitor::apply_dest_clamp(AluInstr *alu)
267 {
268 if (alu->has_source_mod(0, AluInstr::mod_abs) ||
269 alu->has_source_mod(0, AluInstr::mod_neg))
270 return;
271
272 auto dest = alu->dest();
273
274 assert(dest);
275
276 if (!dest->has_flag(Register::ssa))
277 return;
278
279 auto src = alu->psrc(0)->as_register();
280 if (!src)
281 return;
282
283 if (src->parents().size() != 1)
284 return;
285
286 if (src->uses().size() != 1)
287 return;
288
289 auto new_parent = (*src->parents().begin())->as_alu();
290 if (!new_parent)
291 return;
292
293 auto opinfo = alu_ops.at(new_parent->opcode());
294 if (!opinfo.can_clamp)
295 return;
296
297 // Move clamp flag to the parent, and let copy propagation do the rest
298 new_parent->set_alu_flag(alu_dst_clamp);
299 alu->reset_alu_flag(alu_dst_clamp);
300
301 progress = true;
302 }
303
304
305 static EAluOp
pred_from_op(EAluOp pred_op,EAluOp op)306 pred_from_op(EAluOp pred_op, EAluOp op)
307 {
308 switch (pred_op) {
309 case op2_pred_setne_int:
310 switch (op) {
311 case op2_setge_dx10:
312 return op2_pred_setge;
313 case op2_setgt_dx10:
314 return op2_pred_setgt;
315 case op2_sete_dx10:
316 return op2_pred_sete;
317 case op2_setne_dx10:
318 return op2_pred_setne;
319
320 case op2_setge_int:
321 return op2_pred_setge_int;
322 case op2_setgt_int:
323 return op2_pred_setgt_int;
324 case op2_setge_uint:
325 return op2_pred_setge_uint;
326 case op2_setgt_uint:
327 return op2_pred_setgt_uint;
328 case op2_sete_int:
329 return op2_prede_int;
330 case op2_setne_int:
331 return op2_pred_setne_int;
332 default:
333 return op0_nop;
334 }
335 case op2_prede_int:
336 switch (op) {
337 case op2_sete_int:
338 return op2_pred_setne_int;
339 case op2_setne_int:
340 return op2_prede_int;
341 default:
342 return op0_nop;
343 }
344 case op2_pred_setne:
345 switch (op) {
346 case op2_setge:
347 return op2_pred_setge;
348 case op2_setgt:
349 return op2_pred_setgt;
350 case op2_sete:
351 return op2_pred_sete;
352 default:
353 return op0_nop;
354 }
355 case op2_killne_int:
356 switch (op) {
357 case op2_setge_dx10:
358 return op2_killge;
359 case op2_setgt_dx10:
360 return op2_killgt;
361 case op2_sete_dx10:
362 return op2_kille;
363 case op2_setne_dx10:
364 return op2_killne;
365 case op2_setge_int:
366 return op2_killge_int;
367 case op2_setgt_int:
368 return op2_killgt_int;
369 case op2_setge_uint:
370 return op2_killge_uint;
371 case op2_setgt_uint:
372 return op2_killgt_uint;
373 case op2_sete_int:
374 return op2_kille_int;
375 case op2_setne_int:
376 return op2_killne_int;
377 default:
378 return op0_nop;
379 }
380
381 default:
382 return op0_nop;
383 }
384 }
385
386 void
visit(AluInstr * alu)387 ReplacePredicate::visit(AluInstr *alu)
388 {
389 auto new_op = pred_from_op(m_pred->opcode(), alu->opcode());
390
391 if (new_op == op0_nop)
392 return;
393
394 for (auto& s : alu->sources()) {
395 auto reg = s->as_register();
396 /* Protect against propagating
397 *
398 * V = COND(R, X)
399 * R = SOME_OP
400 * IF (V)
401 *
402 * to
403 *
404 * R = SOME_OP
405 * IF (COND(R, X))
406 */
407 if (reg && !reg->has_flag(Register::ssa))
408 return;
409 }
410
411 m_pred->set_op(new_op);
412 m_pred->set_sources(alu->sources());
413
414 std::array<AluInstr::SourceMod, 2> mods = { AluInstr::mod_abs, AluInstr::mod_neg };
415
416 for (int i = 0; i < 2; ++i) {
417 for (auto m : mods) {
418 if (alu->has_source_mod(i, m))
419 m_pred->set_source_mod(i, m);
420 }
421 }
422
423 success = true;
424 }
425
426 } // namespace r600
427