• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_peephole.h"
28 #include "sfn_instr_alugroup.h"
29 
30 namespace r600 {
31 
32 class PeepholeVisitor : public InstrVisitor {
33 public:
34    void visit(AluInstr *instr) override;
35    void visit(AluGroup *instr) override;
visit(TexInstr * instr)36    void visit(TexInstr *instr) override { (void)instr; };
visit(ExportInstr * instr)37    void visit(ExportInstr *instr) override { (void)instr; }
visit(FetchInstr * instr)38    void visit(FetchInstr *instr) override { (void)instr; }
39    void visit(Block *instr) override;
visit(ControlFlowInstr * instr)40    void visit(ControlFlowInstr *instr) override { (void)instr; }
41    void visit(IfInstr *instr) override;
visit(ScratchIOInstr * instr)42    void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)43    void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)44    void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)45    void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)46    void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)47    void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)48    void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)49    void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)50    void visit(RatInstr *instr) override { (void)instr; };
51 
52    void convert_to_mov(AluInstr *alu, int src_idx);
53 
54    void apply_source_mods(AluInstr *alu);
55    void apply_dest_clamp(AluInstr *alu);
56    void try_fuse_with_prev(AluInstr *alu);
57 
58    bool progress{false};
59 };
60 
61 bool
peephole(Shader & sh)62 peephole(Shader& sh)
63 {
64    PeepholeVisitor peephole;
65    for (auto b : sh.func())
66       b->accept(peephole);
67    return peephole.progress;
68 }
69 
70 class ReplacePredicate : public AluInstrVisitor {
71 public:
ReplacePredicate(AluInstr * pred)72    ReplacePredicate(AluInstr *pred):
73        m_pred(pred)
74    {
75    }
76 
77    using AluInstrVisitor::visit;
78 
79    void visit(AluInstr *alu) override;
80 
81    AluInstr *m_pred;
82    bool success{false};
83 };
84 
85 void
visit(AluInstr * instr)86 PeepholeVisitor::visit(AluInstr *instr)
87 {
88    switch (instr->opcode()) {
89    case op1_mov:
90       if (instr->has_alu_flag(alu_dst_clamp))
91          apply_dest_clamp(instr);
92       else if (!instr->has_source_mod(0, AluInstr::mod_abs) &&
93                !instr->has_source_mod(0, AluInstr::mod_neg))
94          try_fuse_with_prev(instr);
95       break;
96    case op2_add:
97    case op2_add_int:
98       if (value_is_const_uint(instr->src(0), 0))
99          convert_to_mov(instr, 1);
100       else if (value_is_const_uint(instr->src(1), 0))
101          convert_to_mov(instr, 0);
102       break;
103    case op2_mul:
104    case op2_mul_ieee:
105       if (value_is_const_float(instr->src(0), 1.0f))
106          convert_to_mov(instr, 1);
107       else if (value_is_const_float(instr->src(1), 1.0f))
108          convert_to_mov(instr, 0);
109       break;
110    case op3_muladd:
111    case op3_muladd_ieee:
112       if (value_is_const_uint(instr->src(0), 0) || value_is_const_uint(instr->src(1), 0))
113          convert_to_mov(instr, 2);
114       break;
115    case op2_killne_int:
116       if (value_is_const_uint(instr->src(1), 0)) {
117          auto src0 = instr->psrc(0)->as_register();
118          if (src0 && src0->has_flag(Register::ssa)) {
119             auto parent = *src0->parents().begin();
120             ReplacePredicate visitor(instr);
121             parent->accept(visitor);
122             progress |= visitor.success;
123          }
124       }
125       break;
126    default:;
127    }
128 
129    auto opinfo = alu_ops.at(instr->opcode());
130    if (opinfo.can_srcmod)
131          apply_source_mods(instr);
132 }
133 
134 void
convert_to_mov(AluInstr * alu,int src_idx)135 PeepholeVisitor::convert_to_mov(AluInstr *alu, int src_idx)
136 {
137    AluInstr::SrcValues new_src{alu->psrc(src_idx)};
138    alu->set_sources(new_src);
139    alu->set_op(op1_mov);
140    progress = true;
141 }
142 
143 void
visit(UNUSED AluGroup * instr)144 PeepholeVisitor::visit(UNUSED AluGroup *instr)
145 {
146    for (auto alu : *instr) {
147       if (!alu)
148          continue;
149       visit(alu);
150    }
151 }
152 
153 void
visit(Block * instr)154 PeepholeVisitor::visit(Block *instr)
155 {
156    for (auto& i : *instr)
157       i->accept(*this);
158 }
159 
160 void
visit(IfInstr * instr)161 PeepholeVisitor::visit(IfInstr *instr)
162 {
163    auto pred = instr->predicate();
164 
165    auto& src1 = pred->src(1);
166    if (value_is_const_uint(src1, 0)) {
167       auto src0 = pred->src(0).as_register();
168       if (src0 && src0->has_flag(Register::ssa) && !src0->parents().empty()) {
169          assert(src0->parents().size() == 1);
170          auto parent = *src0->parents().begin();
171 
172          ReplacePredicate visitor(pred);
173          parent->accept(visitor);
174          progress |= visitor.success;
175       }
176    }
177 }
178 
apply_source_mods(AluInstr * alu)179 void PeepholeVisitor::apply_source_mods(AluInstr *alu)
180 {
181    bool has_abs = alu->n_sources() / alu->alu_slots() < 3;
182 
183    for (unsigned i = 0; i < alu->n_sources(); ++i) {
184 
185       auto reg = alu->psrc(i)->as_register();
186       if (!reg)
187          continue;
188       if (!reg->has_flag(Register::ssa))
189          continue;
190       if (reg->parents().size() != 1)
191          continue;
192 
193       auto p = (*reg->parents().begin())->as_alu();
194       if (!p)
195          continue;
196 
197       if (p->opcode() != op1_mov)
198          continue;
199 
200       if (!has_abs && p->has_source_mod(0, AluInstr::mod_abs))
201          continue;
202 
203       if (!p->has_source_mod(0, AluInstr::mod_abs) &&
204           !p->has_source_mod(0, AluInstr::mod_neg))
205          continue;
206 
207       if (p->has_alu_flag(alu_dst_clamp))
208          continue;
209 
210       auto new_src = p->psrc(0);
211       bool new_src_not_pinned = new_src->pin() == pin_free ||
212                                 new_src->pin() == pin_none;
213 
214       bool old_src_not_pinned = reg->pin() == pin_free ||
215                                 reg->pin() == pin_none;
216 
217       bool sources_equal_channel = reg->pin() == pin_chan &&
218                                    new_src->pin() == pin_chan &&
219                                    new_src->chan() == reg->chan();
220 
221       if (!new_src_not_pinned &&
222           !old_src_not_pinned &&
223           !sources_equal_channel)
224          continue;
225 
226       uint32_t to_set = 0;
227       AluInstr::SourceMod to_clear = AluInstr::mod_none;
228 
229       if (p->has_source_mod(0, AluInstr::mod_abs))
230          to_set |= AluInstr::mod_abs;
231       if (p->has_source_mod(0, AluInstr::mod_neg)) {
232          if (!alu->has_source_mod(i, AluInstr::mod_neg))
233             to_set |= AluInstr::mod_neg;
234          else
235             to_clear = AluInstr::mod_neg;
236       }
237 
238       progress |= alu->replace_src(i, new_src, to_set, to_clear);
239    }
240 }
241 
try_fuse_with_prev(AluInstr * alu)242 void PeepholeVisitor::try_fuse_with_prev(AluInstr *alu)
243 {
244    if (auto reg = alu->src(0).as_register()) {
245       if (!reg->has_flag(Register::ssa) ||
246           reg->uses().size() != 1 ||
247           reg->parents().size() != 1)
248          return;
249       auto p = *reg->parents().begin();
250       auto dest = alu->dest();
251       if (!dest->has_flag(Register::ssa) &&
252           alu->block_id() != p->block_id())
253          return;
254       if (p->replace_dest(dest, alu)) {
255          dest->del_parent(alu);
256          dest->add_parent(p);
257          for (auto d : alu->dependend_instr()) {
258             d->add_required_instr(p);
259          }
260          alu->set_dead();
261          progress = true;
262       }
263    }
264 }
265 
apply_dest_clamp(AluInstr * alu)266 void PeepholeVisitor::apply_dest_clamp(AluInstr *alu)
267 {
268    if (alu->has_source_mod(0, AluInstr::mod_abs) ||
269        alu->has_source_mod(0, AluInstr::mod_neg))
270        return;
271 
272    auto dest = alu->dest();
273 
274    assert(dest);
275 
276    if (!dest->has_flag(Register::ssa))
277       return;
278 
279    auto src = alu->psrc(0)->as_register();
280    if (!src)
281       return;
282 
283    if (src->parents().size() != 1)
284       return;
285 
286    if (src->uses().size() != 1)
287       return;
288 
289    auto new_parent = (*src->parents().begin())->as_alu();
290    if (!new_parent)
291       return;
292 
293    auto opinfo = alu_ops.at(new_parent->opcode());
294    if (!opinfo.can_clamp)
295       return;
296 
297    // Move clamp flag to the parent, and let copy propagation do the rest
298    new_parent->set_alu_flag(alu_dst_clamp);
299    alu->reset_alu_flag(alu_dst_clamp);
300 
301    progress = true;
302 }
303 
304 
305 static EAluOp
pred_from_op(EAluOp pred_op,EAluOp op)306 pred_from_op(EAluOp pred_op, EAluOp op)
307 {
308    switch (pred_op) {
309    case op2_pred_setne_int:
310       switch (op) {
311       case op2_setge_dx10:
312          return op2_pred_setge;
313       case op2_setgt_dx10:
314          return op2_pred_setgt;
315       case op2_sete_dx10:
316          return op2_pred_sete;
317       case op2_setne_dx10:
318          return op2_pred_setne;
319 
320       case op2_setge_int:
321          return op2_pred_setge_int;
322       case op2_setgt_int:
323          return op2_pred_setgt_int;
324       case op2_setge_uint:
325          return op2_pred_setge_uint;
326       case op2_setgt_uint:
327          return op2_pred_setgt_uint;
328       case op2_sete_int:
329          return op2_prede_int;
330       case op2_setne_int:
331          return op2_pred_setne_int;
332       default:
333          return op0_nop;
334       }
335    case op2_prede_int:
336       switch (op) {
337       case op2_sete_int:
338          return op2_pred_setne_int;
339       case op2_setne_int:
340          return op2_prede_int;
341       default:
342          return op0_nop;
343       }
344    case op2_pred_setne:
345       switch (op) {
346       case op2_setge:
347          return op2_pred_setge;
348       case op2_setgt:
349          return op2_pred_setgt;
350       case op2_sete:
351          return op2_pred_sete;
352       default:
353          return op0_nop;
354       }
355    case op2_killne_int:
356       switch (op) {
357       case op2_setge_dx10:
358          return op2_killge;
359       case op2_setgt_dx10:
360          return op2_killgt;
361       case op2_sete_dx10:
362          return op2_kille;
363       case op2_setne_dx10:
364          return op2_killne;
365       case op2_setge_int:
366          return op2_killge_int;
367       case op2_setgt_int:
368          return op2_killgt_int;
369       case op2_setge_uint:
370          return op2_killge_uint;
371       case op2_setgt_uint:
372          return op2_killgt_uint;
373       case op2_sete_int:
374          return op2_kille_int;
375       case op2_setne_int:
376          return op2_killne_int;
377       default:
378          return op0_nop;
379       }
380 
381    default:
382       return op0_nop;
383    }
384 }
385 
386 void
visit(AluInstr * alu)387 ReplacePredicate::visit(AluInstr *alu)
388 {
389    auto new_op = pred_from_op(m_pred->opcode(), alu->opcode());
390 
391    if (new_op == op0_nop)
392       return;
393 
394    for (auto& s : alu->sources()) {
395       auto reg = s->as_register();
396       /* Protect against propagating
397        *
398        *   V = COND(R, X)
399        *   R = SOME_OP
400        *   IF (V)
401        *
402        * to
403        *
404        *   R = SOME_OP
405        *   IF (COND(R, X))
406        */
407       if (reg && !reg->has_flag(Register::ssa))
408          return;
409    }
410 
411    m_pred->set_op(new_op);
412    m_pred->set_sources(alu->sources());
413 
414    std::array<AluInstr::SourceMod, 2> mods = { AluInstr::mod_abs, AluInstr::mod_neg };
415 
416    for (int i = 0; i < 2; ++i) {
417       for (auto m : mods) {
418          if (alu->has_source_mod(i, m))
419             m_pred->set_source_mod(i, m);
420       }
421    }
422 
423    success = true;
424 }
425 
426 } // namespace r600
427