• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_optimizer.h"
28 
29 #include "sfn_debug.h"
30 #include "sfn_instr_alugroup.h"
31 #include "sfn_instr_controlflow.h"
32 #include "sfn_instr_export.h"
33 #include "sfn_instr_fetch.h"
34 #include "sfn_instr_lds.h"
35 #include "sfn_instr_mem.h"
36 #include "sfn_instr_tex.h"
37 #include "sfn_peephole.h"
38 #include "sfn_valuefactory.h"
39 #include "sfn_virtualvalues.h"
40 
41 #include <sstream>
42 
43 namespace r600 {
44 
45 bool
optimize(Shader & shader)46 optimize(Shader& shader)
47 {
48    bool progress;
49 
50    sfn_log << SfnLog::opt << "Shader before optimization\n";
51    if (sfn_log.has_debug_flag(SfnLog::opt)) {
52       std::stringstream ss;
53       shader.print(ss);
54       sfn_log << ss.str() << "\n\n";
55    }
56 
57    do {
58       progress = false;
59       progress |= copy_propagation_fwd(shader);
60       progress |= dead_code_elimination(shader);
61       progress |= copy_propagation_backward(shader);
62       progress |= dead_code_elimination(shader);
63       progress |= simplify_source_vectors(shader);
64       progress |= peephole(shader);
65       progress |= dead_code_elimination(shader);
66    } while (progress);
67 
68    return progress;
69 }
70 
71 class DCEVisitor : public InstrVisitor {
72 public:
73    DCEVisitor();
74 
75    void visit(AluInstr *instr) override;
76    void visit(AluGroup *instr) override;
77    void visit(TexInstr *instr) override;
visit(ExportInstr * instr)78    void visit(ExportInstr *instr) override { (void)instr; };
79    void visit(FetchInstr *instr) override;
80    void visit(Block *instr) override;
81 
visit(ControlFlowInstr * instr)82    void visit(ControlFlowInstr *instr) override { (void)instr; };
visit(IfInstr * instr)83    void visit(IfInstr *instr) override { (void)instr; };
visit(ScratchIOInstr * instr)84    void visit(ScratchIOInstr *instr) override { (void)instr; };
visit(StreamOutInstr * instr)85    void visit(StreamOutInstr *instr) override { (void)instr; };
visit(MemRingOutInstr * instr)86    void visit(MemRingOutInstr *instr) override { (void)instr; };
visit(EmitVertexInstr * instr)87    void visit(EmitVertexInstr *instr) override { (void)instr; };
visit(GDSInstr * instr)88    void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)89    void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)90    void visit(LDSAtomicInstr *instr) override { (void)instr; };
91    void visit(LDSReadInstr *instr) override;
visit(RatInstr * instr)92    void visit(RatInstr *instr) override { (void)instr; };
93 
94    bool progress;
95 };
96 
97 bool
dead_code_elimination(Shader & shader)98 dead_code_elimination(Shader& shader)
99 {
100    DCEVisitor dce;
101 
102    do {
103 
104       sfn_log << SfnLog::opt << "start dce run\n";
105 
106       dce.progress = false;
107       for (auto& b : shader.func())
108          b->accept(dce);
109 
110       sfn_log << SfnLog::opt << "finished dce run\n\n";
111 
112    } while (dce.progress);
113 
114    sfn_log << SfnLog::opt << "Shader after DCE\n";
115    if (sfn_log.has_debug_flag(SfnLog::opt)) {
116       std::stringstream ss;
117       shader.print(ss);
118       sfn_log << ss.str() << "\n\n";
119    }
120 
121    return dce.progress;
122 }
123 
DCEVisitor()124 DCEVisitor::DCEVisitor():
125     progress(false)
126 {
127 }
128 
129 void
visit(AluInstr * instr)130 DCEVisitor::visit(AluInstr *instr)
131 {
132    sfn_log << SfnLog::opt << "DCE: visit '" << *instr;
133 
134    if (instr->has_instr_flag(Instr::dead))
135       return;
136 
137    if (instr->dest() && (instr->dest()->has_uses())) {
138       sfn_log << SfnLog::opt << " dest used\n";
139       return;
140    }
141 
142    switch (instr->opcode()) {
143    case op2_kille:
144    case op2_killne:
145    case op2_kille_int:
146    case op2_killne_int:
147    case op2_killge:
148    case op2_killge_int:
149    case op2_killge_uint:
150    case op2_killgt:
151    case op2_killgt_int:
152    case op2_killgt_uint:
153    case op0_group_barrier:
154       sfn_log << SfnLog::opt << " never kill\n";
155       return;
156    default:;
157    }
158 
159    bool dead = instr->set_dead();
160    sfn_log << SfnLog::opt << (dead ? "dead" : "alive") << "\n";
161    progress |= dead;
162 }
163 
164 void
visit(LDSReadInstr * instr)165 DCEVisitor::visit(LDSReadInstr *instr)
166 {
167    sfn_log << SfnLog::opt << "visit " << *instr << "\n";
168    progress |= instr->remove_unused_components();
169 }
170 
171 void
visit(AluGroup * instr)172 DCEVisitor::visit(AluGroup *instr)
173 {
174    /* Groups are created because the instructions are used together
175     * so don't try to eliminate code there */
176    (void)instr;
177 }
178 
179 void
visit(TexInstr * instr)180 DCEVisitor::visit(TexInstr *instr)
181 {
182    auto& dest = instr->dst();
183 
184    bool has_uses = false;
185    RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
186    for (int i = 0; i < 4; ++i) {
187       if (!dest[i]->has_uses())
188          swz[i] = 7;
189       else
190          has_uses |= true;
191    }
192    instr->set_dest_swizzle(swz);
193 
194    if (has_uses)
195       return;
196 
197    progress |= instr->set_dead();
198 }
199 
200 void
visit(FetchInstr * instr)201 DCEVisitor::visit(FetchInstr *instr)
202 {
203    auto& dest = instr->dst();
204 
205    bool has_uses = false;
206    RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
207    for (int i = 0; i < 4; ++i) {
208       if (!dest[i]->has_uses())
209          swz[i] = 7;
210       else
211          has_uses |= true;
212    }
213    instr->set_dest_swizzle(swz);
214 
215    if (has_uses)
216       return;
217 
218    sfn_log << SfnLog::opt << "set dead: " << *instr << "\n";
219 
220    progress |= instr->set_dead();
221 }
222 
223 void
visit(Block * block)224 DCEVisitor::visit(Block *block)
225 {
226    auto i = block->begin();
227    auto e = block->end();
228    while (i != e) {
229       auto n = i++;
230       if (!(*n)->keep()) {
231          (*n)->accept(*this);
232          if ((*n)->is_dead()) {
233             block->erase(n);
234          }
235       }
236    }
237 }
238 
239 class CopyPropFwdVisitor : public InstrVisitor {
240 public:
241    CopyPropFwdVisitor(ValueFactory& vf);
242 
243    void visit(AluInstr *instr) override;
244    void visit(AluGroup *instr) override;
245    void visit(TexInstr *instr) override;
246    void visit(ExportInstr *instr) override;
247    void visit(FetchInstr *instr) override;
248    void visit(Block *instr) override;
visit(ControlFlowInstr * instr)249    void visit(ControlFlowInstr *instr) override { (void)instr; }
visit(IfInstr * instr)250    void visit(IfInstr *instr) override { (void)instr; }
visit(ScratchIOInstr * instr)251    void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)252    void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)253    void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)254    void visit(EmitVertexInstr *instr) override { (void)instr; }
255    void visit(GDSInstr *instr) override;
visit(WriteTFInstr * instr)256    void visit(WriteTFInstr *instr) override { (void)instr; };
visit(RatInstr * instr)257    void visit(RatInstr *instr) override { (void)instr; };
258 
259    // TODO: these two should use copy propagation
visit(LDSAtomicInstr * instr)260    void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)261    void visit(LDSReadInstr *instr) override { (void)instr; };
262 
263    void propagate_to(RegisterVec4& src, Instr *instr);
264    bool assigned_register_direct(PRegister reg);
265 
266    ValueFactory& value_factory;
267    bool progress;
268 };
269 
270 class CopyPropBackVisitor : public InstrVisitor {
271 public:
272    CopyPropBackVisitor();
273 
274    void visit(AluInstr *instr) override;
275    void visit(AluGroup *instr) override;
276    void visit(TexInstr *instr) override;
visit(ExportInstr * instr)277    void visit(ExportInstr *instr) override { (void)instr; }
278    void visit(FetchInstr *instr) override;
279    void visit(Block *instr) override;
visit(ControlFlowInstr * instr)280    void visit(ControlFlowInstr *instr) override { (void)instr; }
visit(IfInstr * instr)281    void visit(IfInstr *instr) override { (void)instr; }
visit(ScratchIOInstr * instr)282    void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)283    void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)284    void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)285    void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)286    void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)287    void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)288    void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)289    void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)290    void visit(RatInstr *instr) override { (void)instr; };
291 
292    bool progress;
293 };
294 
295 bool
copy_propagation_fwd(Shader & shader)296 copy_propagation_fwd(Shader& shader)
297 {
298    auto& root = shader.func();
299    CopyPropFwdVisitor copy_prop(shader.value_factory());
300 
301    do {
302       copy_prop.progress = false;
303       for (auto b : root)
304          b->accept(copy_prop);
305    } while (copy_prop.progress);
306 
307    sfn_log << SfnLog::opt << "Shader after Copy Prop forward\n";
308    if (sfn_log.has_debug_flag(SfnLog::opt)) {
309       std::stringstream ss;
310       shader.print(ss);
311       sfn_log << ss.str() << "\n\n";
312    }
313 
314    return copy_prop.progress;
315 }
316 
317 bool
copy_propagation_backward(Shader & shader)318 copy_propagation_backward(Shader& shader)
319 {
320    CopyPropBackVisitor copy_prop;
321 
322    do {
323       copy_prop.progress = false;
324       for (auto b : shader.func())
325          b->accept(copy_prop);
326    } while (copy_prop.progress);
327 
328    sfn_log << SfnLog::opt << "Shader after Copy Prop backwards\n";
329    if (sfn_log.has_debug_flag(SfnLog::opt)) {
330       std::stringstream ss;
331       shader.print(ss);
332       sfn_log << ss.str() << "\n\n";
333    }
334 
335    return copy_prop.progress;
336 }
337 
CopyPropFwdVisitor(ValueFactory & vf)338 CopyPropFwdVisitor::CopyPropFwdVisitor(ValueFactory& vf):
339    value_factory(vf),
340    progress(false)
341 {
342 }
343 
344 void
visit(AluInstr * instr)345 CopyPropFwdVisitor::visit(AluInstr *instr)
346 {
347    sfn_log << SfnLog::opt << "CopyPropFwdVisitor:[" << instr->block_id() << ":"
348            << instr->index() << "] " << *instr << " dset=" << instr->dest() << " ";
349 
350    if (instr->dest()) {
351       sfn_log << SfnLog::opt << "has uses; " << instr->dest()->uses().size();
352    }
353 
354    sfn_log << SfnLog::opt << "\n";
355 
356    if (!instr->can_propagate_src()) {
357       return;
358    }
359 
360    auto src = instr->psrc(0);
361    auto dest = instr->dest();
362 
363    /* Don't propagate an indirect load to more than one
364     * instruction, because we may have to split the address loads
365     * creating more instructions */
366    if (dest->uses().size() > 1) {
367       auto [addr, is_for_dest, index] = instr->indirect_addr();
368       if (addr && !is_for_dest)
369          return;
370    }
371 
372 
373    auto ii = dest->uses().begin();
374    auto ie = dest->uses().end();
375 
376    auto mov_block_id = instr->block_id();
377 
378    /** libc++ seems to invalidate the end iterator too if a std::set is
379     *  made empty by an erase operation,
380     *  https://gitlab.freedesktop.org/mesa/mesa/-/issues/7931
381     */
382    while(ii != ie && !dest->uses().empty()) {
383       auto i = *ii;
384       auto target_block_id = i->block_id();
385 
386       ++ii;
387       /* SSA can always be propagated, registers only in the same block
388        * and only if they are assigned in the same block */
389       bool dest_can_propagate = dest->has_flag(Register::ssa);
390 
391       if (!dest_can_propagate) {
392 
393          /* Register can propagate if the assignment was in the same
394           * block, and we don't have a second assignment coming later
395           * (e.g. helper invocation evaluation does
396           *
397           * 1: MOV R0.x, -1
398           * 2: FETCH R0.0 VPM
399           * 3: MOV SN.x, R0.x
400           *
401           * Here we can't prpagate the move in 1 to SN.x in 3 */
402          if ((mov_block_id == target_block_id && instr->index() < i->index())) {
403             dest_can_propagate = true;
404             if (dest->parents().size() > 1) {
405                for (auto p : dest->parents()) {
406                   if (p->block_id() == i->block_id() && p->index() > instr->index()) {
407                      dest_can_propagate = false;
408                      break;
409                   }
410                }
411             }
412          }
413       }
414       bool move_addr_use = false;
415       bool src_can_propagate = false;
416       if (auto rsrc = src->as_register()) {
417          if (rsrc->has_flag(Register::ssa)) {
418             src_can_propagate = true;
419          } else if (mov_block_id == target_block_id) {
420             if (auto a = rsrc->addr()) {
421                if (a->as_register() &&
422                    !a->as_register()->has_flag(Register::addr_or_idx) &&
423                    i->block_id() == mov_block_id &&
424                    i->index() == instr->index() + 1) {
425                   src_can_propagate = true;
426                   move_addr_use = true;
427                }
428             } else {
429                src_can_propagate = true;
430             }
431             for (auto p : rsrc->parents()) {
432                if (p->block_id() == mov_block_id &&
433                    p->index() > instr->index() &&
434                    p->index() < i->index()) {
435                   src_can_propagate = false;
436                   break;
437                }
438             }
439          }
440       } else {
441          src_can_propagate = true;
442       }
443 
444       if (dest_can_propagate && src_can_propagate) {
445          sfn_log << SfnLog::opt << "   Try replace in " << i->block_id() << ":"
446                  << i->index() << *i << "\n";
447 
448          if (i->as_alu() && i->as_alu()->parent_group()) {
449             progress |= i->as_alu()->parent_group()->replace_source(dest, src);
450          } else {
451             bool success = i->replace_source(dest, src);
452             if (success && move_addr_use) {
453                for (auto r : instr->required_instr()){
454                   std::cerr << "add " << *r << " to " << *i << "\n";
455                   i->add_required_instr(r);
456                }
457             }
458             progress |= success;
459          }
460       }
461    }
462    if (instr->dest()) {
463       sfn_log << SfnLog::opt << "has uses; " << instr->dest()->uses().size();
464    }
465    sfn_log << SfnLog::opt << "  done\n";
466 }
467 
468 void
visit(AluGroup * instr)469 CopyPropFwdVisitor::visit(AluGroup *instr)
470 {
471    (void)instr;
472 }
473 
474 void
visit(TexInstr * instr)475 CopyPropFwdVisitor::visit(TexInstr *instr)
476 {
477    propagate_to(instr->src(), instr);
478 }
479 
visit(GDSInstr * instr)480 void CopyPropFwdVisitor::visit(GDSInstr *instr)
481 {
482    propagate_to(instr->src(), instr);
483 }
484 
485 void
visit(ExportInstr * instr)486 CopyPropFwdVisitor::visit(ExportInstr *instr)
487 {
488    propagate_to(instr->value(), instr);
489 }
490 
register_sel_can_change(Pin pin)491 static bool register_sel_can_change(Pin pin)
492 {
493    return pin == pin_free || pin == pin_none;
494 }
495 
register_chan_is_pinned(Pin pin)496 static bool register_chan_is_pinned(Pin pin)
497 {
498    return pin == pin_chan ||
499          pin == pin_fully ||
500          pin == pin_chgr;
501 }
502 
503 
504 void
propagate_to(RegisterVec4 & value,Instr * instr)505 CopyPropFwdVisitor::propagate_to(RegisterVec4& value, Instr *instr)
506 {
507    /* Collect parent instructions - only ALU move without modifiers
508     * and without indirect access are allowed. */
509    AluInstr *parents[4] = {nullptr};
510    bool have_candidates = false;
511    for (int i = 0; i < 4; ++i) {
512       if (value[i]->chan() < 4 && value[i]->has_flag(Register::ssa)) {
513          /*  We have a pre-define value, so we can't propagate a copy */
514          if (value[i]->parents().empty())
515             return;
516 
517          if (value[i]->uses().size() > 1)
518             return;
519 
520          assert(value[i]->parents().size() == 1);
521          parents[i] = (*value[i]->parents().begin())->as_alu();
522 
523          /* Parent op is not an ALU instruction, so we can't
524             copy-propagate */
525          if (!parents[i])
526              return;
527 
528 
529          if ((parents[i]->opcode() != op1_mov) ||
530              parents[i]->has_source_mod(0, AluInstr::mod_neg) ||
531              parents[i]->has_source_mod(0, AluInstr::mod_abs) ||
532              parents[i]->has_alu_flag(alu_dst_clamp) ||
533              parents[i]->has_alu_flag(alu_src0_rel))
534             return;
535 
536          auto [addr, dummy0, index_reg_dummy] = parents[i]->indirect_addr();
537 
538          /* Don't accept moves with indirect reads, because they are not
539           * supported with instructions that use vec4 values */
540          if (addr || index_reg_dummy)
541              return;
542 
543          have_candidates = true;
544       }
545    }
546 
547    if (!have_candidates)
548       return;
549 
550    /* Collect the new source registers. We may have to move all registers
551     * to a new virtual sel index. */
552 
553    PRegister new_src[4] = {0};
554    int new_chan[4] = {0,0,0,0};
555 
556    uint8_t used_chan_mask = 0;
557    int new_sel = -1;
558    bool all_sel_can_change = true;
559 
560    bool is_ssa = true;
561 
562    for (int i = 0; i < 4; ++i) {
563 
564       /* No parent means we either ignore the channel or insert 0 or 1.*/
565       if (!parents[i])
566          continue;
567 
568       unsigned allowed_mask = 0xf & ~used_chan_mask;
569 
570       auto src = parents[i]->src(0).as_register();
571       if (!src)
572          return;
573 
574       /* Don't accept an array element for now, we would need extra checking
575        * that the value is not overwritten by an indirect access */
576       if (src->pin() == pin_array)
577          return;
578 
579       /* Is this check still needed ? */
580       if (!src->has_flag(Register::ssa) &&
581           !assigned_register_direct(src)) {
582          return;
583       }
584 
585       /* If the channel chan't switch we have to update the channel mask
586        * TODO: assign channel pinned registers first might give more
587        *  opportunities for this optimization */
588       if (register_chan_is_pinned(src->pin()))
589          allowed_mask = 1 << src->chan();
590 
591       /* Update the possible channel mask based on the sourcee's parent
592        * instruction(s) */
593       for (auto p : src->parents()) {
594          auto alu = p->as_alu();
595          if (alu)
596             allowed_mask &= alu->allowed_dest_chan_mask();
597       }
598 
599       for (auto u : src->uses()) {
600          auto alu = u->as_alu();
601          if (alu)
602             allowed_mask &= alu->allowed_src_chan_mask();
603       }
604 
605       if (!allowed_mask)
606          return;
607 
608       /* Prefer keeping the channel, but if that's not possible
609        * i.e. if the sel has to change, then  pick the next free channel
610        * (see below) */
611       new_chan[i] = src->chan();
612 
613       if (new_sel < 0) {
614          new_sel = src->sel();
615          is_ssa = src->has_flag(Register::ssa);
616       } else if (new_sel != src->sel()) {
617          /* If we have to assign a new register sel index do so only
618           * if all already assigned source can get a new register index,
619           * and all registers are either SSA or registers.
620           * TODO: check whether this last restriction is required */
621          if (all_sel_can_change &&
622              register_sel_can_change(src->pin()) &&
623              (is_ssa == src->has_flag(Register::ssa))) {
624             new_sel = value_factory.new_register_index();
625             new_chan[i] = u_bit_scan(&allowed_mask);
626          } else /* Sources can't be combined to a vec4 so bail out */
627             return;
628       }
629 
630       new_src[i] = src;
631       used_chan_mask |= 1 << new_chan[i];
632       if (!register_sel_can_change(src->pin()))
633          all_sel_can_change = false;
634    }
635 
636    /* Apply the changes to the vec4 source */
637    value.del_use(instr);
638    for (int i = 0; i < 4; ++i) {
639       if (parents[i]) {
640          new_src[i]->set_sel(new_sel);
641          if (is_ssa)
642             new_src[i]->set_flag(Register::ssa);
643          new_src[i]->set_chan(new_chan[i]);
644 
645          value.set_value(i, new_src[i]);
646 
647          if (new_src[i]->pin() != pin_fully &&
648              new_src[i]->pin() != pin_chgr) {
649             if (new_src[i]->pin() == pin_chan)
650                new_src[i]->set_pin(pin_chgr);
651             else
652                new_src[i]->set_pin(pin_group);
653          }
654          progress |= true;
655       }
656    }
657    value.add_use(instr);
658    if (progress)
659       value.validate();
660 }
661 
assigned_register_direct(PRegister reg)662 bool CopyPropFwdVisitor::assigned_register_direct(PRegister reg)
663 {
664    for (auto p: reg->parents()) {
665       if (p->as_alu())  {
666           auto [addr, dummy, index_reg] = p->as_alu()->indirect_addr();
667           if (addr)
668              return false;
669       }
670    }
671    return true;
672 }
673 
674 void
visit(FetchInstr * instr)675 CopyPropFwdVisitor::visit(FetchInstr *instr)
676 {
677    (void)instr;
678 }
679 
680 void
visit(Block * instr)681 CopyPropFwdVisitor::visit(Block *instr)
682 {
683    for (auto& i : *instr)
684       i->accept(*this);
685 }
686 
CopyPropBackVisitor()687 CopyPropBackVisitor::CopyPropBackVisitor():
688     progress(false)
689 {
690 }
691 
692 void
visit(AluInstr * instr)693 CopyPropBackVisitor::visit(AluInstr *instr)
694 {
695    bool local_progress = false;
696 
697    sfn_log << SfnLog::opt << "CopyPropBackVisitor:[" << instr->block_id() << ":"
698            << instr->index() << "] " << *instr << "\n";
699 
700    if (!instr->can_propagate_dest()) {
701       return;
702    }
703 
704    auto src_reg = instr->psrc(0)->as_register();
705    if (!src_reg) {
706       return;
707    }
708 
709    if (src_reg->uses().size() > 1)
710       return;
711 
712    auto dest = instr->dest();
713    if (!dest || !instr->has_alu_flag(alu_write)) {
714       return;
715    }
716 
717    if (!dest->has_flag(Register::ssa) && dest->parents().size() > 1)
718       return;
719 
720    for (auto& i : src_reg->parents()) {
721       sfn_log << SfnLog::opt << "Try replace dest in " << i->block_id() << ":"
722               << i->index() << *i << "\n";
723 
724       if (i->replace_dest(dest, instr)) {
725          dest->del_parent(instr);
726          dest->add_parent(i);
727          for (auto d : instr->dependend_instr()) {
728             d->add_required_instr(i);
729          }
730          local_progress = true;
731       }
732    }
733 
734    if (local_progress)
735       instr->set_dead();
736 
737    progress |= local_progress;
738 }
739 
740 void
visit(AluGroup * instr)741 CopyPropBackVisitor::visit(AluGroup *instr)
742 {
743    for (auto& i : *instr) {
744       if (i)
745          i->accept(*this);
746    }
747 }
748 
749 void
visit(TexInstr * instr)750 CopyPropBackVisitor::visit(TexInstr *instr)
751 {
752    (void)instr;
753 }
754 
755 void
visit(FetchInstr * instr)756 CopyPropBackVisitor::visit(FetchInstr *instr)
757 {
758    (void)instr;
759 }
760 
761 void
visit(Block * instr)762 CopyPropBackVisitor::visit(Block *instr)
763 {
764    for (auto i = instr->rbegin(); i != instr->rend(); ++i)
765       if (!(*i)->is_dead())
766          (*i)->accept(*this);
767 }
768 
769 class SimplifySourceVecVisitor : public InstrVisitor {
770 public:
SimplifySourceVecVisitor()771    SimplifySourceVecVisitor():
772        progress(false)
773    {
774    }
775 
visit(AluInstr * instr)776    void visit(AluInstr *instr) override { (void)instr; }
visit(AluGroup * instr)777    void visit(AluGroup *instr) override { (void)instr; }
778    void visit(TexInstr *instr) override;
779    void visit(ExportInstr *instr) override;
780    void visit(FetchInstr *instr) override;
781    void visit(Block *instr) override;
782    void visit(ControlFlowInstr *instr) override;
783    void visit(IfInstr *instr) override;
784    void visit(ScratchIOInstr *instr) override;
785    void visit(StreamOutInstr *instr) override;
786    void visit(MemRingOutInstr *instr) override;
visit(EmitVertexInstr * instr)787    void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)788    void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)789    void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)790    void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)791    void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)792    void visit(RatInstr *instr) override { (void)instr; };
793 
794    void replace_src(Instr *instr, RegisterVec4& reg4);
795 
796    bool progress;
797 };
798 
799 class HasVecDestVisitor : public ConstInstrVisitor {
800 public:
HasVecDestVisitor()801    HasVecDestVisitor():
802        has_group_dest(false)
803    {
804    }
805 
visit(const AluInstr & instr)806    void visit(const AluInstr& instr) override { (void)instr; }
visit(const AluGroup & instr)807    void visit(const AluGroup& instr) override { (void)instr; }
visit(const TexInstr & instr)808    void visit(const TexInstr& instr) override  {  (void)instr; has_group_dest = true; };
visit(const ExportInstr & instr)809    void visit(const ExportInstr& instr) override { (void)instr; }
visit(const FetchInstr & instr)810    void visit(const FetchInstr& instr) override  {  (void)instr; has_group_dest = true; };
visit(const Block & instr)811    void visit(const Block& instr) override { (void)instr; };
visit(const ControlFlowInstr & instr)812    void visit(const ControlFlowInstr& instr) override{ (void)instr; }
visit(const IfInstr & instr)813    void visit(const IfInstr& instr) override{ (void)instr; }
visit(const ScratchIOInstr & instr)814    void visit(const ScratchIOInstr& instr) override  { (void)instr; };
visit(const StreamOutInstr & instr)815    void visit(const StreamOutInstr& instr) override { (void)instr; }
visit(const MemRingOutInstr & instr)816    void visit(const MemRingOutInstr& instr) override { (void)instr; }
visit(const EmitVertexInstr & instr)817    void visit(const EmitVertexInstr& instr) override { (void)instr; }
visit(const GDSInstr & instr)818    void visit(const GDSInstr& instr) override { (void)instr; }
visit(const WriteTFInstr & instr)819    void visit(const WriteTFInstr& instr) override { (void)instr; };
visit(const LDSAtomicInstr & instr)820    void visit(const LDSAtomicInstr& instr) override { (void)instr; };
visit(const LDSReadInstr & instr)821    void visit(const LDSReadInstr& instr) override { (void)instr; };
visit(const RatInstr & instr)822    void visit(const RatInstr& instr) override {  (void)instr; };
823 
824    bool has_group_dest;
825 };
826 
827 class HasVecSrcVisitor : public ConstInstrVisitor {
828 public:
HasVecSrcVisitor()829    HasVecSrcVisitor():
830        has_group_src(false)
831    {
832    }
833 
visit(UNUSED const AluInstr & instr)834    void visit(UNUSED const AluInstr& instr) override { }
visit(UNUSED const AluGroup & instr)835    void visit(UNUSED const AluGroup& instr) override { }
visit(UNUSED const FetchInstr & instr)836    void visit(UNUSED const FetchInstr& instr) override  { };
visit(UNUSED const Block & instr)837    void visit(UNUSED const Block& instr) override { };
visit(UNUSED const ControlFlowInstr & instr)838    void visit(UNUSED const ControlFlowInstr& instr) override{ }
visit(UNUSED const IfInstr & instr)839    void visit(UNUSED const IfInstr& instr) override{ }
visit(UNUSED const LDSAtomicInstr & instr)840    void visit(UNUSED const LDSAtomicInstr& instr) override { };
visit(UNUSED const LDSReadInstr & instr)841    void visit(UNUSED const LDSReadInstr& instr) override { };
842 
visit(const TexInstr & instr)843    void visit(const TexInstr& instr) override { check(instr.src()); }
visit(const ExportInstr & instr)844    void visit(const ExportInstr& instr) override { check(instr.value()); }
visit(const GDSInstr & instr)845    void visit(const GDSInstr& instr) override { check(instr.src()); }
846 
847    // No swizzling supported, so we want to keep the register group
visit(UNUSED const ScratchIOInstr & instr)848    void visit(UNUSED const ScratchIOInstr& instr) override  { has_group_src = true; };
visit(UNUSED const StreamOutInstr & instr)849    void visit(UNUSED const StreamOutInstr& instr) override { has_group_src = true; }
visit(UNUSED const MemRingOutInstr & instr)850    void visit(UNUSED const MemRingOutInstr& instr) override { has_group_src = true; }
visit(UNUSED const RatInstr & instr)851    void visit(UNUSED const RatInstr& instr) override { has_group_src = true; };
852 
visit(UNUSED const EmitVertexInstr & instr)853    void visit(UNUSED const EmitVertexInstr& instr) override { }
854 
855    // We always emit at least two values
visit(UNUSED const WriteTFInstr & instr)856    void visit(UNUSED const WriteTFInstr& instr) override { has_group_src = true; };
857 
858 
859    void check(const RegisterVec4& value);
860 
861    bool has_group_src;
862 };
863 
check(const RegisterVec4 & value)864 void HasVecSrcVisitor::check(const RegisterVec4& value)
865 {
866    int nval = 0;
867    for (int i = 0; i < 4 && nval < 2; ++i) {
868       if (value[i]->chan() < 4)
869          ++nval;
870    }
871    has_group_src = nval > 1;
872 }
873 
874 bool
simplify_source_vectors(Shader & sh)875 simplify_source_vectors(Shader& sh)
876 {
877    SimplifySourceVecVisitor visitor;
878 
879    for (auto b : sh.func())
880       b->accept(visitor);
881 
882    return visitor.progress;
883 }
884 
885 void
visit(TexInstr * instr)886 SimplifySourceVecVisitor::visit(TexInstr *instr)
887 {
888 
889    if (instr->opcode() != TexInstr::get_resinfo) {
890       auto& src = instr->src();
891       replace_src(instr, src);
892       int nvals = 0;
893       for (int i = 0; i < 4; ++i)
894          if (src[i]->chan() < 4)
895             ++nvals;
896       if (nvals == 1) {
897          for (int i = 0; i < 4; ++i)
898             if (src[i]->chan() < 4) {
899                HasVecDestVisitor check_dests;
900                for (auto p : src[i]->parents()) {
901                   p->accept(check_dests);
902                   if (check_dests.has_group_dest)
903                      break;
904                }
905 
906                HasVecSrcVisitor check_src;
907                for (auto p : src[i]->uses()) {
908                   p->accept(check_src);
909                   if (check_src.has_group_src)
910                      break;
911                }
912 
913                if (check_dests.has_group_dest || check_src.has_group_src)
914                   break;
915 
916                if (src[i]->pin() == pin_group)
917                   src[i]->set_pin(pin_free);
918                else if (src[i]->pin() == pin_chgr)
919                   src[i]->set_pin(pin_chan);
920             }
921       }
922    }
923    for (auto& prep : instr->prepare_instr()) {
924       prep->accept(*this);
925    }
926 }
927 
928 void
visit(ScratchIOInstr * instr)929 SimplifySourceVecVisitor::visit(ScratchIOInstr *instr)
930 {
931    (void)instr;
932 }
933 
934 class ReplaceConstSource : public AluInstrVisitor {
935 public:
ReplaceConstSource(Instr * old_use_,RegisterVec4 & vreg_,int i)936    ReplaceConstSource(Instr *old_use_, RegisterVec4& vreg_, int i):
937        old_use(old_use_),
938        vreg(vreg_),
939        index(i),
940        success(false)
941    {
942    }
943 
944    using AluInstrVisitor::visit;
945 
946    void visit(AluInstr *alu) override;
947 
948    Instr *old_use;
949    RegisterVec4& vreg;
950    int index;
951    bool success;
952 };
953 
954 void
visit(ExportInstr * instr)955 SimplifySourceVecVisitor::visit(ExportInstr *instr)
956 {
957    replace_src(instr, instr->value());
958 }
959 
960 void
replace_src(Instr * instr,RegisterVec4 & reg4)961 SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4)
962 {
963    for (int i = 0; i < 4; ++i) {
964       auto s = reg4[i];
965 
966       if (s->chan() > 3)
967          continue;
968 
969       if (!s->has_flag(Register::ssa))
970          continue;
971 
972       /* Cayman trans ops have more then one parent for
973        * one dest */
974       if (s->parents().size() != 1)
975          continue;
976 
977       auto& op = *s->parents().begin();
978 
979       ReplaceConstSource visitor(instr, reg4, i);
980 
981       op->accept(visitor);
982 
983       progress |= visitor.success;
984    }
985 }
986 
987 void
visit(StreamOutInstr * instr)988 SimplifySourceVecVisitor::visit(StreamOutInstr *instr)
989 {
990    (void)instr;
991 }
992 
993 void
visit(MemRingOutInstr * instr)994 SimplifySourceVecVisitor::visit(MemRingOutInstr *instr)
995 {
996    (void)instr;
997 }
998 
999 void
visit(AluInstr * alu)1000 ReplaceConstSource::visit(AluInstr *alu)
1001 {
1002    if (alu->opcode() != op1_mov)
1003       return;
1004 
1005    if (alu->has_source_mod(0, AluInstr::mod_abs) ||
1006        alu->has_source_mod(0, AluInstr::mod_neg))
1007       return;
1008 
1009    auto src = alu->psrc(0);
1010    assert(src);
1011 
1012    int override_chan = -1;
1013 
1014    if (value_is_const_uint(*src, 0)) {
1015       override_chan = 4;
1016    } else if (value_is_const_float(*src, 1.0f)) {
1017       override_chan = 5;
1018    }
1019 
1020    if (override_chan >= 0) {
1021       vreg[index]->del_use(old_use);
1022       auto reg = new Register(vreg.sel(), override_chan, vreg[index]->pin());
1023       vreg.set_value(index, reg);
1024       success = true;
1025    }
1026 }
1027 
1028 void
visit(FetchInstr * instr)1029 SimplifySourceVecVisitor::visit(FetchInstr *instr)
1030 {
1031    (void)instr;
1032 }
1033 
1034 void
visit(Block * instr)1035 SimplifySourceVecVisitor::visit(Block *instr)
1036 {
1037    for (auto i = instr->rbegin(); i != instr->rend(); ++i)
1038       if (!(*i)->is_dead())
1039          (*i)->accept(*this);
1040 }
1041 
1042 void
visit(ControlFlowInstr * instr)1043 SimplifySourceVecVisitor::visit(ControlFlowInstr *instr)
1044 {
1045    (void)instr;
1046 }
1047 
1048 void
visit(IfInstr * instr)1049 SimplifySourceVecVisitor::visit(IfInstr *instr)
1050 {
1051    (void)instr;
1052 }
1053 
1054 } // namespace r600
1055