1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_optimizer.h"
28
29 #include "sfn_debug.h"
30 #include "sfn_instr_alugroup.h"
31 #include "sfn_instr_controlflow.h"
32 #include "sfn_instr_export.h"
33 #include "sfn_instr_fetch.h"
34 #include "sfn_instr_lds.h"
35 #include "sfn_instr_mem.h"
36 #include "sfn_instr_tex.h"
37 #include "sfn_peephole.h"
38 #include "sfn_valuefactory.h"
39 #include "sfn_virtualvalues.h"
40
41 #include <sstream>
42
43 namespace r600 {
44
45 bool
optimize(Shader & shader)46 optimize(Shader& shader)
47 {
48 bool progress;
49
50 sfn_log << SfnLog::opt << "Shader before optimization\n";
51 if (sfn_log.has_debug_flag(SfnLog::opt)) {
52 std::stringstream ss;
53 shader.print(ss);
54 sfn_log << ss.str() << "\n\n";
55 }
56
57 do {
58 progress = false;
59 progress |= copy_propagation_fwd(shader);
60 progress |= dead_code_elimination(shader);
61 progress |= copy_propagation_backward(shader);
62 progress |= dead_code_elimination(shader);
63 progress |= simplify_source_vectors(shader);
64 progress |= peephole(shader);
65 progress |= dead_code_elimination(shader);
66 } while (progress);
67
68 return progress;
69 }
70
71 class DCEVisitor : public InstrVisitor {
72 public:
73 DCEVisitor();
74
75 void visit(AluInstr *instr) override;
76 void visit(AluGroup *instr) override;
77 void visit(TexInstr *instr) override;
visit(ExportInstr * instr)78 void visit(ExportInstr *instr) override { (void)instr; };
79 void visit(FetchInstr *instr) override;
80 void visit(Block *instr) override;
81
visit(ControlFlowInstr * instr)82 void visit(ControlFlowInstr *instr) override { (void)instr; };
visit(IfInstr * instr)83 void visit(IfInstr *instr) override { (void)instr; };
visit(ScratchIOInstr * instr)84 void visit(ScratchIOInstr *instr) override { (void)instr; };
visit(StreamOutInstr * instr)85 void visit(StreamOutInstr *instr) override { (void)instr; };
visit(MemRingOutInstr * instr)86 void visit(MemRingOutInstr *instr) override { (void)instr; };
visit(EmitVertexInstr * instr)87 void visit(EmitVertexInstr *instr) override { (void)instr; };
visit(GDSInstr * instr)88 void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)89 void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)90 void visit(LDSAtomicInstr *instr) override { (void)instr; };
91 void visit(LDSReadInstr *instr) override;
visit(RatInstr * instr)92 void visit(RatInstr *instr) override { (void)instr; };
93
94 bool progress;
95 };
96
97 bool
dead_code_elimination(Shader & shader)98 dead_code_elimination(Shader& shader)
99 {
100 DCEVisitor dce;
101
102 do {
103
104 sfn_log << SfnLog::opt << "start dce run\n";
105
106 dce.progress = false;
107 for (auto& b : shader.func())
108 b->accept(dce);
109
110 sfn_log << SfnLog::opt << "finished dce run\n\n";
111
112 } while (dce.progress);
113
114 sfn_log << SfnLog::opt << "Shader after DCE\n";
115 if (sfn_log.has_debug_flag(SfnLog::opt)) {
116 std::stringstream ss;
117 shader.print(ss);
118 sfn_log << ss.str() << "\n\n";
119 }
120
121 return dce.progress;
122 }
123
DCEVisitor()124 DCEVisitor::DCEVisitor():
125 progress(false)
126 {
127 }
128
129 void
visit(AluInstr * instr)130 DCEVisitor::visit(AluInstr *instr)
131 {
132 sfn_log << SfnLog::opt << "DCE: visit '" << *instr;
133
134 if (instr->has_instr_flag(Instr::dead))
135 return;
136
137 if (instr->dest() && (instr->dest()->has_uses())) {
138 sfn_log << SfnLog::opt << " dest used\n";
139 return;
140 }
141
142 switch (instr->opcode()) {
143 case op2_kille:
144 case op2_killne:
145 case op2_kille_int:
146 case op2_killne_int:
147 case op2_killge:
148 case op2_killge_int:
149 case op2_killge_uint:
150 case op2_killgt:
151 case op2_killgt_int:
152 case op2_killgt_uint:
153 case op0_group_barrier:
154 sfn_log << SfnLog::opt << " never kill\n";
155 return;
156 default:;
157 }
158
159 bool dead = instr->set_dead();
160 sfn_log << SfnLog::opt << (dead ? "dead" : "alive") << "\n";
161 progress |= dead;
162 }
163
164 void
visit(LDSReadInstr * instr)165 DCEVisitor::visit(LDSReadInstr *instr)
166 {
167 sfn_log << SfnLog::opt << "visit " << *instr << "\n";
168 progress |= instr->remove_unused_components();
169 }
170
171 void
visit(AluGroup * instr)172 DCEVisitor::visit(AluGroup *instr)
173 {
174 /* Groups are created because the instructions are used together
175 * so don't try to eliminate code there */
176 (void)instr;
177 }
178
179 void
visit(TexInstr * instr)180 DCEVisitor::visit(TexInstr *instr)
181 {
182 auto& dest = instr->dst();
183
184 bool has_uses = false;
185 RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
186 for (int i = 0; i < 4; ++i) {
187 if (!dest[i]->has_uses())
188 swz[i] = 7;
189 else
190 has_uses |= true;
191 }
192 instr->set_dest_swizzle(swz);
193
194 if (has_uses)
195 return;
196
197 progress |= instr->set_dead();
198 }
199
200 void
visit(FetchInstr * instr)201 DCEVisitor::visit(FetchInstr *instr)
202 {
203 auto& dest = instr->dst();
204
205 bool has_uses = false;
206 RegisterVec4::Swizzle swz = instr->all_dest_swizzle();
207 for (int i = 0; i < 4; ++i) {
208 if (!dest[i]->has_uses())
209 swz[i] = 7;
210 else
211 has_uses |= true;
212 }
213 instr->set_dest_swizzle(swz);
214
215 if (has_uses)
216 return;
217
218 sfn_log << SfnLog::opt << "set dead: " << *instr << "\n";
219
220 progress |= instr->set_dead();
221 }
222
223 void
visit(Block * block)224 DCEVisitor::visit(Block *block)
225 {
226 auto i = block->begin();
227 auto e = block->end();
228 while (i != e) {
229 auto n = i++;
230 if (!(*n)->keep()) {
231 (*n)->accept(*this);
232 if ((*n)->is_dead()) {
233 block->erase(n);
234 }
235 }
236 }
237 }
238
239 class CopyPropFwdVisitor : public InstrVisitor {
240 public:
241 CopyPropFwdVisitor(ValueFactory& vf);
242
243 void visit(AluInstr *instr) override;
244 void visit(AluGroup *instr) override;
245 void visit(TexInstr *instr) override;
246 void visit(ExportInstr *instr) override;
247 void visit(FetchInstr *instr) override;
248 void visit(Block *instr) override;
visit(ControlFlowInstr * instr)249 void visit(ControlFlowInstr *instr) override { (void)instr; }
visit(IfInstr * instr)250 void visit(IfInstr *instr) override { (void)instr; }
visit(ScratchIOInstr * instr)251 void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)252 void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)253 void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)254 void visit(EmitVertexInstr *instr) override { (void)instr; }
255 void visit(GDSInstr *instr) override;
visit(WriteTFInstr * instr)256 void visit(WriteTFInstr *instr) override { (void)instr; };
visit(RatInstr * instr)257 void visit(RatInstr *instr) override { (void)instr; };
258
259 // TODO: these two should use copy propagation
visit(LDSAtomicInstr * instr)260 void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)261 void visit(LDSReadInstr *instr) override { (void)instr; };
262
263 void propagate_to(RegisterVec4& src, Instr *instr);
264 bool assigned_register_direct(PRegister reg);
265
266 ValueFactory& value_factory;
267 bool progress;
268 };
269
270 class CopyPropBackVisitor : public InstrVisitor {
271 public:
272 CopyPropBackVisitor();
273
274 void visit(AluInstr *instr) override;
275 void visit(AluGroup *instr) override;
276 void visit(TexInstr *instr) override;
visit(ExportInstr * instr)277 void visit(ExportInstr *instr) override { (void)instr; }
278 void visit(FetchInstr *instr) override;
279 void visit(Block *instr) override;
visit(ControlFlowInstr * instr)280 void visit(ControlFlowInstr *instr) override { (void)instr; }
visit(IfInstr * instr)281 void visit(IfInstr *instr) override { (void)instr; }
visit(ScratchIOInstr * instr)282 void visit(ScratchIOInstr *instr) override { (void)instr; }
visit(StreamOutInstr * instr)283 void visit(StreamOutInstr *instr) override { (void)instr; }
visit(MemRingOutInstr * instr)284 void visit(MemRingOutInstr *instr) override { (void)instr; }
visit(EmitVertexInstr * instr)285 void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)286 void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)287 void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)288 void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)289 void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)290 void visit(RatInstr *instr) override { (void)instr; };
291
292 bool progress;
293 };
294
295 bool
copy_propagation_fwd(Shader & shader)296 copy_propagation_fwd(Shader& shader)
297 {
298 auto& root = shader.func();
299 CopyPropFwdVisitor copy_prop(shader.value_factory());
300
301 do {
302 copy_prop.progress = false;
303 for (auto b : root)
304 b->accept(copy_prop);
305 } while (copy_prop.progress);
306
307 sfn_log << SfnLog::opt << "Shader after Copy Prop forward\n";
308 if (sfn_log.has_debug_flag(SfnLog::opt)) {
309 std::stringstream ss;
310 shader.print(ss);
311 sfn_log << ss.str() << "\n\n";
312 }
313
314 return copy_prop.progress;
315 }
316
317 bool
copy_propagation_backward(Shader & shader)318 copy_propagation_backward(Shader& shader)
319 {
320 CopyPropBackVisitor copy_prop;
321
322 do {
323 copy_prop.progress = false;
324 for (auto b : shader.func())
325 b->accept(copy_prop);
326 } while (copy_prop.progress);
327
328 sfn_log << SfnLog::opt << "Shader after Copy Prop backwards\n";
329 if (sfn_log.has_debug_flag(SfnLog::opt)) {
330 std::stringstream ss;
331 shader.print(ss);
332 sfn_log << ss.str() << "\n\n";
333 }
334
335 return copy_prop.progress;
336 }
337
CopyPropFwdVisitor(ValueFactory & vf)338 CopyPropFwdVisitor::CopyPropFwdVisitor(ValueFactory& vf):
339 value_factory(vf),
340 progress(false)
341 {
342 }
343
344 void
visit(AluInstr * instr)345 CopyPropFwdVisitor::visit(AluInstr *instr)
346 {
347 sfn_log << SfnLog::opt << "CopyPropFwdVisitor:[" << instr->block_id() << ":"
348 << instr->index() << "] " << *instr << " dset=" << instr->dest() << " ";
349
350 if (instr->dest()) {
351 sfn_log << SfnLog::opt << "has uses; " << instr->dest()->uses().size();
352 }
353
354 sfn_log << SfnLog::opt << "\n";
355
356 if (!instr->can_propagate_src()) {
357 return;
358 }
359
360 auto src = instr->psrc(0);
361 auto dest = instr->dest();
362
363 /* Don't propagate an indirect load to more than one
364 * instruction, because we may have to split the address loads
365 * creating more instructions */
366 if (dest->uses().size() > 1) {
367 auto [addr, is_for_dest, index] = instr->indirect_addr();
368 if (addr && !is_for_dest)
369 return;
370 }
371
372
373 auto ii = dest->uses().begin();
374 auto ie = dest->uses().end();
375
376 auto mov_block_id = instr->block_id();
377
378 /** libc++ seems to invalidate the end iterator too if a std::set is
379 * made empty by an erase operation,
380 * https://gitlab.freedesktop.org/mesa/mesa/-/issues/7931
381 */
382 while(ii != ie && !dest->uses().empty()) {
383 auto i = *ii;
384 auto target_block_id = i->block_id();
385
386 ++ii;
387 /* SSA can always be propagated, registers only in the same block
388 * and only if they are assigned in the same block */
389 bool dest_can_propagate = dest->has_flag(Register::ssa);
390
391 if (!dest_can_propagate) {
392
393 /* Register can propagate if the assignment was in the same
394 * block, and we don't have a second assignment coming later
395 * (e.g. helper invocation evaluation does
396 *
397 * 1: MOV R0.x, -1
398 * 2: FETCH R0.0 VPM
399 * 3: MOV SN.x, R0.x
400 *
401 * Here we can't prpagate the move in 1 to SN.x in 3 */
402 if ((mov_block_id == target_block_id && instr->index() < i->index())) {
403 dest_can_propagate = true;
404 if (dest->parents().size() > 1) {
405 for (auto p : dest->parents()) {
406 if (p->block_id() == i->block_id() && p->index() > instr->index()) {
407 dest_can_propagate = false;
408 break;
409 }
410 }
411 }
412 }
413 }
414 bool move_addr_use = false;
415 bool src_can_propagate = false;
416 if (auto rsrc = src->as_register()) {
417 if (rsrc->has_flag(Register::ssa)) {
418 src_can_propagate = true;
419 } else if (mov_block_id == target_block_id) {
420 if (auto a = rsrc->addr()) {
421 if (a->as_register() &&
422 !a->as_register()->has_flag(Register::addr_or_idx) &&
423 i->block_id() == mov_block_id &&
424 i->index() == instr->index() + 1) {
425 src_can_propagate = true;
426 move_addr_use = true;
427 }
428 } else {
429 src_can_propagate = true;
430 }
431 for (auto p : rsrc->parents()) {
432 if (p->block_id() == mov_block_id &&
433 p->index() > instr->index() &&
434 p->index() < i->index()) {
435 src_can_propagate = false;
436 break;
437 }
438 }
439 }
440 } else {
441 src_can_propagate = true;
442 }
443
444 if (dest_can_propagate && src_can_propagate) {
445 sfn_log << SfnLog::opt << " Try replace in " << i->block_id() << ":"
446 << i->index() << *i << "\n";
447
448 if (i->as_alu() && i->as_alu()->parent_group()) {
449 progress |= i->as_alu()->parent_group()->replace_source(dest, src);
450 } else {
451 bool success = i->replace_source(dest, src);
452 if (success && move_addr_use) {
453 for (auto r : instr->required_instr()){
454 std::cerr << "add " << *r << " to " << *i << "\n";
455 i->add_required_instr(r);
456 }
457 }
458 progress |= success;
459 }
460 }
461 }
462 if (instr->dest()) {
463 sfn_log << SfnLog::opt << "has uses; " << instr->dest()->uses().size();
464 }
465 sfn_log << SfnLog::opt << " done\n";
466 }
467
468 void
visit(AluGroup * instr)469 CopyPropFwdVisitor::visit(AluGroup *instr)
470 {
471 (void)instr;
472 }
473
474 void
visit(TexInstr * instr)475 CopyPropFwdVisitor::visit(TexInstr *instr)
476 {
477 propagate_to(instr->src(), instr);
478 }
479
visit(GDSInstr * instr)480 void CopyPropFwdVisitor::visit(GDSInstr *instr)
481 {
482 propagate_to(instr->src(), instr);
483 }
484
485 void
visit(ExportInstr * instr)486 CopyPropFwdVisitor::visit(ExportInstr *instr)
487 {
488 propagate_to(instr->value(), instr);
489 }
490
register_sel_can_change(Pin pin)491 static bool register_sel_can_change(Pin pin)
492 {
493 return pin == pin_free || pin == pin_none;
494 }
495
register_chan_is_pinned(Pin pin)496 static bool register_chan_is_pinned(Pin pin)
497 {
498 return pin == pin_chan ||
499 pin == pin_fully ||
500 pin == pin_chgr;
501 }
502
503
504 void
propagate_to(RegisterVec4 & value,Instr * instr)505 CopyPropFwdVisitor::propagate_to(RegisterVec4& value, Instr *instr)
506 {
507 /* Collect parent instructions - only ALU move without modifiers
508 * and without indirect access are allowed. */
509 AluInstr *parents[4] = {nullptr};
510 bool have_candidates = false;
511 for (int i = 0; i < 4; ++i) {
512 if (value[i]->chan() < 4 && value[i]->has_flag(Register::ssa)) {
513 /* We have a pre-define value, so we can't propagate a copy */
514 if (value[i]->parents().empty())
515 return;
516
517 if (value[i]->uses().size() > 1)
518 return;
519
520 assert(value[i]->parents().size() == 1);
521 parents[i] = (*value[i]->parents().begin())->as_alu();
522
523 /* Parent op is not an ALU instruction, so we can't
524 copy-propagate */
525 if (!parents[i])
526 return;
527
528
529 if ((parents[i]->opcode() != op1_mov) ||
530 parents[i]->has_source_mod(0, AluInstr::mod_neg) ||
531 parents[i]->has_source_mod(0, AluInstr::mod_abs) ||
532 parents[i]->has_alu_flag(alu_dst_clamp) ||
533 parents[i]->has_alu_flag(alu_src0_rel))
534 return;
535
536 auto [addr, dummy0, index_reg_dummy] = parents[i]->indirect_addr();
537
538 /* Don't accept moves with indirect reads, because they are not
539 * supported with instructions that use vec4 values */
540 if (addr || index_reg_dummy)
541 return;
542
543 have_candidates = true;
544 }
545 }
546
547 if (!have_candidates)
548 return;
549
550 /* Collect the new source registers. We may have to move all registers
551 * to a new virtual sel index. */
552
553 PRegister new_src[4] = {0};
554 int new_chan[4] = {0,0,0,0};
555
556 uint8_t used_chan_mask = 0;
557 int new_sel = -1;
558 bool all_sel_can_change = true;
559
560 bool is_ssa = true;
561
562 for (int i = 0; i < 4; ++i) {
563
564 /* No parent means we either ignore the channel or insert 0 or 1.*/
565 if (!parents[i])
566 continue;
567
568 unsigned allowed_mask = 0xf & ~used_chan_mask;
569
570 auto src = parents[i]->src(0).as_register();
571 if (!src)
572 return;
573
574 /* Don't accept an array element for now, we would need extra checking
575 * that the value is not overwritten by an indirect access */
576 if (src->pin() == pin_array)
577 return;
578
579 /* Is this check still needed ? */
580 if (!src->has_flag(Register::ssa) &&
581 !assigned_register_direct(src)) {
582 return;
583 }
584
585 /* If the channel chan't switch we have to update the channel mask
586 * TODO: assign channel pinned registers first might give more
587 * opportunities for this optimization */
588 if (register_chan_is_pinned(src->pin()))
589 allowed_mask = 1 << src->chan();
590
591 /* Update the possible channel mask based on the sourcee's parent
592 * instruction(s) */
593 for (auto p : src->parents()) {
594 auto alu = p->as_alu();
595 if (alu)
596 allowed_mask &= alu->allowed_dest_chan_mask();
597 }
598
599 for (auto u : src->uses()) {
600 auto alu = u->as_alu();
601 if (alu)
602 allowed_mask &= alu->allowed_src_chan_mask();
603 }
604
605 if (!allowed_mask)
606 return;
607
608 /* Prefer keeping the channel, but if that's not possible
609 * i.e. if the sel has to change, then pick the next free channel
610 * (see below) */
611 new_chan[i] = src->chan();
612
613 if (new_sel < 0) {
614 new_sel = src->sel();
615 is_ssa = src->has_flag(Register::ssa);
616 } else if (new_sel != src->sel()) {
617 /* If we have to assign a new register sel index do so only
618 * if all already assigned source can get a new register index,
619 * and all registers are either SSA or registers.
620 * TODO: check whether this last restriction is required */
621 if (all_sel_can_change &&
622 register_sel_can_change(src->pin()) &&
623 (is_ssa == src->has_flag(Register::ssa))) {
624 new_sel = value_factory.new_register_index();
625 new_chan[i] = u_bit_scan(&allowed_mask);
626 } else /* Sources can't be combined to a vec4 so bail out */
627 return;
628 }
629
630 new_src[i] = src;
631 used_chan_mask |= 1 << new_chan[i];
632 if (!register_sel_can_change(src->pin()))
633 all_sel_can_change = false;
634 }
635
636 /* Apply the changes to the vec4 source */
637 value.del_use(instr);
638 for (int i = 0; i < 4; ++i) {
639 if (parents[i]) {
640 new_src[i]->set_sel(new_sel);
641 if (is_ssa)
642 new_src[i]->set_flag(Register::ssa);
643 new_src[i]->set_chan(new_chan[i]);
644
645 value.set_value(i, new_src[i]);
646
647 if (new_src[i]->pin() != pin_fully &&
648 new_src[i]->pin() != pin_chgr) {
649 if (new_src[i]->pin() == pin_chan)
650 new_src[i]->set_pin(pin_chgr);
651 else
652 new_src[i]->set_pin(pin_group);
653 }
654 progress |= true;
655 }
656 }
657 value.add_use(instr);
658 if (progress)
659 value.validate();
660 }
661
assigned_register_direct(PRegister reg)662 bool CopyPropFwdVisitor::assigned_register_direct(PRegister reg)
663 {
664 for (auto p: reg->parents()) {
665 if (p->as_alu()) {
666 auto [addr, dummy, index_reg] = p->as_alu()->indirect_addr();
667 if (addr)
668 return false;
669 }
670 }
671 return true;
672 }
673
674 void
visit(FetchInstr * instr)675 CopyPropFwdVisitor::visit(FetchInstr *instr)
676 {
677 (void)instr;
678 }
679
680 void
visit(Block * instr)681 CopyPropFwdVisitor::visit(Block *instr)
682 {
683 for (auto& i : *instr)
684 i->accept(*this);
685 }
686
CopyPropBackVisitor()687 CopyPropBackVisitor::CopyPropBackVisitor():
688 progress(false)
689 {
690 }
691
692 void
visit(AluInstr * instr)693 CopyPropBackVisitor::visit(AluInstr *instr)
694 {
695 bool local_progress = false;
696
697 sfn_log << SfnLog::opt << "CopyPropBackVisitor:[" << instr->block_id() << ":"
698 << instr->index() << "] " << *instr << "\n";
699
700 if (!instr->can_propagate_dest()) {
701 return;
702 }
703
704 auto src_reg = instr->psrc(0)->as_register();
705 if (!src_reg) {
706 return;
707 }
708
709 if (src_reg->uses().size() > 1)
710 return;
711
712 auto dest = instr->dest();
713 if (!dest || !instr->has_alu_flag(alu_write)) {
714 return;
715 }
716
717 if (!dest->has_flag(Register::ssa) && dest->parents().size() > 1)
718 return;
719
720 for (auto& i : src_reg->parents()) {
721 sfn_log << SfnLog::opt << "Try replace dest in " << i->block_id() << ":"
722 << i->index() << *i << "\n";
723
724 if (i->replace_dest(dest, instr)) {
725 dest->del_parent(instr);
726 dest->add_parent(i);
727 for (auto d : instr->dependend_instr()) {
728 d->add_required_instr(i);
729 }
730 local_progress = true;
731 }
732 }
733
734 if (local_progress)
735 instr->set_dead();
736
737 progress |= local_progress;
738 }
739
740 void
visit(AluGroup * instr)741 CopyPropBackVisitor::visit(AluGroup *instr)
742 {
743 for (auto& i : *instr) {
744 if (i)
745 i->accept(*this);
746 }
747 }
748
749 void
visit(TexInstr * instr)750 CopyPropBackVisitor::visit(TexInstr *instr)
751 {
752 (void)instr;
753 }
754
755 void
visit(FetchInstr * instr)756 CopyPropBackVisitor::visit(FetchInstr *instr)
757 {
758 (void)instr;
759 }
760
761 void
visit(Block * instr)762 CopyPropBackVisitor::visit(Block *instr)
763 {
764 for (auto i = instr->rbegin(); i != instr->rend(); ++i)
765 if (!(*i)->is_dead())
766 (*i)->accept(*this);
767 }
768
769 class SimplifySourceVecVisitor : public InstrVisitor {
770 public:
SimplifySourceVecVisitor()771 SimplifySourceVecVisitor():
772 progress(false)
773 {
774 }
775
visit(AluInstr * instr)776 void visit(AluInstr *instr) override { (void)instr; }
visit(AluGroup * instr)777 void visit(AluGroup *instr) override { (void)instr; }
778 void visit(TexInstr *instr) override;
779 void visit(ExportInstr *instr) override;
780 void visit(FetchInstr *instr) override;
781 void visit(Block *instr) override;
782 void visit(ControlFlowInstr *instr) override;
783 void visit(IfInstr *instr) override;
784 void visit(ScratchIOInstr *instr) override;
785 void visit(StreamOutInstr *instr) override;
786 void visit(MemRingOutInstr *instr) override;
visit(EmitVertexInstr * instr)787 void visit(EmitVertexInstr *instr) override { (void)instr; }
visit(GDSInstr * instr)788 void visit(GDSInstr *instr) override { (void)instr; };
visit(WriteTFInstr * instr)789 void visit(WriteTFInstr *instr) override { (void)instr; };
visit(LDSAtomicInstr * instr)790 void visit(LDSAtomicInstr *instr) override { (void)instr; };
visit(LDSReadInstr * instr)791 void visit(LDSReadInstr *instr) override { (void)instr; };
visit(RatInstr * instr)792 void visit(RatInstr *instr) override { (void)instr; };
793
794 void replace_src(Instr *instr, RegisterVec4& reg4);
795
796 bool progress;
797 };
798
799 class HasVecDestVisitor : public ConstInstrVisitor {
800 public:
HasVecDestVisitor()801 HasVecDestVisitor():
802 has_group_dest(false)
803 {
804 }
805
visit(const AluInstr & instr)806 void visit(const AluInstr& instr) override { (void)instr; }
visit(const AluGroup & instr)807 void visit(const AluGroup& instr) override { (void)instr; }
visit(const TexInstr & instr)808 void visit(const TexInstr& instr) override { (void)instr; has_group_dest = true; };
visit(const ExportInstr & instr)809 void visit(const ExportInstr& instr) override { (void)instr; }
visit(const FetchInstr & instr)810 void visit(const FetchInstr& instr) override { (void)instr; has_group_dest = true; };
visit(const Block & instr)811 void visit(const Block& instr) override { (void)instr; };
visit(const ControlFlowInstr & instr)812 void visit(const ControlFlowInstr& instr) override{ (void)instr; }
visit(const IfInstr & instr)813 void visit(const IfInstr& instr) override{ (void)instr; }
visit(const ScratchIOInstr & instr)814 void visit(const ScratchIOInstr& instr) override { (void)instr; };
visit(const StreamOutInstr & instr)815 void visit(const StreamOutInstr& instr) override { (void)instr; }
visit(const MemRingOutInstr & instr)816 void visit(const MemRingOutInstr& instr) override { (void)instr; }
visit(const EmitVertexInstr & instr)817 void visit(const EmitVertexInstr& instr) override { (void)instr; }
visit(const GDSInstr & instr)818 void visit(const GDSInstr& instr) override { (void)instr; }
visit(const WriteTFInstr & instr)819 void visit(const WriteTFInstr& instr) override { (void)instr; };
visit(const LDSAtomicInstr & instr)820 void visit(const LDSAtomicInstr& instr) override { (void)instr; };
visit(const LDSReadInstr & instr)821 void visit(const LDSReadInstr& instr) override { (void)instr; };
visit(const RatInstr & instr)822 void visit(const RatInstr& instr) override { (void)instr; };
823
824 bool has_group_dest;
825 };
826
827 class HasVecSrcVisitor : public ConstInstrVisitor {
828 public:
HasVecSrcVisitor()829 HasVecSrcVisitor():
830 has_group_src(false)
831 {
832 }
833
visit(UNUSED const AluInstr & instr)834 void visit(UNUSED const AluInstr& instr) override { }
visit(UNUSED const AluGroup & instr)835 void visit(UNUSED const AluGroup& instr) override { }
visit(UNUSED const FetchInstr & instr)836 void visit(UNUSED const FetchInstr& instr) override { };
visit(UNUSED const Block & instr)837 void visit(UNUSED const Block& instr) override { };
visit(UNUSED const ControlFlowInstr & instr)838 void visit(UNUSED const ControlFlowInstr& instr) override{ }
visit(UNUSED const IfInstr & instr)839 void visit(UNUSED const IfInstr& instr) override{ }
visit(UNUSED const LDSAtomicInstr & instr)840 void visit(UNUSED const LDSAtomicInstr& instr) override { };
visit(UNUSED const LDSReadInstr & instr)841 void visit(UNUSED const LDSReadInstr& instr) override { };
842
visit(const TexInstr & instr)843 void visit(const TexInstr& instr) override { check(instr.src()); }
visit(const ExportInstr & instr)844 void visit(const ExportInstr& instr) override { check(instr.value()); }
visit(const GDSInstr & instr)845 void visit(const GDSInstr& instr) override { check(instr.src()); }
846
847 // No swizzling supported, so we want to keep the register group
visit(UNUSED const ScratchIOInstr & instr)848 void visit(UNUSED const ScratchIOInstr& instr) override { has_group_src = true; };
visit(UNUSED const StreamOutInstr & instr)849 void visit(UNUSED const StreamOutInstr& instr) override { has_group_src = true; }
visit(UNUSED const MemRingOutInstr & instr)850 void visit(UNUSED const MemRingOutInstr& instr) override { has_group_src = true; }
visit(UNUSED const RatInstr & instr)851 void visit(UNUSED const RatInstr& instr) override { has_group_src = true; };
852
visit(UNUSED const EmitVertexInstr & instr)853 void visit(UNUSED const EmitVertexInstr& instr) override { }
854
855 // We always emit at least two values
visit(UNUSED const WriteTFInstr & instr)856 void visit(UNUSED const WriteTFInstr& instr) override { has_group_src = true; };
857
858
859 void check(const RegisterVec4& value);
860
861 bool has_group_src;
862 };
863
check(const RegisterVec4 & value)864 void HasVecSrcVisitor::check(const RegisterVec4& value)
865 {
866 int nval = 0;
867 for (int i = 0; i < 4 && nval < 2; ++i) {
868 if (value[i]->chan() < 4)
869 ++nval;
870 }
871 has_group_src = nval > 1;
872 }
873
874 bool
simplify_source_vectors(Shader & sh)875 simplify_source_vectors(Shader& sh)
876 {
877 SimplifySourceVecVisitor visitor;
878
879 for (auto b : sh.func())
880 b->accept(visitor);
881
882 return visitor.progress;
883 }
884
885 void
visit(TexInstr * instr)886 SimplifySourceVecVisitor::visit(TexInstr *instr)
887 {
888
889 if (instr->opcode() != TexInstr::get_resinfo) {
890 auto& src = instr->src();
891 replace_src(instr, src);
892 int nvals = 0;
893 for (int i = 0; i < 4; ++i)
894 if (src[i]->chan() < 4)
895 ++nvals;
896 if (nvals == 1) {
897 for (int i = 0; i < 4; ++i)
898 if (src[i]->chan() < 4) {
899 HasVecDestVisitor check_dests;
900 for (auto p : src[i]->parents()) {
901 p->accept(check_dests);
902 if (check_dests.has_group_dest)
903 break;
904 }
905
906 HasVecSrcVisitor check_src;
907 for (auto p : src[i]->uses()) {
908 p->accept(check_src);
909 if (check_src.has_group_src)
910 break;
911 }
912
913 if (check_dests.has_group_dest || check_src.has_group_src)
914 break;
915
916 if (src[i]->pin() == pin_group)
917 src[i]->set_pin(pin_free);
918 else if (src[i]->pin() == pin_chgr)
919 src[i]->set_pin(pin_chan);
920 }
921 }
922 }
923 for (auto& prep : instr->prepare_instr()) {
924 prep->accept(*this);
925 }
926 }
927
928 void
visit(ScratchIOInstr * instr)929 SimplifySourceVecVisitor::visit(ScratchIOInstr *instr)
930 {
931 (void)instr;
932 }
933
934 class ReplaceConstSource : public AluInstrVisitor {
935 public:
ReplaceConstSource(Instr * old_use_,RegisterVec4 & vreg_,int i)936 ReplaceConstSource(Instr *old_use_, RegisterVec4& vreg_, int i):
937 old_use(old_use_),
938 vreg(vreg_),
939 index(i),
940 success(false)
941 {
942 }
943
944 using AluInstrVisitor::visit;
945
946 void visit(AluInstr *alu) override;
947
948 Instr *old_use;
949 RegisterVec4& vreg;
950 int index;
951 bool success;
952 };
953
954 void
visit(ExportInstr * instr)955 SimplifySourceVecVisitor::visit(ExportInstr *instr)
956 {
957 replace_src(instr, instr->value());
958 }
959
960 void
replace_src(Instr * instr,RegisterVec4 & reg4)961 SimplifySourceVecVisitor::replace_src(Instr *instr, RegisterVec4& reg4)
962 {
963 for (int i = 0; i < 4; ++i) {
964 auto s = reg4[i];
965
966 if (s->chan() > 3)
967 continue;
968
969 if (!s->has_flag(Register::ssa))
970 continue;
971
972 /* Cayman trans ops have more then one parent for
973 * one dest */
974 if (s->parents().size() != 1)
975 continue;
976
977 auto& op = *s->parents().begin();
978
979 ReplaceConstSource visitor(instr, reg4, i);
980
981 op->accept(visitor);
982
983 progress |= visitor.success;
984 }
985 }
986
987 void
visit(StreamOutInstr * instr)988 SimplifySourceVecVisitor::visit(StreamOutInstr *instr)
989 {
990 (void)instr;
991 }
992
993 void
visit(MemRingOutInstr * instr)994 SimplifySourceVecVisitor::visit(MemRingOutInstr *instr)
995 {
996 (void)instr;
997 }
998
999 void
visit(AluInstr * alu)1000 ReplaceConstSource::visit(AluInstr *alu)
1001 {
1002 if (alu->opcode() != op1_mov)
1003 return;
1004
1005 if (alu->has_source_mod(0, AluInstr::mod_abs) ||
1006 alu->has_source_mod(0, AluInstr::mod_neg))
1007 return;
1008
1009 auto src = alu->psrc(0);
1010 assert(src);
1011
1012 int override_chan = -1;
1013
1014 if (value_is_const_uint(*src, 0)) {
1015 override_chan = 4;
1016 } else if (value_is_const_float(*src, 1.0f)) {
1017 override_chan = 5;
1018 }
1019
1020 if (override_chan >= 0) {
1021 vreg[index]->del_use(old_use);
1022 auto reg = new Register(vreg.sel(), override_chan, vreg[index]->pin());
1023 vreg.set_value(index, reg);
1024 success = true;
1025 }
1026 }
1027
1028 void
visit(FetchInstr * instr)1029 SimplifySourceVecVisitor::visit(FetchInstr *instr)
1030 {
1031 (void)instr;
1032 }
1033
1034 void
visit(Block * instr)1035 SimplifySourceVecVisitor::visit(Block *instr)
1036 {
1037 for (auto i = instr->rbegin(); i != instr->rend(); ++i)
1038 if (!(*i)->is_dead())
1039 (*i)->accept(*this);
1040 }
1041
1042 void
visit(ControlFlowInstr * instr)1043 SimplifySourceVecVisitor::visit(ControlFlowInstr *instr)
1044 {
1045 (void)instr;
1046 }
1047
1048 void
visit(IfInstr * instr)1049 SimplifySourceVecVisitor::visit(IfInstr *instr)
1050 {
1051 (void)instr;
1052 }
1053
1054 } // namespace r600
1055