• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instr_alu.h"
28 #include "sfn_instr_alugroup.h"
29 #include "sfn_instr_tex.h"
30 #include "sfn_shader.h"
31 #include "sfn_debug.h"
32 
33 #include <algorithm>
34 #include <sstream>
35 
36 
37 namespace r600 {
38 
39 using std::string;
40 using std::istream;
41 using std::vector;
42 
AluInstr(EAluOp opcode,PRegister dest,SrcValues src,const std::set<AluModifiers> & flags,int slots)43 AluInstr::AluInstr(EAluOp opcode, PRegister dest,
44                    SrcValues src,
45                    const std::set<AluModifiers>& flags, int slots):
46    m_opcode(opcode),
47    m_dest(dest),
48    m_bank_swizzle(alu_vec_unknown),
49    m_cf_type(cf_alu),
50    m_alu_slots(slots)
51 {
52    m_src.swap(src);
53 
54    if (m_src.size() == 3)
55       m_alu_flags.set(alu_op3);
56 
57    for(auto f : flags)
58       m_alu_flags.set(f);
59 
60    ASSERT_OR_THROW(m_src.size() == static_cast<size_t>(alu_ops.at(opcode).nsrc * m_alu_slots),
61                    "Unexpected number of source values");
62 
63    if (m_alu_flags.test(alu_write))
64       ASSERT_OR_THROW(dest, "Write flag is set, but no destination register is given");
65 
66    update_uses();
67 
68 }
69 
AluInstr(EAluOp opcode)70 AluInstr::AluInstr(EAluOp opcode):
71    AluInstr(opcode, nullptr, SrcValues(alu_ops.at(opcode).nsrc), {}, 1)
72 {
73 }
74 
AluInstr(EAluOp opcode,int chan)75 AluInstr::AluInstr(EAluOp opcode, int chan):
76    AluInstr(opcode, nullptr, SrcValues(), {}, 1)
77 {
78    m_fallback_chan = chan;
79 }
80 
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,const std::set<AluModifiers> & m_flags)81 AluInstr::AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0,
82                    const std::set<AluModifiers>& m_flags):
83    AluInstr(opcode, dest, SrcValues{src0}, m_flags, 1)
84 {
85 
86 }
87 
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,const std::set<AluModifiers> & m_flags)88 AluInstr::AluInstr(EAluOp opcode, PRegister dest,
89                    PVirtualValue src0, PVirtualValue src1,
90                    const std::set<AluModifiers>& m_flags):
91    AluInstr(opcode, dest, SrcValues{src0, src1}, m_flags, 1)
92 {
93 
94 }
95 
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,PVirtualValue src2,const std::set<AluModifiers> & m_flags)96 AluInstr::AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, PVirtualValue src1,
97                    PVirtualValue src2,
98                    const std::set<AluModifiers>& m_flags):
99    AluInstr(opcode, dest, SrcValues{src0, src1, src2}, m_flags, 1)
100 {
101 
102 }
103 
AluInstr(ESDOp op,PVirtualValue src0,PVirtualValue src1,PVirtualValue address)104 AluInstr::AluInstr(ESDOp op, PVirtualValue src0, PVirtualValue src1, PVirtualValue address):
105    m_lds_opcode(op)
106 {
107    set_alu_flag(alu_is_lds);
108 
109    m_src.push_back(address);
110    if (src0) {
111       m_src.push_back(src0);
112       if (src1)
113          m_src.push_back(src1);
114    }
115    update_uses();
116 }
117 
AluInstr(ESDOp op,const SrcValues & src,const std::set<AluModifiers> & flags)118 AluInstr::AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags):
119    m_lds_opcode(op),
120    m_src(src)
121 {
122    for(auto f : flags)
123       set_alu_flag(f);
124 
125    set_alu_flag(alu_is_lds);
126    update_uses();
127 }
128 
update_uses()129 void AluInstr::update_uses()
130 {
131    for (auto& s : m_src) {
132       auto r = s->as_register();
133       if (r) {
134          r->add_use(this);
135          // move this to add_use
136          if (r->pin() == pin_array) {
137             auto array_elm = static_cast<LocalArrayValue *>(r);
138             auto addr = array_elm->addr();
139             if (addr && addr->as_register())
140                addr->as_register()->add_use(this);
141          }
142       }
143       auto u = s->as_uniform();
144       if (u && u->buf_addr() && u->buf_addr()->as_register())
145          u->buf_addr()->as_register()->add_use(this);
146    }
147 
148    if (m_dest && has_alu_flag(alu_write)) {
149       m_dest->add_parent(this);
150 
151       if (m_dest->pin() == pin_array) {
152          // move this to add_parent
153          auto array_elm = static_cast<LocalArrayValue *>(m_dest);
154          auto addr = array_elm->addr();
155          if (addr && addr->as_register())
156             addr->as_register()->add_use(this);
157       }
158    }
159 }
160 
accept(ConstInstrVisitor & visitor) const161 void AluInstr::accept(ConstInstrVisitor& visitor) const
162 {
163    visitor.visit(*this);
164 }
165 
accept(InstrVisitor & visitor)166 void AluInstr::accept(InstrVisitor& visitor)
167 {
168    visitor.visit(this);
169 }
170 
171 const std::map<ECFAluOpCode, std::string> AluInstr::cf_map = {
172    {cf_alu_break, "BREAK"},
173    {cf_alu_continue, "CONT"},
174    {cf_alu_else_after, "ELSE_AFTER"},
175    {cf_alu_extended, "EXTENDED"},
176    {cf_alu_pop_after, "POP_AFTER"},
177    {cf_alu_pop2_after, "POP2_AFTER"},
178    {cf_alu_push_before, "PUSH_BEFORE"}
179 };
180 
181 const std::map<AluBankSwizzle, std::string> AluInstr::bank_swizzle_map = {
182    {alu_vec_012, "VEC_012"},
183    {alu_vec_021, "VEC_021"},
184    {alu_vec_102, "VEC_102"},
185    {alu_vec_120, "VEC_120"},
186    {alu_vec_201, "VEC_201"},
187    {alu_vec_210, "VEC_210"}
188 };
189 
190 const AluModifiers AluInstr::src_abs_flags[2] =
191 {alu_src0_abs, alu_src1_abs};
192 const AluModifiers AluInstr::src_neg_flags[3] =
193 {alu_src0_neg, alu_src1_neg, alu_src2_neg};
194 const AluModifiers AluInstr::src_rel_flags[3] =
195 {alu_src0_rel, alu_src1_rel, alu_src2_rel};
196 
/* Bit flags and index mode that describe how a source value is printed.
 * The flag constants are powers of two, so they can be or-ed into `flags`. */
struct ValuePrintFlags {
   ValuePrintFlags(int im, int f):
      index_mode(im),
      flags(f)
   {
   }

   int index_mode = 0;
   int flags = 0;

   /* constexpr (implicitly inline since C++17) so that the constants can
    * be bound to references without an out-of-class definition. */
   static constexpr int is_rel = 1;
   static constexpr int has_abs = 2;
   static constexpr int has_neg = 4;
   static constexpr int literal_is_float = 8;
   static constexpr int index_ar = 16;
   static constexpr int index_loopidx = 32;
};
211 
do_print(std::ostream & os) const212 void AluInstr::do_print(std::ostream& os) const
213 {
214    const char swzchar[] = "xyzw01?_";
215 
216    unsigned i = 0;
217 
218    os << "ALU ";
219 
220    if (has_alu_flag(alu_is_lds)) {
221       os << "LDS " << lds_ops.at(m_lds_opcode).name;
222       os << " __.x : ";
223    } else {
224 
225       os << alu_ops.at(m_opcode).name;
226       if (has_alu_flag(alu_dst_clamp))
227          os << " CLAMP";
228 
229       if (m_dest) {
230          if (has_alu_flag(alu_write))
231             os << " " << *m_dest;
232          else
233             os << " __" << "." << swzchar[m_dest->chan()];
234          if (!has_alu_flag(alu_write) && m_dest->pin() != pin_none)
235             os << "@" << m_dest->pin();
236          os << " : ";
237       } else {
238          os << "__." << swzchar[dest_chan()] << " : ";
239       }
240    }
241 
242    const int n_source_per_slot = has_alu_flag(alu_is_lds) ?
243                                  m_src.size() : alu_ops.at(m_opcode).nsrc;
244 
245    for (int s = 0; s < m_alu_slots; ++s) {
246 
247       if (s > 0)
248          os << " +";
249 
250       for (int k = 0; k < n_source_per_slot; ++k) {
251          int pflags = 0;
252          if (i)
253             os << ' ';
254          if (has_alu_flag(src_neg_flags[k])) pflags |= ValuePrintFlags::has_neg;
255          if (has_alu_flag(src_rel_flags[k])) pflags |= ValuePrintFlags::is_rel;
256          if (i < 2)
257             if (has_alu_flag(src_abs_flags[k])) pflags |= ValuePrintFlags::has_abs;
258 
259          if (pflags & ValuePrintFlags::has_neg) os << '-';
260          if (pflags & ValuePrintFlags::has_abs) os << '|';
261          os << *m_src[i];
262          if (pflags & ValuePrintFlags::has_abs) os << '|';
263          ++i;
264       }
265    }
266 
267    os << " {";
268    if (has_alu_flag(alu_write)) os << 'W';
269    if (has_alu_flag(alu_last_instr)) os << 'L';
270    if (has_alu_flag(alu_update_exec)) os << 'E';
271    if (has_alu_flag(alu_update_pred)) os << 'P';
272    os << "}";
273 
274    auto bs_name = bank_swizzle_map.find(m_bank_swizzle);
275    if (bs_name != bank_swizzle_map.end())
276       os << ' ' <<  bs_name->second;
277 
278    auto cf_name = cf_map.find(m_cf_type);
279    if (cf_name != cf_map.end())
280       os <<  ' ' << cf_name->second;
281 }
282 
can_propagate_src() const283 bool AluInstr::can_propagate_src() const
284 {
285    /* We can use the source in the next instruction */
286    if (!can_copy_propagate())
287       return false;
288 
289    auto src_reg = m_src[0]->as_register();
290    if (!src_reg)
291       return true;
292 
293    assert(m_dest);
294 
295 
296 
297    if (!m_dest->is_ssa()) {
298       return false;
299    }
300 
301    if (m_dest->pin() == pin_fully)
302       return m_dest->equal_to(*src_reg);
303 
304    if (m_dest->pin() == pin_chan)
305       return src_reg->pin() == pin_none ||
306             (src_reg->pin() == pin_chan &&
307              src_reg->chan() == m_dest->chan());
308 
309    return m_dest->pin() == pin_none || m_dest->pin() == pin_free;
310 }
311 
can_propagate_dest() const312 bool AluInstr::can_propagate_dest() const
313 {
314    if (!can_copy_propagate()){
315       return false;
316    }
317 
318    auto src_reg = m_src[0]->as_register();
319    if (!src_reg) {
320       return false;
321    }
322 
323    assert(m_dest);
324 
325    if (src_reg->pin() == pin_fully ||
326        src_reg->pin() == pin_group) {
327       return false;
328    }
329 
330    if (!src_reg->is_ssa())
331       return false;
332 
333    if (src_reg->pin() == pin_chan)
334       return m_dest->pin() == pin_none ||
335              m_dest->pin() == pin_free ||
336             ((m_dest->pin() == pin_chan ||
337               m_dest->pin() == pin_group) &&
338              src_reg->chan() == m_dest->chan());
339 
340    return (src_reg->pin() == pin_none ||
341            src_reg->pin() == pin_free);
342 }
343 
can_copy_propagate() const344 bool AluInstr::can_copy_propagate() const
345 {
346    if (m_opcode != op1_mov)
347       return false;
348 
349    if (has_alu_flag(alu_src0_abs) ||
350        has_alu_flag(alu_src0_neg) ||
351        has_alu_flag(alu_dst_clamp))
352       return false;
353 
354    return has_alu_flag(alu_write);
355 }
356 
replace_source(PRegister old_src,PVirtualValue new_src)357 bool AluInstr::replace_source(PRegister old_src, PVirtualValue new_src)
358 {
359    bool process = false;
360 
361    if (!check_readport_validation(old_src, new_src))
362       return false;
363 
364    /* If the old source is an array element, we assume that there
365     * might have been an (untracked) indirect access, so don't replace
366     * this source */
367    if (old_src->pin() == pin_array)
368       return false;
369 
370    if (new_src->get_addr()) {
371       for (auto& s : m_src) {
372          auto addr = s->get_addr();
373          /* can't have two differen't indirect addresses in the same instr */
374          if (addr && !addr->equal_to(*new_src->get_addr()))
375             return false;
376       }
377    }
378 
379    if (m_dest) {
380       /* We don't allow src and dst with rel and different indirect register addresses */
381       if (m_dest->pin() == pin_array && new_src->pin() == pin_array) {
382          auto dav = static_cast<const LocalArrayValue *>(m_dest)->addr();
383          auto sav = static_cast<const LocalArrayValue *>(new_src)->addr();
384          if (dav && sav && dav->as_register() &&  !dav->equal_to(*sav))
385             return false;
386       }
387    }
388 
389    /* Check the readports */
390    if (m_alu_slots * alu_ops.at(m_opcode).nsrc > 2 || m_parent_group) {
391       AluReadportReservation read_port_check =
392             !m_parent_group ?
393                AluReadportReservation() :
394                m_parent_group->readport_reserer();
395 
396       int nsrc = alu_ops.at(m_opcode).nsrc;
397       PVirtualValue src[3];
398 
399       for (int s = 0; s < m_alu_slots; ++s) {
400          for (int i = 0; i < nsrc; ++i) {
401             auto old_s = m_src[i + nsrc * s];
402             src[i] = old_s->equal_to(*old_src) ? new_src : old_s;
403          }
404          AluBankSwizzle bs = alu_vec_012;
405          while (bs != alu_vec_unknown) {
406             if (read_port_check.schedule_vec_src(src,  nsrc, bs)) {
407                break;
408             }
409             ++bs;
410          }
411          if (bs == alu_vec_unknown)
412             return false;
413       }
414       if (m_parent_group)
415          m_parent_group->set_readport_reserer(read_port_check);
416    }
417 
418    for (unsigned i = 0; i < m_src.size(); ++i) {
419       if (old_src->equal_to(*m_src[i])) {
420          m_src[i] = new_src;
421          process = true;
422       }
423    }
424    if (process) {
425       auto r = new_src->as_register();
426       if (r)
427          r->add_use(this);
428       old_src->del_use(this);
429    }
430    return process;
431 }
432 
set_sources(SrcValues src)433 void AluInstr::set_sources(SrcValues src)
434 {
435    for (auto& s : m_src) {
436       auto r = s->as_register();
437       if (r)
438          r->del_use(this);
439    }
440    m_src.swap(src);
441    for (auto& s : m_src) {
442       auto r = s->as_register();
443       if (r)
444          r->add_use(this);
445    }
446 }
447 
replace_dest(PRegister new_dest,AluInstr * move_instr)448 bool AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
449 {
450    if (m_dest->equal_to(*new_dest))
451       return false;
452 
453    if (m_dest->uses().size() > 1)
454       return false;
455 
456    if (new_dest->pin() == pin_array)
457       return false;
458 
459    /* Currently we bail out when an array write should be moved, because
460     * decalring an array write is currently not well defined. The
461     * Whole "backwards" copy propagation shoul dprobably be replaced by some
462     * forward peep holew optimization */
463    /*
464    if (new_dest->pin() == pin_array) {
465       auto dav = static_cast<const LocalArrayValue *>(new_dest)->addr();
466       for (auto s: m_src) {
467          if (s->pin() == pin_array) {
468             auto sav = static_cast<const LocalArrayValue *>(s)->addr();
469             if (dav && sav && dav->as_register() &&  !dav->equal_to(*sav))
470                return false;
471          }
472       }
473    }
474    */
475 
476    if (m_dest->pin() == pin_chan &&
477        new_dest->chan() != m_dest->chan())
478       return false;
479 
480 
481    if (m_dest->pin() == pin_chan) {
482       if (new_dest->pin() == pin_group)
483          new_dest->set_pin(pin_chgr);
484       else
485          new_dest->set_pin(pin_chan);
486    }
487 
488    m_dest = new_dest;
489    if (!move_instr->has_alu_flag(alu_last_instr))
490       reset_alu_flag(alu_last_instr);
491 
492    if (has_alu_flag(alu_is_cayman_trans)) {
493       /* Copy propagation puts an instruction into the w channel, but we
494        * don't have the slots for a w channel */
495       if (m_dest->chan() == 3 && m_alu_slots < 4) {
496          m_alu_slots = 4;
497          assert(m_src.size() == 3);
498          m_src.push_back(m_src[0]);
499       }
500    }
501 
502    return true;
503 }
504 
pin_sources_to_chan()505 void AluInstr::pin_sources_to_chan()
506 {
507    for (auto s: m_src) {
508       auto r = s->as_register();
509       if (r) {
510          if (r->pin() == pin_free)
511             r->set_pin(pin_chan);
512          else if (r->pin() == pin_group)
513                r->set_pin(pin_chgr);
514       }
515    }
516 }
517 
check_readport_validation(PRegister old_src,PVirtualValue new_src) const518 bool AluInstr::check_readport_validation(PRegister old_src, PVirtualValue new_src) const
519 {
520    bool success = true;
521    AluReadportReservation rpr_sum;
522 
523    if (m_src.size() < 3)
524       return true;
525 
526    unsigned nsrc = alu_ops.at(m_opcode).nsrc;
527    assert(nsrc * m_alu_slots == m_src.size());
528 
529 
530    for (int s = 0; s < m_alu_slots && success; ++s) {
531       for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
532          auto ireg = m_src.begin() + s * nsrc;
533 
534          AluReadportReservation rpr = rpr_sum;
535          PVirtualValue s[3];
536 
537          for (unsigned i = 0; i < nsrc; ++i, ++ireg)
538             s[i] = old_src->equal_to(**ireg) ? new_src : *ireg;
539 
540          if (rpr.schedule_vec_src(s, nsrc, i)) {
541             rpr_sum = rpr;
542             break;
543          } else {
544             success = false;
545          }
546       }
547    }
548    return success;
549 }
550 
add_extra_dependency(PVirtualValue value)551 void AluInstr::add_extra_dependency(PVirtualValue value)
552 {
553    auto reg = value->as_register();
554    if (reg)
555       m_extra_dependencies.insert(reg);
556 }
557 
558 
is_equal_to(const AluInstr & lhs) const559 bool AluInstr::is_equal_to(const AluInstr& lhs) const
560 {
561    if (lhs.m_opcode != m_opcode ||
562        lhs.m_bank_swizzle != m_bank_swizzle ||
563        lhs.m_cf_type != m_cf_type ||
564        lhs.m_alu_flags != m_alu_flags) {
565       return false;
566    }
567 
568    if (m_dest) {
569       if (!lhs.m_dest) {
570          return false;
571       } else {
572          if (has_alu_flag(alu_write)) {
573             if (!m_dest->equal_to(*lhs.m_dest))
574                return false;
575          } else {
576             if (m_dest->chan() != lhs.m_dest->chan())
577                return false;
578          }
579       }
580    } else {
581       if (lhs.m_dest)
582          return false;
583    }
584 
585    if (m_src.size() != lhs.m_src.size())
586       return false;
587 
588    for (unsigned i = 0; i < m_src.size(); ++i) {
589       if (!m_src[i]->equal_to(*lhs.m_src[i]))
590          return false;
591    }
592 
593    return true;
594 }
595 
596 class ResolveIndirectArrayAddr: public ConstRegisterVisitor {
597 public:
visit(const Register & value)598    void visit(const Register& value){(void) value;}
visit(const LocalArray & value)599    void visit(const LocalArray& value) {(void)value; unreachable("An array can't be used as address");}
600    void visit(const LocalArrayValue& value);
601    void visit(const UniformValue& value);
visit(const LiteralConstant & value)602    void visit(const LiteralConstant& value) {(void)value;}
visit(const InlineConstant & value)603    void visit(const InlineConstant& value) {(void)value;}
604 
605    PRegister addr{nullptr};
606    bool is_index{false};
607 };
608 
visit(const LocalArrayValue & value)609 void ResolveIndirectArrayAddr::visit(const LocalArrayValue& value)
610 {
611    auto a = value.addr();
612    if (a)
613       addr = a->as_register();
614 }
615 
visit(const UniformValue & value)616 void ResolveIndirectArrayAddr::visit(const UniformValue& value)
617 {
618    auto a = value.buf_addr();
619    if (a) {
620       addr = a->as_register();
621       is_index = true;
622    }
623 }
624 
indirect_addr() const625 std::tuple<PRegister, bool, bool> AluInstr::indirect_addr() const
626 {
627    ResolveIndirectArrayAddr visitor;
628 
629    if (m_dest) {
630       m_dest->accept(visitor);
631       if (visitor.addr)
632          return {visitor.addr, false, false};
633    }
634 
635    for (auto s: m_src) {
636       s->accept(visitor);
637       if (visitor.addr) {
638          return {visitor.addr, !visitor.is_index, visitor.is_index};
639       }
640    }
641    return {nullptr, false, false};
642 }
643 
split(ValueFactory & vf)644 AluGroup *AluInstr::split(ValueFactory& vf)
645 {
646    if (m_alu_slots == 1)
647       return nullptr;
648 
649    sfn_log << SfnLog::instr << "Split " << *this << "\n";
650 
651    auto group = new AluGroup();
652 
653    m_dest->del_parent(this);
654 
655    for (int s = 0; s < m_alu_slots; ++s) {
656 
657       PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s);
658       if (dst->pin() != pin_chgr) {
659          auto pin = pin_chan;
660          if (dst->pin() == pin_group && m_dest->chan() == s)
661             pin = pin_chgr;
662          dst->set_pin(pin);
663       }
664 
665       SrcValues src;
666       for (int i = 0; i < alu_ops.at(m_opcode).nsrc; ++i) {
667          auto old_src = m_src[s * alu_ops.at(m_opcode).nsrc + i];
668          // Make it easy for the scheduler and pin the register to the
669          // channel, otherwise scheduler would have to check whether a
670          // channel switch is possible
671          auto r = old_src->as_register();
672          if (r) {
673             if (r->pin() == pin_free || r->pin() == pin_none)
674                r->set_pin(pin_chan);
675             else if (r->pin() == pin_group)
676                r->set_pin(pin_chgr);
677          }
678          src.push_back(old_src);
679       }
680 
681       auto instr = new AluInstr(m_opcode, dst, src, {}, 1);
682       instr->set_blockid(block_id(), index());
683 
684       if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
685          if (has_alu_flag(alu_src0_neg))
686             instr->set_alu_flag(alu_src0_neg);
687          if (has_alu_flag(alu_src1_neg))
688             instr->set_alu_flag(alu_src1_neg);
689          if (has_alu_flag(alu_src2_neg))
690             instr->set_alu_flag(alu_src2_neg);
691          if (has_alu_flag(alu_src0_abs))
692             instr->set_alu_flag(alu_src0_abs);
693          if (has_alu_flag(alu_src1_abs))
694             instr->set_alu_flag(alu_src1_abs);
695       }
696       if (has_alu_flag(alu_dst_clamp))
697          instr->set_alu_flag(alu_dst_clamp);
698 
699       if (s == m_dest->chan())
700          instr->set_alu_flag(alu_write);
701 
702       m_dest->add_parent(instr);
703       sfn_log << SfnLog::instr << "   " << *instr << "\n";
704 
705       if (!group->add_instruction(instr)) {
706          std::cerr << "Unable to schedule '" << *instr << "' into\n"
707                    << *group << "\n";
708 
709          unreachable("Invalid group instruction");
710       }
711    }
712    group->set_blockid(block_id(), index());
713 
714    for (auto s : m_src) {
715       auto r = s->as_register();
716       if (r) {
717          r->del_use(this);
718       }
719    }
720 
721    return group;
722 }
723 
/* Alu instructions that have SSA dest registers increase the register
 * pressure, Alu instructions that read from SSA registers may decrease the
 * register pressure; hence evaluate a priority value based on the change in
 * register pressure */
register_priority() const727 int AluInstr::register_priority() const
728 {
729    int priority = 0;
730    if (!has_alu_flag(alu_no_schedule_bias)) {
731 
732       if (m_dest && m_dest->is_ssa() && has_alu_flag(alu_write)) {
733          if (m_dest->pin() != pin_group && m_dest->pin() != pin_chgr)
734             priority--;
735       }
736 
737       for (const auto s : m_src) {
738          auto r = s->as_register();
739          if (r && r->is_ssa()) {
740             int pending = 0;
741             for (auto b : r->uses()) {
742                if (!b->is_scheduled())
743                   ++pending;
744             }
745             if (pending == 1)
746                ++priority;
747          }
748       }
749    }
750    return priority;
751 }
752 
propagate_death()753 bool AluInstr::propagate_death()
754 {
755    if (!m_dest)
756       return true;
757 
758    if  (m_dest->pin() == pin_group ||
759         m_dest->pin() == pin_chan) {
760       switch (m_opcode) {
761       case op2_interp_x:
762       case op2_interp_xy:
763       case op2_interp_z:
764       case op2_interp_zw:
765          reset_alu_flag(alu_write);
766          return false;
767       default:
768          ;
769       }
770    }
771 
772    if  (m_dest->pin() == pin_array)
773       return false;
774 
775    /* We assume that nir does a good job in eliminating all ALU results that
776     * are not needed, and we don't let copy propagation doesn't make the
777     * instruction obsolte, so just keep all */
778    if (has_alu_flag(alu_is_cayman_trans))
779       return false;
780 
781    for (auto& src: m_src) {
782       auto reg = src->as_register();
783       if (reg)
784          reg->del_use(this);
785    }
786    return true;
787 }
788 
has_lds_access() const789 bool AluInstr::has_lds_access() const
790 {
791    return has_alu_flag(alu_is_lds) || has_lds_queue_read();
792 }
793 
has_lds_queue_read() const794 bool AluInstr::has_lds_queue_read() const
795 {
796    for (auto& s : m_src) {
797       auto ic = s->as_inline_const();
798       if (!ic)
799          continue;
800 
801       if (ic->sel() == ALU_SRC_LDS_OQ_A_POP ||
802           ic->sel() == ALU_SRC_LDS_OQ_B_POP)
803          return true;
804    }
805    return false;
806 }
807 
808 struct OpDescr {
809    union {
810       EAluOp alu_opcode;
811       ESDOp lds_opcode;
812    };
813    int nsrc;
814 };
815 
816 static std::map<std::string, OpDescr> s_alu_map_by_name;
817 static std::map<std::string, OpDescr> s_lds_map_by_name;
818 
from_string(istream & is,ValueFactory & value_factory,AluGroup * group)819 Instr::Pointer AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group)
820 {
821    vector<string> tokens;
822 
823    while (is.good() && !is.eof()) {
824       string t;
825       is >> t;
826       if (t.length() > 0) {
827          tokens.push_back(t);
828       }
829    }
830 
831    std::set<AluModifiers> flags;
832    auto t = tokens.begin();
833 
834    bool is_lds = false;
835 
836    if (*t == "LDS") {
837       is_lds = true;
838       t++;
839    }
840 
841    string opstr = *t++;
842    string deststr = *t++;
843 
844    if (deststr == "CLAMP") {
845       flags.insert(alu_dst_clamp);
846       deststr = *t++;
847    }
848 
849    assert(*t == ":");
850    OpDescr op_descr = {{op_invalid}, -1};
851 
852    if (is_lds) {
853       auto op = s_lds_map_by_name.find(opstr);
854       if (op == s_lds_map_by_name.end()) {
855          for(auto [opcode, opdescr] : lds_ops ) {
856             if (opstr == opdescr.name) {
857                op_descr.lds_opcode = opcode;
858                op_descr.nsrc = opdescr.nsrc;
859                s_alu_map_by_name[opstr] = op_descr;
860                break;
861             }
862          }
863 
864          if (op_descr.nsrc == -1) {
865             std::cerr << "'" << opstr << "'";
866             unreachable("Unknown opcode");
867             return nullptr;
868          }
869       } else {
870          op_descr = op->second;
871       }
872    } else {
873       auto op = s_alu_map_by_name.find(opstr);
874       if (op == s_alu_map_by_name.end()) {
875          for(auto [opcode, opdescr] : alu_ops ) {
876             if (opstr == opdescr.name) {
877                op_descr = {{opcode}, opdescr.nsrc};
878                s_alu_map_by_name[opstr] = op_descr;
879                break;
880             }
881          }
882 
883          if (op_descr.nsrc == -1) {
884             std::cerr << "'" << opstr << "'";
885             unreachable("Unknown opcode");
886             return nullptr;
887          }
888       } else {
889          op_descr = op->second;
890       }
891    }
892 
893    int slots = 0;
894 
895    SrcValues sources;
896    do {
897       ++t;
898       for (int i = 0; i < op_descr.nsrc; ++i) {
899          string srcstr = *t++;
900 
901          if (srcstr[0] == '-') {
902             if (!slots)
903                flags.insert(AluInstr::src_neg_flags[i]);
904             else
905                assert(flags.find(AluInstr::src_neg_flags[i]) != flags.end());
906             srcstr = srcstr.substr(1);
907          }
908 
909          if (srcstr[0] == '|') {
910             assert(srcstr[srcstr.length() - 1] == '|');
911             if (!slots)
912                flags.insert(AluInstr::src_abs_flags[i]);
913             else
914                assert(flags.find(AluInstr::src_abs_flags[i]) != flags.end());
915             srcstr = srcstr.substr(1, srcstr.length() - 2);
916          }
917 
918          auto src = value_factory.src_from_string(srcstr);
919          if (!src) {
920             std::cerr << "Unable to create src[" << i << "] from " << srcstr << "\n";
921             assert(src);
922          }
923          sources.push_back(src);
924       }
925       ++slots;
926    } while (t != tokens.end() && *t == "+");
927 
928    AluBankSwizzle bank_swizzle = alu_vec_unknown;
929    ECFAluOpCode cf = cf_alu;
930 
931    while (t != tokens.end()) {
932 
933       switch ((*t)[0]) {
934       case '{': {
935          auto iflag = t->begin() + 1;
936          while (iflag != t->end()) {
937             if (*iflag == '}')
938                break;
939 
940             switch (*iflag) {
941             case 'L': flags.insert(alu_last_instr); break;
942             case 'W': flags.insert(alu_write); break;
943             case 'E': flags.insert(alu_update_exec); break;
944             case 'P': flags.insert(alu_update_pred); break;
945             }
946             ++iflag;
947          }
948       }
949       break;
950 
951       case 'V': {
952          string bs = *t;
953          if (bs == "VEC_012")
954             bank_swizzle = alu_vec_012;
955          else if (bs == "VEC_021")
956             bank_swizzle = alu_vec_021;
957          else if (bs == "VEC_102")
958             bank_swizzle = alu_vec_102;
959          else if (bs == "VEC_120")
960             bank_swizzle = alu_vec_120;
961          else if (bs == "VEC_201")
962             bank_swizzle = alu_vec_201;
963          else if (bs == "VEC_210")
964             bank_swizzle = alu_vec_210;
965          else {
966             std::cerr << "'" << bs << "': ";
967             unreachable("Unknowe bankswizzle given");
968          }
969       }
970       break;
971 
972       default: {
973          string cf_str = *t;
974          if (cf_str == "PUSH_BEFORE")
975             cf = cf_alu_push_before;
976          else if (cf_str == "POP_AFTER")
977             cf = cf_alu_pop_after;
978          else if (cf_str == "POP2_AFTER")
979             cf = cf_alu_pop2_after;
980          else if (cf_str == "EXTENDED")
981             cf = cf_alu_extended;
982          else if (cf_str == "BREAK")
983             cf = cf_alu_break;
984          else if (cf_str == "CONT")
985             cf = cf_alu_continue;
986          else if (cf_str == "ELSE_AFTER")
987             cf = cf_alu_else_after;
988          else {
989             std::cerr << " '" << cf_str << "' ";
990             unreachable("Unknown tocken in ALU instruction");
991          }
992       }
993       }
994       ++t;
995    }
996 
997    PRegister dest = nullptr;
998    // construct instruction
999    if (deststr != "(null)")
1000       dest = value_factory.dest_from_string(deststr);
1001 
1002    AluInstr *retval = nullptr;
1003    if (is_lds)
1004       retval = new AluInstr(op_descr.lds_opcode, sources, flags);
1005    else
1006       retval = new AluInstr(op_descr.alu_opcode, dest, sources, flags, slots);
1007 
1008    retval->set_bank_swizzle(bank_swizzle);
1009    retval->set_cf_type(cf);
1010    if (group) {
1011       group->add_instruction(retval);
1012       retval= nullptr;
1013    }
1014    return retval;
1015 }
1016 
do_ready() const1017 bool AluInstr::do_ready() const
1018 {
1019    /* Alu instructions are shuffled by the scheduler, so
1020     * we have to make sure that required ops are already
1021     * scheduled before marking this one ready */
1022    for (auto i: required_instr()) {
1023       if (!i->is_scheduled())
1024          return false;
1025    }
1026 
1027    for (auto s : m_src) {
1028       auto r = s->as_register();
1029       if (r) {
1030          if (!r->ready(block_id(), index()))
1031             return false;
1032       }
1033       auto u = s->as_uniform();
1034       if (u && u->buf_addr() && u->buf_addr()->as_register()) {
1035          if (!u->buf_addr()->as_register()->ready(block_id(), index()))
1036             return false;
1037       }
1038    }
1039 
1040    if (m_dest && !m_dest->is_ssa()) {
1041       if (m_dest->pin() == pin_array) {
1042          auto av = static_cast<const LocalArrayValue *>(m_dest);
1043          auto addr = av->addr();
1044          /* For true indiect dest access we have to make sure that all
1045        * instructions that write the value before are schedukled */
1046          if (addr && (!addr->ready(block_id(), index()) ||
1047                       !m_dest->ready(block_id(), index() - 1)))
1048             return false;
1049       }
1050 
1051       /* If a register is updates, we have to make sure that uses before that
1052        * update are scheduled, otherwise we may use the updated value when we
1053        * shouldn't */
1054       for (auto u : m_dest->uses()) {
1055          if (u->block_id() <= block_id() && u->index() < index() &&
1056              !u->is_scheduled()) {
1057             return false;
1058          }
1059       }
1060    }
1061 
1062    for (auto& r : m_extra_dependencies) {
1063       if (!r->ready(block_id(), index()))
1064          return false;
1065    }
1066 
1067    return true;
1068 }
1069 
visit(AluGroup * instr)1070 void AluInstrVisitor::visit(AluGroup *instr)
1071 {
1072    for (auto& i : *instr) {
1073       if (i)
1074          i->accept(*this);
1075    }
1076 }
1077 
visit(Block * instr)1078 void AluInstrVisitor::visit(Block *instr)
1079 {
1080    for (auto& i : *instr)
1081       i->accept(*this);
1082 }
1083 
visit(IfInstr * instr)1084 void AluInstrVisitor::visit(IfInstr *instr)
1085 {
1086    instr->predicate()->accept(*this);
1087 }
1088 
/* Forward declarations of the file-local emit helpers. They are declared
 * ahead of AluInstr::from_nir, which dispatches NIR ALU opcodes to them.
 * Each helper takes the NIR ALU instruction and the target Shader and
 * returns a bool. */
1089 static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader);
1090 
1091 static bool emit_alu_op1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1092                          const AluOpFlags& flags = 0);
1093 static bool emit_alu_op1_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_chan);
1094 static bool emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader);
1095 static bool emit_alu_neg(const nir_alu_instr& alu, Shader& shader);
1096 static bool emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1097 static bool emit_alu_op2_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_order);
1098 static bool emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_order);
1099 static bool emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1100 static bool emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader);
1101 static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
1102 static bool emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
1103 static bool emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
1104 static bool emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
1105 
1106 static bool emit_alu_op2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1107                          AluInstr::Op2Options opts = AluInstr::op2_opt_none);
1108 static bool emit_alu_op2_int(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1109                              AluInstr::Op2Options opts = AluInstr::op2_opt_none);
1110 static bool emit_alu_op3(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1111                   const std::array<int, 3>& src_shuffle = {0,1,2});
1112 static bool emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1113 static bool emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
1114 static bool emit_any_all_icomp(const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
1115 
1116 static bool emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1117 
1118 static bool emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1119 static bool emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader);
1120 static bool emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader);
1121 static bool emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader);
1122 static bool emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader);
1123 static bool emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader);
1124 static bool emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader);
1125 
1126 static bool emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader);
1127 static bool emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader);
1128 
1129 static bool emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
1130 static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);
1131 
1132 static bool emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1133 static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1134 
1135 static bool emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1136 static bool emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1137 
1138 static bool emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1139 
1140 static bool emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader);
1141 
1142 static bool emit_alu_cube(const nir_alu_instr& alu, Shader& shader);
1143 
1144 static bool emit_fdph(const nir_alu_instr& alu, Shader& shader);
1145 
check_64_bit_op_src(nir_src * src,void * state)1146 static bool check_64_bit_op_src(nir_src *src, void *state)
1147 {
1148    if (nir_src_bit_size(*src) == 64) {
1149       *(bool*)state = true;
1150       return false;
1151    }
1152    return true;
1153 }
1154 
check_64_bit_op_dest(nir_dest * dest,void * state)1155 static bool check_64_bit_op_dest(nir_dest *dest, void *state)
1156 {
1157    if (nir_dest_bit_size(*dest) == 64) {
1158       *(bool*)state = true;
1159       return false;
1160    }
1161    return true;
1162 }
1163 
/* Translate one NIR ALU instruction by dispatching on alu->op to the
 * matching emit helper and returning that helper's result.
 *
 * The dispatch runs in stages, and their order matters because every stage
 * returns as soon as it finds a match:
 *   1. instructions with a 64 bit source or destination,
 *   2. opcodes whose lowering depends on shader.chip_class(),
 *   3. the chip independent opcodes.
 *
 * Returns false for 64 bit opcodes that are not listed in stage 1 and,
 * after printing the instruction to stderr and asserting, for opcodes that
 * no stage handles. */
from_nir(nir_alu_instr * alu,Shader & shader)1164 bool AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
1165 {
        /* The flag is set by the callbacks as soon as one source, or failing
         * that one destination, has a bit size of 64. */
1166    bool is_64bit_op = false;
1167    nir_foreach_src(&alu->instr, check_64_bit_op_src, &is_64bit_op);
1168    if (!is_64bit_op)
1169       nir_foreach_dest(&alu->instr, check_64_bit_op_dest, &is_64bit_op);
1170 
1171 
1172    if (is_64bit_op) {
1173       switch (alu->op) {
           /* The pack/unpack opcodes leave this switch and are handled by
            * the generic switch at the end of the function. */
1174       case nir_op_pack_64_2x32:
1175       case nir_op_unpack_64_2x32:
1176       case nir_op_pack_64_2x32_split:
1177       case nir_op_pack_half_2x16_split:
1178       case nir_op_unpack_64_2x32_split_x:
1179       case nir_op_unpack_64_2x32_split_y: break;
1180       case nir_op_mov: return emit_alu_mov_64bit(*alu, shader);
1181       case nir_op_fneg: return emit_alu_neg(*alu, shader);
1182       case nir_op_ffract: return emit_alu_op1_64bit(*alu, op1_fract_64, shader, true);
1183       case nir_op_feq32: return emit_alu_op2_64bit_one_dst(*alu, op2_sete_64, shader, false);
1184       case nir_op_fge32: return emit_alu_op2_64bit_one_dst(*alu, op2_setge_64, shader, false);
1185       case nir_op_flt32: return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true);
1186       case nir_op_fneu32: return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
1187       case nir_op_ffma: return emit_alu_fma_64bit(*alu, op3_fma_64, shader);
1188 
1189       case nir_op_fadd: return emit_alu_op2_64bit(*alu, op2_add_64, shader, false);
1190       case nir_op_fmul: return emit_alu_op2_64bit(*alu, op2_mul_64, shader, false);
1191       case nir_op_fmax: return emit_alu_op2_64bit(*alu, op2_max_64, shader, false);
1192       case nir_op_fmin: return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
1193       case nir_op_b2f64: return emit_alu_b2f64(*alu, shader);
1194       case nir_op_f2f64: return emit_alu_f2f64(*alu, shader);
1195       case nir_op_i2f64: return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
1196       case nir_op_u2f64: return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
1197       case nir_op_f2f32: return emit_alu_f2f32(*alu, shader);
1198       case nir_op_fabs: return emit_alu_abs64(*alu, shader);
1199       case nir_op_fsqrt: return emit_alu_op1_64bit_trans(*alu, op1_sqrt_64, shader);
1200       case nir_op_frcp: return emit_alu_op1_64bit_trans(*alu, op1_recip_64, shader);
1201       case nir_op_frsq: return emit_alu_op1_64bit_trans(*alu, op1_recipsqrt_64, shader);
1202       case nir_op_vec2: return emit_alu_vec2_64(*alu, shader);
           /* Any other opcode with a 64 bit operand is reported as failure. */
1203       default:
1204          return false;
1205          ;
1206       }
1207    }
1208 
1209 
        /* Chip dependent lowering: Cayman uses the *_cayman helpers for the
         * ops listed here, all other chip classes take the else branch. */
1210    if (shader.chip_class() == ISA_CC_CAYMAN) {
1211       switch (alu->op) {
1212       case nir_op_fcos_amd: return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
1213       case nir_op_fexp2: return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
1214       case nir_op_flog2: return emit_alu_trans_op1_cayman(*alu, op1_log_clamped, shader);
1215       case nir_op_frcp: return emit_alu_trans_op1_cayman(*alu, op1_recip_ieee, shader);
1216       case nir_op_frsq: return emit_alu_trans_op1_cayman(*alu,  op1_recipsqrt_ieee1, shader);
1217       case nir_op_fsqrt: return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
1218       case nir_op_fsin_amd: return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
1219       case nir_op_i2f32: return emit_alu_op1(*alu, op1_int_to_flt, shader);
1220       case nir_op_u2f32: return emit_alu_op1(*alu, op1_uint_to_flt, shader);
1221       case nir_op_imul: return emit_alu_trans_op2_cayman(*alu, op2_mullo_int, shader);
1222       case nir_op_imul_high: return emit_alu_trans_op2_cayman(*alu, op2_mulhi_int, shader);
1223       case nir_op_umul_high: return emit_alu_trans_op2_cayman(*alu, op2_mulhi_uint, shader);
1224       case nir_op_f2u32: return emit_alu_op1(*alu, op1_flt_to_uint, shader);
1225       case nir_op_f2i32: return emit_alu_op1(*alu, op1_flt_to_int, shader);
1226       case nir_op_ishl: return emit_alu_op2_int(*alu, op2_lshl_int, shader);
1227       case nir_op_ishr: return emit_alu_op2_int(*alu, op2_ashr_int, shader);
1228       case nir_op_ushr: return emit_alu_op2_int(*alu, op2_lshr_int, shader);
1229       default:
1230          ;
1231       }
1232    } else {
           /* On Evergreen the float to int conversions have a dedicated
            * helper that takes precedence over the generic one below. */
1233       if (shader.chip_class() == ISA_CC_EVERGREEN) {
1234          switch (alu->op) {
1235          case nir_op_f2i32: return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_int, shader);
1236          case nir_op_f2u32: return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_uint, shader);
1237          default:
1238             ;
1239          }
1240       }
1241 
           /* Shifts go through emit_alu_op2_int for chip classes that compare
            * >= ISA_CC_R700 and through emit_alu_trans_op2_eg otherwise. */
1242       if (shader.chip_class() >= ISA_CC_R700) {
1243          switch (alu->op) {
1244          case nir_op_ishl: return emit_alu_op2_int(*alu, op2_lshl_int, shader);
1245          case nir_op_ishr: return emit_alu_op2_int(*alu, op2_ashr_int, shader);
1246          case nir_op_ushr: return emit_alu_op2_int(*alu, op2_lshr_int, shader);
1247          default:
1248             ;
1249          }
1250       } else  {
1251          switch (alu->op) {
1252          case nir_op_ishl: return emit_alu_trans_op2_eg(*alu, op2_lshl_int, shader);
1253          case nir_op_ishr: return emit_alu_trans_op2_eg(*alu, op2_ashr_int, shader);
1254          case nir_op_ushr: return emit_alu_trans_op2_eg(*alu, op2_lshr_int, shader);
1255          default:
1256             ;
1257          }
1258       }
1259 
           /* Remaining chip dependent ops of the non-Cayman path. */
1260       switch (alu->op) {
1261       case nir_op_f2i32: return emit_alu_trans_op1_eg(*alu, op1_flt_to_int, shader);
1262       case nir_op_f2u32: return emit_alu_trans_op1_eg(*alu, op1_flt_to_uint, shader);
1263       case nir_op_fcos_amd: return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
1264       case nir_op_fexp2: return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
1265       case nir_op_flog2: return emit_alu_trans_op1_eg(*alu, op1_log_clamped, shader);
1266       case nir_op_frcp: return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
1267       case nir_op_frsq: return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
1268       case nir_op_fsin_amd: return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
1269       case nir_op_fsqrt: return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
1270       case nir_op_i2f32: return emit_alu_trans_op1_eg(*alu, op1_int_to_flt, shader);
1271       case nir_op_u2f32: return emit_alu_trans_op1_eg(*alu, op1_uint_to_flt, shader);
1272       case nir_op_imul: return emit_alu_trans_op2_eg(*alu, op2_mullo_int, shader);
1273       case nir_op_imul_high: return emit_alu_trans_op2_eg(*alu, op2_mulhi_int, shader);
1274       case nir_op_umul_high: return emit_alu_trans_op2_eg(*alu, op2_mulhi_uint, shader);
1275       default:
1276          ;
1277       }
1278    }
1279 
        /* Chip independent opcodes, plus the pack/unpack opcodes that left
         * the 64 bit switch above with 'break'. */
1280    switch (alu->op) {
1281    case nir_op_b2b1: return emit_alu_op1(*alu, op1_mov, shader);
1282    case nir_op_b2b32: return emit_alu_op1(*alu, op1_mov, shader);
1283    case nir_op_b2f32: return emit_alu_b2x(*alu, ALU_SRC_1, shader);
1284    case nir_op_b2i32: return emit_alu_b2x(*alu, ALU_SRC_1_INT, shader);
1285 
1286    case nir_op_bfm: return emit_alu_op2_int(*alu, op2_bfm_int, shader, op2_opt_none);
1287    case nir_op_bit_count: return emit_alu_op1(*alu, op1_bcnt_int, shader);
1288 
1289    case nir_op_bitfield_reverse: return emit_alu_op1(*alu, op1_bfrev_int, shader);
1290    case nir_op_bitfield_select: return emit_alu_op3(*alu, op3_bfi_int, shader);
1291 
1292    case nir_op_b32all_fequal2: return emit_any_all_fcomp2(*alu, op2_sete_dx10, shader);
1293    case nir_op_b32all_fequal3: return emit_any_all_fcomp(*alu, op2_sete, 3, true, shader);
1294    case nir_op_b32all_fequal4: return emit_any_all_fcomp(*alu, op2_sete, 4, true, shader);
1295    case nir_op_b32all_iequal2: return emit_any_all_icomp(*alu, op2_sete_int, 2, true, shader);
1296    case nir_op_b32all_iequal3: return emit_any_all_icomp(*alu, op2_sete_int, 3, true, shader);
1297    case nir_op_b32all_iequal4: return emit_any_all_icomp(*alu, op2_sete_int, 4, true, shader);
1298    case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(*alu, op2_setne_dx10, shader);
1299    case nir_op_b32any_fnequal3: return emit_any_all_fcomp(*alu, op2_setne, 3, false, shader);
1300    case nir_op_b32any_fnequal4: return emit_any_all_fcomp(*alu, op2_setne, 4, false, shader);
1301    case nir_op_b32any_inequal2: return emit_any_all_icomp(*alu, op2_setne_int, 2, false, shader);
1302    case nir_op_b32any_inequal3: return emit_any_all_icomp(*alu, op2_setne_int, 3, false, shader);
1303    case nir_op_b32any_inequal4: return emit_any_all_icomp(*alu, op2_setne_int, 4, false, shader);
1304    case nir_op_b32csel: return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});
1305 
1306    case nir_op_f2b32: return emit_alu_comb_with_zero(*alu, op2_setne_dx10, shader);
1307    case nir_op_fabs: return emit_alu_op1(*alu, op1_mov, shader,  {1 << alu_src0_abs});
1308    case nir_op_fadd: return emit_alu_op2(*alu, op2_add, shader);
1309    case nir_op_fceil: return emit_alu_op1(*alu, op1_ceil, shader);
1310    case nir_op_fcsel: return emit_alu_op3(*alu, op3_cnde, shader, {0, 2, 1});
1311    case nir_op_fcsel_ge: return emit_alu_op3(*alu, op3_cndge, shader, {0, 1, 2});
1312    case nir_op_fcsel_gt: return emit_alu_op3(*alu, op3_cndgt, shader, {0, 1, 2});
1313 
1314    case nir_op_fdph: return emit_fdph(*alu, shader);
1315    case nir_op_fdot2: return emit_dot(*alu, 2, shader);
1316    case nir_op_fdot3: return emit_dot(*alu, 3, shader);
1317    case nir_op_fdot4: return emit_dot(*alu, 4, shader);
1318 
1319    case nir_op_feq32:
1320    case nir_op_feq: return emit_alu_op2(*alu, op2_sete_dx10, shader);
1321    case nir_op_ffloor: return emit_alu_op1(*alu, op1_floor, shader);
1322    case nir_op_ffract: return emit_alu_op1(*alu, op1_fract, shader);
1323    case nir_op_fge32: return emit_alu_op2(*alu, op2_setge_dx10, shader);
1324    case nir_op_fge: return emit_alu_op2(*alu, op2_setge_dx10, shader);
1325    case nir_op_find_lsb: return emit_alu_op1(*alu, op1_ffbl_int, shader);
1326 
        /* "less than" is emitted as "greater than" with op2_opt_reverse. */
1327    case nir_op_flt32: return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
1328    case nir_op_flt: return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
1329    case nir_op_fmax: return emit_alu_op2(*alu, op2_max_dx10, shader);
1330    case nir_op_fmin: return emit_alu_op2(*alu, op2_min_dx10, shader);
1331 
        /* fmul uses the IEEE multiply unless the shader has the legacy math
         * rules flag set; in that case it falls through to the fmulz opcode. */
1332    case nir_op_fmul:
1333       if (!shader.has_flag(Shader::sh_legacy_math_rules))
1334          return emit_alu_op2(*alu, op2_mul_ieee, shader);
1335       FALLTHROUGH;
1336    case nir_op_fmulz: return emit_alu_op2(*alu, op2_mul, shader);
1337 
1338    case nir_op_fneg: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_neg});
1339    case nir_op_fneu32: return emit_alu_op2(*alu, op2_setne_dx10, shader);
1340    case nir_op_fneu: return emit_alu_op2(*alu, op2_setne_dx10, shader);
1341 
1342    case nir_op_fround_even: return emit_alu_op1(*alu, op1_rndne, shader);
1343    case nir_op_fsat: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_dst_clamp});
1344    case nir_op_fsub: return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
1345    case nir_op_ftrunc: return emit_alu_op1(*alu, op1_trunc, shader);
1346    case nir_op_i2b1:
1347    case nir_op_i2b32: return emit_alu_i2orf2_b1(*alu, op2_setne_int, shader);
1348    case nir_op_iadd: return emit_alu_op2_int(*alu, op2_add_int, shader);
1349    case nir_op_iand: return emit_alu_op2_int(*alu, op2_and_int, shader);
1350    case nir_op_ibfe: return emit_alu_op3(*alu, op3_bfe_int, shader);
1351    case nir_op_i32csel_ge: return emit_alu_op3(*alu, op3_cndge_int, shader,  {0, 1, 2});
1352    case nir_op_i32csel_gt: return emit_alu_op3(*alu, op3_cndgt_int, shader,  {0, 1, 2});
1353    case nir_op_ieq32: return emit_alu_op2_int(*alu, op2_sete_int, shader);
1354    case nir_op_ieq: return emit_alu_op2_int(*alu, op2_sete_int, shader);
1355    case nir_op_ifind_msb_rev: return emit_alu_op1(*alu, op1_ffbh_int, shader);
1356    case nir_op_ige32: return emit_alu_op2_int(*alu, op2_setge_int, shader);
1357    case nir_op_ige: return emit_alu_op2_int(*alu, op2_setge_int, shader);
1358    case nir_op_ilt32: return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
1359    case nir_op_ilt: return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
1360    case nir_op_imax: return emit_alu_op2_int(*alu, op2_max_int, shader);
1361    case nir_op_imin: return emit_alu_op2_int(*alu, op2_min_int, shader);
1362    case nir_op_ine32: return emit_alu_op2_int(*alu, op2_setne_int, shader);
1363    case nir_op_ine: return emit_alu_op2_int(*alu, op2_setne_int, shader);
1364    case nir_op_ineg: return emit_alu_comb_with_zero(*alu, op2_sub_int, shader);
1365    case nir_op_inot: return emit_alu_op1(*alu, op1_not_int, shader);
1366    case nir_op_ior: return emit_alu_op2_int(*alu, op2_or_int, shader);
1367    case nir_op_isub: return emit_alu_op2_int(*alu, op2_sub_int, shader);
1368    case nir_op_ixor: return emit_alu_op2_int(*alu, op2_xor_int, shader);
1369    case nir_op_pack_64_2x32: return emit_pack_64_2x32(*alu, shader);
1370    case nir_op_unpack_64_2x32: return emit_unpack_64_2x32(*alu, shader);
1371    case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(*alu, shader);
1372    case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(*alu, shader);
1373    case nir_op_slt: return emit_alu_op2(*alu, op2_setgt, shader, op2_opt_reverse);
1374    case nir_op_sge: return emit_alu_op2(*alu, op2_setge, shader);
1375    case nir_op_ubfe: return emit_alu_op3(*alu, op3_bfe_uint, shader);
1376    case nir_op_ufind_msb_rev: return emit_alu_op1(*alu, op1_ffbh_uint, shader);
1377    case nir_op_uge32: return emit_alu_op2_int(*alu, op2_setge_uint, shader);
1378    case nir_op_uge: return emit_alu_op2_int(*alu, op2_setge_uint, shader);
1379    case nir_op_ult32: return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
1380    case nir_op_ult: return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
1381    case nir_op_umad24: return emit_alu_op3(*alu, op3_muladd_uint24, shader,  {0, 1, 2});
1382    case nir_op_umax: return emit_alu_op2_int(*alu, op2_max_uint, shader);
1383    case nir_op_umin: return emit_alu_op2_int(*alu, op2_min_uint, shader);
1384    case nir_op_umul24: return emit_alu_op2(*alu, op2_mul_uint24, shader);
1385    case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(*alu, 0, shader);
1386    case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(*alu, 1, shader);
1387    case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(*alu, shader);
1388    case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(*alu, shader);
1389 
        /* Same scheme as fmul: IEEE multiply-add unless the legacy math
         * rules flag is set, otherwise fall through to the ffmaz opcode. */
1390    case nir_op_ffma:
1391       if (!shader.has_flag(Shader::sh_legacy_math_rules))
1392          return emit_alu_op3(*alu, op3_muladd_ieee, shader);
1393       FALLTHROUGH;
1394    case nir_op_ffmaz: return emit_alu_op3(*alu, op3_muladd, shader);
1395 
1396    case nir_op_mov: return emit_alu_op1(*alu, op1_mov, shader);
        /* NOTE(review): f2i32 is already returned by both chip dependent
         * paths above, so this case looks unreachable - confirm. */
1397    case nir_op_f2i32: return emit_alu_op1(*alu, op1_flt_to_int, shader);
1398    case nir_op_vec2: return emit_create_vec(*alu, 2, shader);
1399    case nir_op_vec3: return emit_create_vec(*alu, 3, shader);
1400    case nir_op_vec4: return emit_create_vec(*alu, 4, shader);
1401 
        /* Derivatives are emitted through emit_tex_fdd; the plain and the
         * coarse variants share the call with fine == false. */
1402    case nir_op_fddx:
1403    case nir_op_fddx_coarse: return emit_tex_fdd(*alu, TexInstr::get_gradient_h, false, shader);
1404    case nir_op_fddx_fine: return emit_tex_fdd(*alu, TexInstr::get_gradient_h, true, shader);
1405    case nir_op_fddy:
1406    case nir_op_fddy_coarse: return emit_tex_fdd(*alu,TexInstr::get_gradient_v, false, shader);
1407    case nir_op_fddy_fine: return emit_tex_fdd(*alu, TexInstr::get_gradient_v,  true, shader);
1408    case nir_op_cube_r600: return emit_alu_cube(*alu, shader);
        /* Unhandled opcode: print the instruction and assert; when asserts
         * are compiled out the failure is reported through the return value. */
1409    default:
1410       fprintf(stderr, "Unknown instruction '");
1411       nir_print_instr(&alu->instr, stderr);
1412       fprintf(stderr, "'\n");
1413       assert(0);
1414    return false;
1415    }
1416 }
1417 
pin_for_components(const nir_alu_instr & alu)1418 static Pin pin_for_components(const nir_alu_instr& alu)
1419 {
1420    return (alu.dest.dest.is_ssa &&
1421            (nir_dest_num_components(alu.dest.dest) == 1)) ? pin_free : pin_none;
1422 
1423 }
1424 
emit_alu_op1_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_chan)1425 static bool emit_alu_op1_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_chan)
1426 {
1427    auto& value_factory = shader.value_factory();
1428 
1429    auto group = new AluGroup();
1430 
1431    AluInstr *ir = nullptr;
1432 
1433    int swz[2] = {0,1};
1434    if (switch_chan) {
1435       swz[0] = 1;
1436       swz[1] = 0;
1437    }
1438 
1439    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1440       for (unsigned c = 0; c < 2 ; ++c) {
1441          ir = new AluInstr(opcode,
1442                            value_factory.dest(alu.dest, 2 * i + c, pin_chan),
1443                            value_factory.src64(alu.src[0], i, swz[c]),
1444                            {alu_write});
1445          group->add_instruction(ir);
1446       }
1447       if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
1448       if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
1449    }
1450    if (ir)
1451       ir->set_alu_flag(alu_last_instr);
1452    shader.emit_instruction(group);
1453    return true;
1454 }
1455 
emit_alu_mov_64bit(const nir_alu_instr & alu,Shader & shader)1456 static bool emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader)
1457 {
1458    auto& value_factory = shader.value_factory();
1459 
1460    AluInstr *ir = nullptr;
1461 
1462    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1463       for (unsigned c = 0; c < 2 ; ++c) {
1464          ir = new AluInstr(op1_mov,
1465                            value_factory.dest(alu.dest, 2 * i + c, pin_free),
1466                            value_factory.src64(alu.src[0], i, c),
1467                            {alu_write});
1468          shader.emit_instruction(ir);
1469       }
1470       if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
1471       if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
1472    }
1473    if (ir)
1474       ir->set_alu_flag(alu_last_instr);
1475    return true;
1476 }
1477 
emit_alu_neg(const nir_alu_instr & alu,Shader & shader)1478 static bool emit_alu_neg(const nir_alu_instr& alu, Shader& shader)
1479 {
1480    auto& value_factory = shader.value_factory();
1481 
1482    auto group = new AluGroup();
1483 
1484    AluInstr *ir = nullptr;
1485 
1486    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1487       for (unsigned c = 0; c < 2 ; ++c) {
1488          ir = new AluInstr(op1_mov,
1489                            value_factory.dest(alu.dest, 2 * i + c, pin_chan),
1490                            value_factory.src64(alu.src[0], i, c),
1491                            {alu_write});
1492          group->add_instruction(ir);
1493       }
1494       ir->set_alu_flag(alu_src0_neg);
1495    }
1496    if (ir)
1497       ir->set_alu_flag(alu_last_instr);
1498    shader.emit_instruction(group);
1499    return true;
1500 }
1501 
emit_alu_abs64(const nir_alu_instr & alu,Shader & shader)1502 static bool emit_alu_abs64(const nir_alu_instr& alu, Shader& shader)
1503 {
1504    auto& value_factory = shader.value_factory();
1505 
1506    assert(nir_dest_num_components(alu.dest.dest) == 1);
1507 
1508    shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.dest, 0, pin_chan),
1509                                         value_factory.src64(alu.src[0], 0, 0),
1510                                         AluInstr::write));
1511 
1512    auto ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, 1, pin_chan),
1513                           value_factory.src64(alu.src[0], 0, 1),
1514                           AluInstr::last_write);
1515    ir->set_alu_flag(alu_src0_abs);
1516    shader.emit_instruction(ir);
1517    return true;
1518 }
1519 
emit_alu_op2_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_src)1520 static bool emit_alu_op2_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_src)
1521 {
1522    auto& value_factory = shader.value_factory();
1523    auto group = new AluGroup();
1524    AluInstr *ir = nullptr;
1525    int order[2] =  {0, 1};
1526    if (switch_src) {
1527       order[0] = 1;
1528       order[1] = 0;
1529    }
1530 
1531    int num_emit0  = opcode == op2_mul_64 ? 3 : 1;
1532 
1533    assert(num_emit0 == 1 || nir_dest_num_components(alu.dest.dest) == 1);
1534 
1535 
1536    for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) {
1537       int i = 0;
1538       for (; i < num_emit0; ++i) {
1539          auto dest = i < 2 ? value_factory.dest(alu.dest, i, pin_chan) :
1540                              value_factory.dummy_dest(i);
1541 
1542          ir = new AluInstr(opcode,
1543                            dest,
1544                            value_factory.src64(alu.src[order[0]], k, 1),
1545                            value_factory.src64(alu.src[order[1]], k, 1),
1546                            i < 2 ? AluInstr::write : AluInstr::empty);
1547 
1548          if (alu.src[0].abs) ir->set_alu_flag(switch_src ? alu_src1_abs : alu_src0_abs);
1549          if (alu.src[1].abs) ir->set_alu_flag(switch_src ? alu_src0_abs : alu_src1_abs);
1550          if (alu.src[0].negate) ir->set_alu_flag(switch_src ? alu_src1_neg : alu_src0_neg);
1551          if (alu.src[1].negate) ir->set_alu_flag(switch_src ? alu_src0_neg : alu_src1_neg);
1552          if (alu.dest.saturate && i == 0) {
1553             ir->set_alu_flag(alu_dst_clamp);
1554          }
1555 
1556          group->add_instruction(ir);
1557       }
1558 
1559       auto dest = i == 1 ? value_factory.dest(alu.dest, i, pin_chan) :
1560                            value_factory.dummy_dest(i);
1561 
1562       ir = new AluInstr(opcode,
1563                         dest,
1564                         value_factory.src64(alu.src[order[0]], k, 0),
1565                         value_factory.src64(alu.src[order[1]], k, 0),
1566                         i == 1 ? AluInstr::write : AluInstr::empty);
1567       group->add_instruction(ir);
1568    }
1569    if (ir)
1570       ir->set_alu_flag(alu_last_instr);
1571 
1572    shader.emit_instruction(group);
1573    return true;
1574 }
1575 
emit_alu_op2_64bit_one_dst(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_order)1576 static bool emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, EAluOp opcode,
1577                                        Shader& shader, bool switch_order)
1578 {
1579    auto& value_factory = shader.value_factory();
1580    AluInstr *ir = nullptr;
1581    int order[2] =  {0, 1};
1582    if (switch_order) {
1583       order[0] = 1;
1584       order[1] = 0;
1585    }
1586 
1587    AluInstr::SrcValues src(4);
1588 
1589    for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) {
1590       auto dest = value_factory.dest(alu.dest, 2 * k, pin_chan);
1591       src[0] = value_factory.src64(alu.src[order[0]], k, 1);
1592       src[1] = value_factory.src64(alu.src[order[1]], k, 1);
1593       src[2] = value_factory.src64(alu.src[order[0]], k, 0);
1594       src[3] = value_factory.src64(alu.src[order[1]], k, 0);
1595 
1596       ir = new AluInstr(opcode, dest, src, AluInstr::write, 2);
1597 
1598       if (alu.src[0].abs) ir->set_alu_flag(switch_order ? alu_src1_abs : alu_src0_abs);
1599       if (alu.src[1].abs) ir->set_alu_flag(switch_order ? alu_src0_abs : alu_src1_abs);
1600       if (alu.src[0].negate) ir->set_alu_flag(switch_order ? alu_src1_neg : alu_src0_neg);
1601       if (alu.src[1].negate) ir->set_alu_flag(switch_order ? alu_src0_neg : alu_src1_neg);
1602       ir->set_alu_flag(alu_64bit_op);
1603 
1604       shader.emit_instruction(ir);
1605    }
1606    if (ir)
1607       ir->set_alu_flag(alu_last_instr);
1608 
1609    return true;
1610 }
1611 
emit_alu_op1_64bit_trans(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)1612 static bool emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
1613 {
1614    auto& value_factory = shader.value_factory();
1615    auto group = new AluGroup();
1616    AluInstr *ir = nullptr;
1617    for (unsigned i = 0; i < 3; ++i) {
1618       ir = new AluInstr(opcode,
1619                         i < 2 ? value_factory.dest(alu.dest, i, pin_chan) :
1620                                 value_factory.dummy_dest(i),
1621                         value_factory.src64(alu.src[0], 0, 1),
1622                         value_factory.src64(alu.src[0], 0, 0),
1623                         i < 2 ? AluInstr::write : AluInstr::empty);
1624 
1625       if (alu.src[0].abs || opcode == op1_sqrt_64) ir->set_alu_flag(alu_src1_abs);
1626       if (alu.src[0].negate) ir->set_alu_flag(alu_src1_neg);
1627 
1628       group->add_instruction(ir);
1629    }
1630    if (ir)
1631       ir->set_alu_flag(alu_last_instr);
1632    shader.emit_instruction(group);
1633    return true;
1634 
1635 }
1636 
emit_alu_fma_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)1637 static bool emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
1638 {
1639    auto& value_factory = shader.value_factory();
1640    auto group = new AluGroup();
1641    AluInstr *ir = nullptr;
1642    for (unsigned i = 0; i < 4 ; ++i) {
1643 
1644       int chan = i < 3 ? 1 : 0;
1645       auto dest = i < 2 ? value_factory.dest(alu.dest, i, pin_chan) :
1646                           value_factory.dummy_dest(i);
1647 
1648       ir = new AluInstr(opcode,
1649                         dest,
1650                         value_factory.src64(alu.src[0], 0, chan),
1651                         value_factory.src64(alu.src[1], 0, chan),
1652                         value_factory.src64(alu.src[2], 0, chan),
1653                         i < 2 ? AluInstr::write : AluInstr::empty);
1654 
1655       if (i < 3) {
1656          if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
1657          if (alu.src[1].negate) ir->set_alu_flag(alu_src1_neg);
1658          if (alu.src[2].negate) ir->set_alu_flag(alu_src2_neg);
1659       }
1660 
1661       group->add_instruction(ir);
1662 
1663    }
1664    if (ir)
1665       ir->set_alu_flag(alu_last_instr);
1666    shader.emit_instruction(group);
1667    return true;
1668 }
1669 
emit_alu_b2f64(const nir_alu_instr & alu,Shader & shader)1670 static bool emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader)
1671 {
1672    auto& value_factory = shader.value_factory();
1673    auto group = new AluGroup();
1674    AluInstr *ir = nullptr;
1675 
1676    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1677       ir = new AluInstr(op2_and_int,
1678                            value_factory.dest(alu.dest, 2 * i, pin_group),
1679                            value_factory.src(alu.src[0], i),
1680                            value_factory.zero(),
1681                            {alu_write});
1682        group->add_instruction(ir);
1683 
1684        ir = new AluInstr(op2_and_int,
1685                          value_factory.dest(alu.dest, 2 * i  + 1, pin_group),
1686                          value_factory.src(alu.src[0], i),
1687                          value_factory.literal(0x3ff00000),
1688                          {alu_write});
1689         group->add_instruction(ir);
1690    }
1691    if (ir)
1692       ir->set_alu_flag(alu_last_instr);
1693    shader.emit_instruction(group);
1694    return true;
1695 }
1696 
emit_alu_i2f64(const nir_alu_instr & alu,EAluOp op,Shader & shader)1697 static bool emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
1698 {
1699    /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
1700    auto& value_factory = shader.value_factory();
1701    auto group = new AluGroup();
1702    AluInstr *ir = nullptr;
1703 
1704    assert(nir_dest_num_components(alu.dest.dest) == 1);
1705 
1706    auto tmpx = value_factory.temp_register();
1707    shader.emit_instruction(new AluInstr(op2_and_int, tmpx,
1708                                         value_factory.src(alu.src[0], 0),
1709                                         value_factory.literal(0xffffff00),
1710                                         AluInstr::write));
1711    auto tmpy = value_factory.temp_register();
1712    shader.emit_instruction(new AluInstr(op2_and_int, tmpy,
1713                                         value_factory.src(alu.src[0], 0),
1714                                         value_factory.literal(0xff),
1715                                         AluInstr::last_write));
1716 
1717 
1718    auto tmpx2 = value_factory.temp_register();
1719    auto tmpy2 = value_factory.temp_register();
1720    shader.emit_instruction(new AluInstr(op, tmpx2, tmpx,
1721                                         AluInstr::last_write));
1722    shader.emit_instruction(new AluInstr(op, tmpy2, tmpy,
1723                                         AluInstr::last_write));
1724 
1725    auto tmpx3 = value_factory.temp_register(0);
1726    auto tmpy3 = value_factory.temp_register(1);
1727    auto tmpz3 = value_factory.temp_register(2);
1728    auto tmpw3 = value_factory.temp_register(3);
1729 
1730 
1731    ir = new AluInstr(op1_flt32_to_flt64,
1732                      tmpx3,
1733                      tmpx2, AluInstr::write);
1734    group->add_instruction(ir);
1735    ir = new AluInstr(op1_flt32_to_flt64,
1736                      tmpy3,
1737                      value_factory.zero(), AluInstr::write);
1738    group->add_instruction(ir);
1739    ir = new AluInstr(op1_flt32_to_flt64,
1740                      tmpz3,
1741                      tmpy2, AluInstr::write);
1742    group->add_instruction(ir);
1743    ir = new AluInstr(op1_flt32_to_flt64,
1744                      tmpw3,
1745                      value_factory.zero(), AluInstr::last_write);
1746    group->add_instruction(ir);
1747    shader.emit_instruction(group);
1748 
1749    group = new AluGroup();
1750 
1751    ir = new AluInstr(op2_add_64,
1752                      value_factory.dest(alu.dest, 0, pin_chan),
1753                      tmpy3, tmpw3, AluInstr::write);
1754    group->add_instruction(ir);
1755    ir = new AluInstr(op2_add_64,
1756                      value_factory.dest(alu.dest, 1, pin_chan),
1757                      tmpx3, tmpz3, AluInstr::write);
1758    group->add_instruction(ir);
1759    shader.emit_instruction(group);
1760 
1761    return true;
1762 }
1763 
emit_alu_f2f64(const nir_alu_instr & alu,Shader & shader)1764 static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
1765 {
1766    auto& value_factory = shader.value_factory();
1767    auto group = new AluGroup();
1768    AluInstr *ir = nullptr;
1769 
1770    assert(nir_dest_num_components(alu.dest.dest) == 1);
1771 
1772    ir = new AluInstr(op1_flt32_to_flt64,
1773                      value_factory.dest(alu.dest, 0, pin_chan),
1774                      value_factory.src(alu.src[0], 0), AluInstr::write);
1775    group->add_instruction(ir);
1776    ir = new AluInstr(op1_flt32_to_flt64,
1777                      value_factory.dest(alu.dest, 1, pin_chan),
1778                      value_factory.zero(), AluInstr::last_write);
1779    group->add_instruction(ir);
1780    shader.emit_instruction(group);
1781    return true;
1782 }
1783 
emit_alu_f2f32(const nir_alu_instr & alu,Shader & shader)1784 static bool emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader)
1785 {
1786    auto& value_factory = shader.value_factory();
1787    auto group = new AluGroup();
1788    AluInstr *ir = nullptr;
1789 
1790    ir = new AluInstr(op1v_flt64_to_flt32,
1791                      value_factory.dest(alu.dest, 0, pin_chan),
1792                      value_factory.src64(alu.src[0], 0, 1), {alu_write});
1793    group->add_instruction(ir);
1794    ir = new AluInstr(op1v_flt64_to_flt32,
1795                      value_factory.dummy_dest(1),
1796                      value_factory.src64(alu.src[0], 0, 0), AluInstr::last);
1797    group->add_instruction(ir);
1798    shader.emit_instruction(group);
1799    return true;
1800 
1801 }
1802 
emit_alu_b2x(const nir_alu_instr & alu,AluInlineConstants mask,Shader & shader)1803 static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader)
1804 {
1805    auto& value_factory = shader.value_factory();
1806    AluInstr *ir = nullptr;
1807    auto pin = pin_for_components(alu);
1808 
1809    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1810       if (alu.dest.write_mask & (1 << i)){
1811          ir = new AluInstr(op2_and_int,
1812                            value_factory.dest(alu.dest, i, pin),
1813                            value_factory.src(alu.src[0], i),
1814                            value_factory.inline_const(mask, 0),
1815                            {alu_write});
1816          if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
1817          if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
1818          shader.emit_instruction(ir);
1819       }
1820    }
1821    if (ir)
1822       ir->set_alu_flag(alu_last_instr);
1823    return true;
1824 }
1825 
emit_alu_op1(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,const AluOpFlags & flags)1826 static bool emit_alu_op1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, const AluOpFlags& flags)
1827 {
1828    auto& value_factory = shader.value_factory();
1829 
1830    AluInstr *ir = nullptr;
1831    auto pin = pin_for_components(alu);
1832 
1833    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1834       if (alu.dest.write_mask & (1 << i)){
1835          ir = new AluInstr(opcode, value_factory.dest(alu.dest, i, pin),
1836                            value_factory.src(alu.src[0], i), {alu_write});
1837 
1838          if (flags.test(alu_src0_abs) || alu.src[0].abs)
1839             ir->set_alu_flag(alu_src0_abs);
1840 
1841          if (alu.src[0].negate ^ flags.test(alu_src0_neg))
1842             ir->set_alu_flag(alu_src0_neg);
1843 
1844          if (flags.test(alu_dst_clamp) || alu.dest.saturate)
1845             ir->set_alu_flag(alu_dst_clamp);
1846 
1847          shader.emit_instruction(ir);
1848       }
1849    }
1850    if (ir)
1851       ir->set_alu_flag(alu_last_instr);
1852    return true;
1853 }
1854 
emit_alu_op2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)1855 static bool emit_alu_op2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1856                   AluInstr::Op2Options opts)
1857 {
1858    auto& value_factory = shader.value_factory();
1859    const nir_alu_src *src0 = &alu.src[0];
1860    const nir_alu_src *src1 = &alu.src[1];
1861 
1862    int idx0 = 0;
1863    int idx1 = 1;
1864    if (opts & AluInstr::op2_opt_reverse) {
1865       std::swap(src0, src1);
1866       std::swap(idx0, idx1);
1867    }
1868 
1869    bool src1_negate = (opts & AluInstr::op2_opt_neg_src1) ^ src1->negate;
1870 
1871    auto pin = pin_for_components(alu);
1872    AluInstr *ir = nullptr;
1873    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1874       if (alu.dest.write_mask & (1 << i)){
1875          ir = new AluInstr(opcode,
1876                            value_factory.dest(alu.dest.dest, i, pin),
1877                            value_factory.src(*src0, i),
1878                            value_factory.src(*src1, i), {alu_write});
1879 
1880          if (src0->negate) ir->set_alu_flag(alu_src0_neg);
1881          if (src0->abs) ir->set_alu_flag(alu_src0_abs);
1882          if (src1_negate) ir->set_alu_flag(alu_src1_neg);
1883          if (src1->abs) ir->set_alu_flag(alu_src1_abs);
1884          if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
1885          shader.emit_instruction(ir);
1886       }
1887    }
1888    if (ir)
1889       ir->set_alu_flag(alu_last_instr);
1890    return true;
1891 }
1892 
emit_alu_op2_int(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)1893 static bool emit_alu_op2_int(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1894                              AluInstr::Op2Options opts)
1895 {
1896    assert(!alu.src[0].abs);
1897    assert(!alu.src[0].negate);
1898    assert(!alu.src[1].abs);
1899    assert(!alu.src[1].negate);
1900 
1901    return emit_alu_op2(alu, opcode, shader, opts);
1902 }
1903 
emit_alu_op3(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,const std::array<int,3> & src_shuffle)1904 static bool emit_alu_op3(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1905                   const std::array<int, 3>& src_shuffle)
1906 {
1907    auto& value_factory = shader.value_factory();
1908    const nir_alu_src *src[3];
1909    src[0] = &alu.src[src_shuffle[0]];
1910    src[1] = &alu.src[src_shuffle[1]];
1911    src[2] = &alu.src[src_shuffle[2]];
1912 
1913    auto pin = pin_for_components(alu);
1914    AluInstr *ir = nullptr;
1915    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1916       if (alu.dest.write_mask & (1 << i)){
1917          ir = new AluInstr(opcode, value_factory.dest(alu.dest.dest, i, pin),
1918                            value_factory.src(*src[0], i),
1919                value_factory.src(*src[1], i),
1920                value_factory.src(*src[2], i),
1921          {alu_write});
1922 
1923          if (src[0]->negate) ir->set_alu_flag(alu_src0_neg);
1924          if (src[1]->negate) ir->set_alu_flag(alu_src1_neg);
1925          if (src[2]->negate) ir->set_alu_flag(alu_src2_neg);
1926 
1927          if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
1928          ir->set_alu_flag(alu_write);
1929          shader.emit_instruction(ir);
1930       }
1931    }
1932    if (ir)
1933       ir->set_alu_flag(alu_last_instr);
1934    return true;
1935 }
1936 
emit_any_all_fcomp2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)1937 static bool emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
1938 {
1939    AluInstr *ir = nullptr;
1940    auto& value_factory = shader.value_factory();
1941 
1942    PRegister tmp[2];
1943    tmp[0] = value_factory.temp_register();
1944    tmp[1] = value_factory.temp_register();
1945 
1946    for (unsigned i = 0; i < 2 ; ++i) {
1947       ir = new AluInstr(opcode, tmp[i],
1948                         value_factory.src(alu.src[0], i),
1949                         value_factory.src(alu.src[1], i), {alu_write});
1950       if (alu.src[0].abs)
1951          ir->set_alu_flag(alu_src0_abs);
1952       if (alu.src[0].negate)
1953          ir->set_alu_flag(alu_src0_neg);
1954 
1955       if (alu.src[1].abs)
1956          ir->set_alu_flag(alu_src1_abs);
1957       if (alu.src[1].negate)
1958          ir->set_alu_flag(alu_src1_neg);
1959 
1960       shader.emit_instruction(ir);
1961    }
1962    ir->set_alu_flag(alu_last_instr);
1963 
1964    opcode = (opcode == op2_setne_dx10) ? op2_or_int: op2_and_int;
1965    ir = new AluInstr(opcode,
1966                      value_factory.dest(alu.dest, 0, pin_free),
1967                      tmp[0], tmp[1], AluInstr::last_write);
1968    shader.emit_instruction(ir);
1969    return true;
1970 }
1971 
emit_any_all_fcomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)1972 static bool emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
1973 {
1974    /* This should probabyl be lowered in nir */
1975    auto& value_factory = shader.value_factory();
1976 
1977    AluInstr *ir = nullptr;
1978    RegisterVec4 v = value_factory.temp_vec4(pin_group);
1979    AluInstr::SrcValues s;
1980 
1981    for (int i = 0; i < nc ; ++i) {
1982       s.push_back(v[i]);
1983    }
1984 
1985    for (int i = nc; i < 4 ; ++i)
1986       s.push_back(value_factory.inline_const(all ? ALU_SRC_1 : ALU_SRC_0, 0));
1987 
1988    for (int i = 0; i < nc ; ++i) {
1989       ir = new AluInstr(op, v[i],
1990                         value_factory.src(alu.src[0], i),
1991                         value_factory.src(alu.src[1], i), {alu_write});
1992 
1993       if (alu.src[0].abs)
1994          ir->set_alu_flag(alu_src0_abs);
1995       if (alu.src[0].negate)
1996          ir->set_alu_flag(alu_src0_neg);
1997 
1998       if (alu.src[1].abs)
1999          ir->set_alu_flag(alu_src1_abs);
2000       if (alu.src[1].negate)
2001          ir->set_alu_flag(alu_src1_neg);
2002 
2003       shader.emit_instruction(ir);
2004    }
2005    if (ir)
2006       ir->set_alu_flag(alu_last_instr);
2007 
2008    auto max_val = value_factory.temp_register();
2009 
2010    ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4);
2011 
2012    if (all)
2013       ir->set_alu_flag(alu_src0_neg);
2014 
2015    shader.emit_instruction(ir);
2016 
2017    if (all)
2018       op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
2019    else
2020       op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
2021 
2022    ir = new AluInstr(op,
2023                      value_factory.dest(alu.dest, 0, pin_free),
2024                      max_val,
2025                      value_factory.inline_const(ALU_SRC_1, 0),
2026                      AluInstr::last_write);
2027    if (all)
2028       ir->set_alu_flag(alu_src1_neg);
2029    shader.emit_instruction(ir);
2030 
2031    return true;
2032 }
2033 
emit_any_all_icomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2034 static bool emit_any_all_icomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2035 {
2036    assert(!alu.src[0].abs);
2037    assert(!alu.src[0].negate);
2038    assert(!alu.src[1].abs);
2039    assert(!alu.src[1].negate);
2040 
2041    /* This should probabyl be lowered in nir */
2042    auto& value_factory = shader.value_factory();
2043 
2044    AluInstr *ir = nullptr;
2045    PRegister v[6];
2046 
2047    auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
2048 
2049    for (int i = 0; i < nc + nc/2; ++i)
2050       v[i] = value_factory.temp_register();
2051 
2052    EAluOp combine = all ? op2_and_int : op2_or_int;
2053 
2054    for (int i = 0; i < nc ; ++i) {
2055       ir = new AluInstr(op, v[i], value_factory.src(alu.src[0], i),
2056             value_factory.src(alu.src[1], i), AluInstr::write);
2057       shader.emit_instruction(ir);
2058    }
2059    if (ir)
2060          ir->set_alu_flag(alu_last_instr);
2061 
2062    if (nc ==2) {
2063       ir = new AluInstr(combine, dest, v[0], v[1], AluInstr::last_write);
2064       shader.emit_instruction(ir);
2065       return true;
2066    }
2067 
2068    if (nc == 3) {
2069       ir = new AluInstr(combine, v[3], v[0], v[1], AluInstr::last_write);
2070       shader.emit_instruction(ir);
2071       ir = new AluInstr(combine, dest, v[3], v[2], AluInstr::last_write);
2072       shader.emit_instruction(ir);
2073       return true;
2074    }
2075 
2076    if (nc == 4) {
2077       ir = new AluInstr(combine, v[4], v[0], v[1], AluInstr::write);
2078       shader.emit_instruction(ir);
2079       ir = new AluInstr(combine, v[5], v[2], v[3], AluInstr::last_write);
2080       shader.emit_instruction(ir);
2081       ir = new AluInstr(combine, dest, v[4], v[5], AluInstr::last_write);
2082       shader.emit_instruction(ir);
2083       return true;
2084    }
2085 
2086    return false;
2087 }
2088 
emit_dot(const nir_alu_instr & alu,int n,Shader & shader)2089 static bool emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
2090 {
2091    auto& value_factory = shader.value_factory();
2092    const nir_alu_src& src0 = alu.src[0];
2093    const nir_alu_src& src1 = alu.src[1];
2094 
2095    auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
2096 
2097    AluInstr::SrcValues srcs(8);
2098 
2099    for (int i = 0; i < n ; ++i) {
2100       srcs[2 * i    ] = value_factory.src(src0, i);
2101       srcs[2 * i + 1] = value_factory.src(src1, i);
2102    }
2103 
2104    for (int i = n; i < 4 ; ++i) {
2105       srcs[2 * i    ] = value_factory.zero();
2106       srcs[2 * i + 1] = value_factory.zero();
2107    }
2108 
2109    auto op = unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ?
2110                 op2_dot4 : op2_dot4_ieee;
2111    AluInstr *ir = new AluInstr(op, dest, srcs,  AluInstr::last_write, 4);
2112 
2113    if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2114    if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2115    if (src1.negate) ir->set_alu_flag(alu_src1_neg);
2116    if (src1.abs) ir->set_alu_flag(alu_src1_abs);
2117 
2118    if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2119 
2120    shader.emit_instruction(ir);
2121    return true;
2122 }
2123 
emit_fdph(const nir_alu_instr & alu,Shader & shader)2124 static bool emit_fdph(const nir_alu_instr& alu, Shader& shader)
2125 {
2126    auto& value_factory = shader.value_factory();
2127    const nir_alu_src& src0 = alu.src[0];
2128    const nir_alu_src& src1 = alu.src[1];
2129 
2130    auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
2131 
2132    AluInstr::SrcValues srcs(8);
2133 
2134    for (int i = 0; i < 3 ; ++i) {
2135       srcs[2 * i    ] = value_factory.src(src0, i);
2136       srcs[2 * i + 1] = value_factory.src(src1, i);
2137    }
2138 
2139    srcs[6] = value_factory.one();
2140    srcs[7] = value_factory.src(src1, 3);
2141 
2142    auto op = unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ?
2143                 op2_dot4 : op2_dot4_ieee;
2144    AluInstr *ir = new AluInstr(op, dest, srcs,  AluInstr::last_write, 4);
2145 
2146    if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2147    if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2148    if (src1.negate) ir->set_alu_flag(alu_src1_neg);
2149    if (src1.abs) ir->set_alu_flag(alu_src1_abs);
2150 
2151    if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2152 
2153    shader.emit_instruction(ir);
2154    return true;
2155 }
2156 
emit_create_vec(const nir_alu_instr & instr,unsigned nc,Shader & shader)2157 static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader)
2158 {
2159    auto& value_factory = shader.value_factory();
2160    AluInstr *ir = nullptr;
2161 
2162    for(unsigned i = 0; i < nc; ++i) {
2163       if (instr.dest.write_mask & (1 << i)){
2164          auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]);
2165          auto dst = value_factory.dest(instr.dest.dest, i, pin_chan);
2166          ir = new AluInstr(op1_mov, dst, src, {alu_write});
2167 
2168          if (instr.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2169          if (instr.src[i].negate) ir->set_alu_flag(alu_src0_neg);
2170          if (instr.src[i].abs) ir->set_alu_flag(alu_src0_abs);
2171 
2172          shader.emit_instruction(ir);
2173       }
2174    }
2175 
2176    if (ir)
2177       ir->set_alu_flag(alu_last_instr);
2178    return true;
2179 }
2180 
emit_alu_i2orf2_b1(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2181 static bool emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2182 {
2183    auto& value_factory = shader.value_factory();
2184    AluInstr *ir = nullptr;
2185    Pin pin = nir_dest_num_components(alu.dest.dest) == 1 ? pin_free : pin_none;
2186 
2187    for (int i = 0; i < 4 ; ++i) {
2188       if (alu.dest.write_mask & (1 << i)) {
2189          ir = new AluInstr(opcode,
2190                            value_factory.dest(alu.dest, i, pin),
2191                            value_factory.src(alu.src[0], i),
2192                            value_factory.zero(),
2193                            AluInstr::write);
2194          shader.emit_instruction(ir);
2195       }
2196    }
2197    if (ir)
2198       ir->set_alu_flag(alu_last_instr);
2199    return true;
2200 }
2201 
emit_alu_comb_with_zero(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2202 static bool emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2203 {
2204    auto& value_factory = shader.value_factory();
2205    AluInstr *ir = nullptr;
2206    auto pin = pin_for_components(alu);
2207    for (int i = 0; i < 4 ; ++i) {
2208       if (alu.dest.write_mask & (1 << i)){
2209          ir = new AluInstr(opcode,
2210                            value_factory.dest(alu.dest, i, pin),
2211                            value_factory.zero(),
2212                            value_factory.src(alu.src[0], i),
2213                            AluInstr::write);
2214          shader.emit_instruction(ir);
2215       }
2216    }
2217    if (ir)
2218       ir->set_alu_flag(alu_last_instr);
2219 
2220    return true;
2221 }
2222 
emit_pack_64_2x32_split(const nir_alu_instr & alu,Shader & shader)2223 static bool emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader)
2224 {
2225    auto& value_factory = shader.value_factory();
2226    AluInstr *ir = nullptr;
2227    for (unsigned i = 0; i < 2; ++i) {
2228       ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none),
2229                         value_factory.src(alu.src[i], 0), AluInstr::write);
2230      shader.emit_instruction(ir);
2231    }
2232    ir->set_alu_flag(alu_last_instr);
2233    return true;
2234 }
2235 
emit_pack_64_2x32(const nir_alu_instr & alu,Shader & shader)2236 static bool emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2237 {
2238    auto& value_factory = shader.value_factory();
2239    AluInstr *ir = nullptr;
2240    for (unsigned i = 0; i < 2; ++i) {
2241       ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none),
2242                         value_factory.src(alu.src[0], i), AluInstr::write);
2243      shader.emit_instruction(ir);
2244    }
2245    ir->set_alu_flag(alu_last_instr);
2246    return true;
2247 }
2248 
2249 
emit_unpack_64_2x32(const nir_alu_instr & alu,Shader & shader)2250 static bool emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2251 {
2252    auto& value_factory = shader.value_factory();
2253    AluInstr *ir = nullptr;
2254    for (unsigned i = 0; i < 2; ++i) {
2255       ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none),
2256                         value_factory.src64(alu.src[0], 0, i), AluInstr::write);
2257      shader.emit_instruction(ir);
2258    }
2259    ir->set_alu_flag(alu_last_instr);
2260    return true;
2261 }
2262 
emit_alu_vec2_64(const nir_alu_instr & alu,Shader & shader)2263 bool emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader)
2264 {
2265    auto& value_factory = shader.value_factory();
2266    AluInstr *ir = nullptr;
2267    for (unsigned i = 0; i < 2; ++i) {
2268       ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_chan),
2269                         value_factory.src64(alu.src[0], 0, i), AluInstr::write);
2270      shader.emit_instruction(ir);
2271    }
2272    for (unsigned i = 0; i < 2; ++i) {
2273       ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i + 2, pin_chan),
2274                         value_factory.src64(alu.src[1], 1, i), AluInstr::write);
2275      shader.emit_instruction(ir);
2276    }
2277    ir->set_alu_flag(alu_last_instr);
2278    return true;
2279 }
2280 
emit_pack_32_2x16_split(const nir_alu_instr & alu,Shader & shader)2281 static bool emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader)
2282 {
2283    auto& value_factory = shader.value_factory();
2284 
2285    auto x = value_factory.temp_register();
2286    auto y = value_factory.temp_register();
2287    auto yy = value_factory.temp_register();
2288 
2289    shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, x,
2290                                         value_factory.src(alu.src[0], 0), AluInstr::last_write));
2291 
2292    shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, y,
2293                                         value_factory.src(alu.src[1], 0), AluInstr::last_write));
2294 
2295    shader.emit_instruction(new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write));
2296 
2297    shader.emit_instruction(new AluInstr(op2_or_int,
2298                                         value_factory.dest(alu.dest, 0, pin_free),
2299                                         x, yy, AluInstr::last_write));
2300    return true;
2301 }
2302 
emit_unpack_64_2x32_split(const nir_alu_instr & alu,int comp,Shader & shader)2303 static bool emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader)
2304 {
2305    auto& value_factory = shader.value_factory();
2306    shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.dest, 0, pin_free),
2307                                         value_factory.src64(alu.src[0], 0, comp), AluInstr::last_write));
2308    return true;
2309 }
2310 
emit_unpack_32_2x16_split_x(const nir_alu_instr & alu,Shader & shader)2311 static bool emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader)
2312 {
2313    auto& value_factory = shader.value_factory();
2314    shader.emit_instruction(new AluInstr(op1_flt16_to_flt32, value_factory.dest(alu.dest, 0, pin_free),
2315                                         value_factory.src(alu.src[0], 0), AluInstr::last_write));
2316    return true;
2317 }
emit_unpack_32_2x16_split_y(const nir_alu_instr & alu,Shader & shader)2318 static bool emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader)
2319 {
2320    auto& value_factory = shader.value_factory();
2321    auto tmp = value_factory.temp_register();
2322    shader.emit_instruction(new AluInstr(op2_lshr_int, tmp,
2323                                         value_factory.src(alu.src[0], 0),
2324                                         value_factory.literal(16),
2325                                         AluInstr::last_write));
2326 
2327    shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2328                                         value_factory.dest(alu.dest, 0, pin_free),
2329                                         tmp, AluInstr::last_write));
2330    return true;
2331 }
2332 
2333 
2334 
emit_alu_trans_op1_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2335 static bool emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2336 {
2337    auto& value_factory = shader.value_factory();
2338    const nir_alu_src& src0 = alu.src[0];
2339 
2340    AluInstr *ir = nullptr;
2341    auto pin = pin_for_components(alu);
2342 
2343    for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest); ++i) {
2344       if (alu.dest.write_mask & (1 << i)){
2345          ir = new AluInstr(opcode,
2346                            value_factory.dest(alu.dest.dest, i, pin),
2347                            value_factory.src(src0, i),
2348                            AluInstr::last_write);
2349          if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2350          if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2351          if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2352          ir->set_alu_flag(alu_is_trans);
2353          shader.emit_instruction(ir);
2354       }
2355    }
2356 
2357    return true;
2358 }
2359 
emit_alu_f2i32_or_u32_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2360 static bool emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2361 {
2362    auto& value_factory = shader.value_factory();
2363    AluInstr *ir = nullptr;
2364 
2365    PRegister reg[4];
2366 
2367    int num_comp = nir_dest_num_components(alu.dest.dest);
2368 
2369    for (int i = 0; i < num_comp; ++i) {
2370       reg[i] = value_factory.temp_register();
2371       ir = new AluInstr(op1_trunc, reg[i], value_factory.src(alu.src[0], i), AluInstr::last_write);
2372       if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
2373       if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
2374       shader.emit_instruction(ir);
2375    }
2376 
2377    auto pin = pin_for_components(alu);
2378    for (int i = 0; i < num_comp; ++i) {
2379       ir = new AluInstr(opcode,
2380                         value_factory.dest(alu.dest, i, pin),
2381                         reg[i], AluInstr::write);
2382       if (opcode == op1_flt_to_uint) {
2383          ir->set_alu_flag(alu_is_trans);
2384          ir->set_alu_flag(alu_last_instr);
2385       }
2386       shader.emit_instruction(ir);
2387    }
2388    ir->set_alu_flag(alu_last_instr);
2389    return true;
2390 }
2391 
emit_alu_trans_op1_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2392 static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2393 {
2394    auto& value_factory = shader.value_factory();
2395    const nir_alu_src& src0 = alu.src[0];
2396 
2397    auto pin = pin_for_components(alu);
2398 
2399    /* todo: Actually we need only three channels, but then we have
2400     * to make sure that we don't hava w dest */
2401    for (unsigned j = 0; j < 4; ++j) {
2402       if (alu.dest.write_mask & (1 << j)) {
2403          AluInstr::SrcValues srcs(4);
2404          PRegister dest = value_factory.dest(alu.dest.dest, j, pin);
2405 
2406          for (unsigned i = 0; i < 4; ++i)
2407             srcs[i] = value_factory.src(src0, j);
2408 
2409          auto ir = new AluInstr(opcode, dest,  srcs,  AluInstr::last_write, 4);
2410 
2411          if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
2412          if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
2413          if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2414 
2415          ir->set_alu_flag(alu_is_cayman_trans);
2416 
2417 
2418          shader.emit_instruction(ir);
2419       }
2420    }
2421    return true;
2422 }
2423 
emit_alu_trans_op2_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2424 static bool emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2425 {
2426    auto& value_factory = shader.value_factory();
2427 
2428    const nir_alu_src& src0 = alu.src[0];
2429    const nir_alu_src& src1 = alu.src[1];
2430 
2431    AluInstr *ir = nullptr;
2432 
2433    auto pin = pin_for_components(alu);
2434    for (int i = 0; i < 4 ; ++i) {
2435       if (alu.dest.write_mask & (1 << i)){
2436          ir = new AluInstr(opcode, value_factory.dest(alu.dest.dest, i, pin),
2437                            value_factory.src(src0, i),
2438                            value_factory.src(src1, i), AluInstr::last_write);
2439          if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2440          if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2441          if (src1.negate) ir->set_alu_flag(alu_src1_neg);
2442          if (src1.abs) ir->set_alu_flag(alu_src1_abs);
2443          if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2444          ir->set_alu_flag(alu_is_trans);
2445          shader.emit_instruction(ir);
2446       }
2447    }
2448    return true;
2449 }
2450 
emit_alu_trans_op2_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2451 static bool emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2452 {
2453    auto& value_factory = shader.value_factory();
2454 
2455    const nir_alu_src& src0 = alu.src[0];
2456    const nir_alu_src& src1 = alu.src[1];
2457 
2458    unsigned last_slot = 4;
2459 
2460    for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) {
2461       if (alu.dest.write_mask & (1 << k)) {
2462          AluInstr::SrcValues srcs(2 * last_slot);
2463          PRegister dest = value_factory.dest(alu.dest.dest, k, pin_free);
2464 
2465          for (unsigned i = 0; i < last_slot ; ++i) {
2466             srcs[2 * i    ] = value_factory.src(src0, k);
2467             srcs[2 * i + 1] = value_factory.src(src1, k);
2468          }
2469 
2470          auto ir = new AluInstr(opcode,
2471                                 dest, srcs, AluInstr::last_write, last_slot);
2472 
2473          if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2474          if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2475          if (src1.negate) ir->set_alu_flag(alu_src1_neg);
2476          if (src1.abs) ir->set_alu_flag(alu_src1_abs);
2477          if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2478          ir->set_alu_flag(alu_is_cayman_trans);
2479          shader.emit_instruction(ir);
2480       }
2481    }
2482    return true;
2483 }
2484 
2485 
emit_tex_fdd(const nir_alu_instr & alu,TexInstr::Opcode opcode,bool fine,Shader & shader)2486 static bool emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader)
2487 {
2488    auto& value_factory = shader.value_factory();
2489 
2490    int ncomp = nir_dest_num_components(alu.dest.dest);
2491    RegisterVec4::Swizzle src_swz = {7,7,7,7};
2492    for (auto i = 0; i < ncomp; ++i)
2493       src_swz[i] = alu.src[0].swizzle[i];
2494 
2495    auto src = value_factory.src_vec4(alu.src[0].src, pin_group, src_swz);
2496 
2497    auto tmp = value_factory.temp_vec4(pin_group);
2498    AluInstr *mv = nullptr;
2499    for (int i = 0; i < ncomp; ++i) {
2500       mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write);
2501       if (alu.src[0].abs)
2502          mv->set_alu_flag(alu_src0_abs);
2503       if (alu.src[0].negate)
2504          mv->set_alu_flag(alu_src0_neg);
2505       shader.emit_instruction(mv);
2506    }
2507    if (mv)
2508       mv->set_alu_flag(alu_last_instr);
2509 
2510    auto dst = value_factory.dest_vec4(alu.dest.dest, pin_group);
2511    RegisterVec4::Swizzle dst_swz = {7,7,7,7};
2512    for (auto i = 0; i < ncomp; ++i) {
2513       if (alu.dest.write_mask & (1 << i))
2514          dst_swz[i] = i;
2515    }
2516 
2517    auto tex = new TexInstr(opcode, dst, dst_swz, tmp, 0, R600_MAX_CONST_BUFFERS);
2518 
2519    if (fine)
2520       tex->set_tex_flag(TexInstr::grad_fine);
2521 
2522    shader.emit_instruction(tex);
2523 
2524    return true;
2525 }
2526 
emit_alu_cube(const nir_alu_instr & alu,Shader & shader)2527 static bool emit_alu_cube(const nir_alu_instr& alu, Shader& shader)
2528 {
2529    auto& value_factory = shader.value_factory();
2530    AluInstr *ir = nullptr;
2531 
2532    const uint16_t src0_chan[4] = {2, 2, 0, 1};
2533    const uint16_t src1_chan[4] = {1, 0, 2, 2};
2534 
2535    auto group = new AluGroup();
2536 
2537    for (int i = 0; i < 4; ++i)  {
2538 
2539 
2540       ir = new AluInstr(op2_cube, value_factory.dest(alu.dest.dest, i, pin_chan),
2541                         value_factory.src(alu.src[0], src0_chan[i]),
2542                         value_factory.src(alu.src[0], src1_chan[i]),
2543                         AluInstr::write);
2544       group->add_instruction(ir);
2545    }
2546    ir->set_alu_flag(alu_last_instr);
2547    shader.emit_instruction(group);
2548    return true;
2549 }
2550 
2551 const std::set<AluModifiers> AluInstr::empty;
2552 const std::set<AluModifiers> AluInstr::write({alu_write});
2553 const std::set<AluModifiers> AluInstr::last({alu_last_instr});
2554 const std::set<AluModifiers> AluInstr::last_write({alu_write, alu_last_instr});
2555 
2556 }
2557