• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instr_alu.h"
28 
29 #include "sfn_alu_defines.h"
30 #include "sfn_debug.h"
31 #include "sfn_instr_alugroup.h"
32 #include "sfn_instr_tex.h"
33 #include "sfn_shader.h"
34 #include "sfn_virtualvalues.h"
35 
36 #include <algorithm>
37 #include <sstream>
38 
39 namespace r600 {
40 
41 using std::istream;
42 using std::string;
43 using std::vector;
44 
AluInstr(EAluOp opcode,PRegister dest,SrcValues src,const std::set<AluModifiers> & flags,int slots)45 AluInstr::AluInstr(EAluOp opcode,
46                    PRegister dest,
47                    SrcValues src,
48                    const std::set<AluModifiers>& flags,
49                    int slots):
50     m_opcode(opcode),
51     m_dest(dest),
52     m_bank_swizzle(alu_vec_unknown),
53     m_cf_type(cf_alu),
54     m_alu_slots(slots)
55 {
56    m_src.swap(src);
57 
58    if (m_src.size() == 3)
59       m_alu_flags.set(alu_op3);
60 
61    for (auto f : flags)
62       m_alu_flags.set(f);
63 
64    ASSERT_OR_THROW(m_src.size() ==
65                       static_cast<size_t>(alu_ops.at(opcode).nsrc * m_alu_slots),
66                    "Unexpected number of source values");
67 
68    if (m_alu_flags.test(alu_write))
69       ASSERT_OR_THROW(dest, "Write flag is set, but no destination register is given");
70 
71    update_uses();
72 
73    if (dest && slots > 1) {
74       switch (m_opcode) {
75       case op2_dot_ieee: m_allowed_dest_mask = (1 << (5 - slots)) - 1;
76          break;
77       default:
78          if (has_alu_flag(alu_is_cayman_trans)) {
79             m_allowed_dest_mask = (1 << slots) - 1;
80          }
81       }
82    }
83    assert(!dest || (m_allowed_dest_mask & (1 << dest->chan())));
84 }
85 
AluInstr(EAluOp opcode)86 AluInstr::AluInstr(EAluOp opcode):
87     AluInstr(opcode, nullptr, SrcValues(alu_ops.at(opcode).nsrc), {}, 1)
88 {
89 }
90 
AluInstr(EAluOp opcode,int chan)91 AluInstr::AluInstr(EAluOp opcode, int chan):
92     AluInstr(opcode, nullptr, SrcValues(), {}, 1)
93 {
94    m_fallback_chan = chan;
95 }
96 
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,const std::set<AluModifiers> & m_flags)97 AluInstr::AluInstr(EAluOp opcode,
98                    PRegister dest,
99                    PVirtualValue src0,
100                    const std::set<AluModifiers>& m_flags):
101     AluInstr(opcode, dest, SrcValues{src0}, m_flags, 1)
102 {
103 }
104 
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,const std::set<AluModifiers> & m_flags)105 AluInstr::AluInstr(EAluOp opcode,
106                    PRegister dest,
107                    PVirtualValue src0,
108                    PVirtualValue src1,
109                    const std::set<AluModifiers>& m_flags):
110     AluInstr(opcode, dest, SrcValues{src0, src1}, m_flags, 1)
111 {
112 }
113 
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,PVirtualValue src2,const std::set<AluModifiers> & m_flags)114 AluInstr::AluInstr(EAluOp opcode,
115                    PRegister dest,
116                    PVirtualValue src0,
117                    PVirtualValue src1,
118                    PVirtualValue src2,
119                    const std::set<AluModifiers>& m_flags):
120     AluInstr(opcode, dest, SrcValues{src0, src1, src2}, m_flags, 1)
121 {
122 }
123 
AluInstr(ESDOp op,PVirtualValue src0,PVirtualValue src1,PVirtualValue address)124 AluInstr::AluInstr(ESDOp op,
125                    PVirtualValue src0,
126                    PVirtualValue src1,
127                    PVirtualValue address):
128     m_lds_opcode(op)
129 {
130    set_alu_flag(alu_is_lds);
131 
132    m_src.push_back(address);
133    if (src0) {
134       m_src.push_back(src0);
135       if (src1)
136          m_src.push_back(src1);
137    }
138    update_uses();
139 }
140 
AluInstr(ESDOp op,const SrcValues & src,const std::set<AluModifiers> & flags)141 AluInstr::AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags):
142     m_lds_opcode(op),
143     m_src(src)
144 {
145    for (auto f : flags)
146       set_alu_flag(f);
147 
148    set_alu_flag(alu_is_lds);
149    update_uses();
150 }
151 
152 void
update_uses()153 AluInstr::update_uses()
154 {
155    for (auto& s : m_src) {
156       auto r = s->as_register();
157       if (r) {
158          r->add_use(this);
159          // move this to add_use
160          if (r->pin() == pin_array) {
161             auto array_elm = static_cast<LocalArrayValue *>(r);
162             auto addr = array_elm->addr();
163             if (addr && addr->as_register())
164                addr->as_register()->add_use(this);
165          }
166       }
167       auto u = s->as_uniform();
168       if (u && u->buf_addr() && u->buf_addr()->as_register())
169          u->buf_addr()->as_register()->add_use(this);
170    }
171 
172    if (m_dest &&
173        (has_alu_flag(alu_write) ||
174         m_opcode == op1_mova_int ||
175         m_opcode == op1_set_cf_idx0 ||
176         m_opcode == op1_set_cf_idx1)) {
177       m_dest->add_parent(this);
178 
179       if (m_dest->pin() == pin_array) {
180          // move this to add_parent
181          auto array_elm = static_cast<LocalArrayValue *>(m_dest);
182          auto addr = array_elm->addr();
183          if (addr && addr->as_register())
184             addr->as_register()->add_use(this);
185       }
186    }
187 }
188 
189 void
accept(ConstInstrVisitor & visitor) const190 AluInstr::accept(ConstInstrVisitor& visitor) const
191 {
192    visitor.visit(*this);
193 }
194 
195 void
accept(InstrVisitor & visitor)196 AluInstr::accept(InstrVisitor& visitor)
197 {
198    visitor.visit(this);
199 }
200 
201 const std::map<ECFAluOpCode, std::string> AluInstr::cf_map = {
202    {cf_alu_break,       "BREAK"      },
203    {cf_alu_continue,    "CONT"       },
204    {cf_alu_else_after,  "ELSE_AFTER" },
205    {cf_alu_extended,    "EXTENDED"   },
206    {cf_alu_pop_after,   "POP_AFTER"  },
207    {cf_alu_pop2_after,  "POP2_AFTER" },
208    {cf_alu_push_before, "PUSH_BEFORE"}
209 };
210 
211 const std::map<AluBankSwizzle, std::string> AluInstr::bank_swizzle_map = {
212    {alu_vec_012, "VEC_012"},
213    {alu_vec_021, "VEC_021"},
214    {alu_vec_102, "VEC_102"},
215    {alu_vec_120, "VEC_120"},
216    {alu_vec_201, "VEC_201"},
217    {alu_vec_210, "VEC_210"}
218 };
219 
220 const AluModifiers AluInstr::src_rel_flags[3] = {
221    alu_src0_rel, alu_src1_rel, alu_src2_rel};
222 
/* Bundle of an index mode and a bit set of print flags. The static
 * members are the individual bits that can be combined in `flags`. */
struct ValuePrintFlags {
   static constexpr int is_rel = 1;
   static constexpr int has_abs = 2;
   static constexpr int has_neg = 4;
   static constexpr int literal_is_float = 8;
   static constexpr int index_ar = 16;
   static constexpr int index_loopidx = 32;

   ValuePrintFlags(int im, int f):
       index_mode(im),
       flags(f)
   {
   }

   int index_mode = 0;
   int flags = 0;
};
238 
239 void
do_print(std::ostream & os) const240 AluInstr::do_print(std::ostream& os) const
241 {
242    const char swzchar[] = "xyzw01?_";
243 
244    unsigned i = 0;
245 
246    os << "ALU ";
247 
248    if (has_alu_flag(alu_is_lds)) {
249       os << "LDS " << lds_ops.at(m_lds_opcode).name;
250       os << " __.x : ";
251    } else {
252 
253       os << alu_ops.at(m_opcode).name;
254       if (has_alu_flag(alu_dst_clamp))
255          os << " CLAMP";
256 
257       if (m_dest) {
258          if (has_alu_flag(alu_write) || m_dest->has_flag(Register::addr_or_idx)) {
259             os << " " << *m_dest;
260          } else {
261             os << " __"
262                << "." << swzchar[m_dest->chan()];
263             if (m_dest->pin() != pin_none)
264                os << "@" << m_dest->pin();
265          }
266          os << " : ";
267       } else {
268          os << " __." << swzchar[dest_chan()] << " : ";
269       }
270    }
271 
272    const int n_source_per_slot =
273       has_alu_flag(alu_is_lds) ? m_src.size() : alu_ops.at(m_opcode).nsrc;
274 
275 
276    for (int s = 0; s < m_alu_slots; ++s) {
277 
278       if (s > 0)
279          os << " +";
280 
281       for (int k = 0; k < n_source_per_slot; ++k) {
282          int pflags = 0;
283          if (i)
284             os << ' ';
285          if (has_source_mod(i, mod_neg))
286             pflags |= ValuePrintFlags::has_neg;
287          if (has_alu_flag(src_rel_flags[k]))
288             pflags |= ValuePrintFlags::is_rel;
289          if (n_source_per_slot <= 2)
290             if (has_source_mod(i, mod_abs))
291                pflags |= ValuePrintFlags::has_abs;
292 
293          if (pflags & ValuePrintFlags::has_neg)
294             os << '-';
295          if (pflags & ValuePrintFlags::has_abs)
296             os << '|';
297          os << *m_src[i];
298          if (pflags & ValuePrintFlags::has_abs)
299             os << '|';
300          ++i;
301       }
302    }
303 
304    os << " {";
305    if (has_alu_flag(alu_write))
306       os << 'W';
307    if (has_alu_flag(alu_last_instr))
308       os << 'L';
309    if (has_alu_flag(alu_update_exec))
310       os << 'E';
311    if (has_alu_flag(alu_update_pred))
312       os << 'P';
313    os << "}";
314 
315    auto bs_name = bank_swizzle_map.find(m_bank_swizzle);
316    if (bs_name != bank_swizzle_map.end())
317       os << ' ' << bs_name->second;
318 
319    auto cf_name = cf_map.find(m_cf_type);
320    if (cf_name != cf_map.end())
321       os << ' ' << cf_name->second;
322 }
323 
324 bool
can_propagate_src() const325 AluInstr::can_propagate_src() const
326 {
327    /* We can use the source in the next instruction */
328    if (!can_copy_propagate())
329       return false;
330 
331    auto src_reg = m_src[0]->as_register();
332    if (!src_reg)
333       return true;
334 
335    assert(m_dest);
336 
337    if (!m_dest->has_flag(Register::ssa)) {
338       return false;
339    }
340 
341    if (m_dest->pin() == pin_fully)
342       return m_dest->equal_to(*src_reg);
343 
344    if (m_dest->pin() == pin_chan)
345       return src_reg->pin() == pin_none ||
346              src_reg->pin() == pin_free ||
347              (src_reg->pin() == pin_chan && src_reg->chan() == m_dest->chan());
348 
349    return m_dest->pin() == pin_none || m_dest->pin() == pin_free;
350 }
351 
352 class ReplaceIndirectArrayAddr : public RegisterVisitor {
353 public:
visit(Register & value)354    void visit(Register& value) override { (void)value; }
visit(LocalArray & value)355    void visit(LocalArray& value) override
356    {
357       (void)value;
358       unreachable("An array can't be used as address");
359    }
360    void visit(LocalArrayValue& value) override;
361    void visit(UniformValue& value) override;
visit(LiteralConstant & value)362    void visit(LiteralConstant& value) override { (void)value; }
visit(InlineConstant & value)363    void visit(InlineConstant& value) override { (void)value; }
364 
365    PRegister new_addr;
366 };
367 
visit(LocalArrayValue & value)368 void ReplaceIndirectArrayAddr::visit(LocalArrayValue& value)
369 {
370    if (new_addr->sel() == 0 && value.addr()
371        && value.addr()->as_register())
372       value.set_addr(new_addr);
373 }
374 
visit(UniformValue & value)375 void ReplaceIndirectArrayAddr::visit(UniformValue& value)
376 {
377    if (value.buf_addr() && value.buf_addr()->as_register() &&
378        (new_addr->sel() == 1 || new_addr->sel() == 2)) {
379       value.set_buf_addr(new_addr);
380    }
381 }
382 
update_indirect_addr(UNUSED PRegister old_reg,PRegister reg)383 void AluInstr::update_indirect_addr(UNUSED PRegister old_reg, PRegister reg)
384 {
385    ReplaceIndirectArrayAddr visitor;
386 
387    visitor.new_addr = reg;
388    assert(reg->has_flag(Register::addr_or_idx));
389 
390    if (m_dest)
391       m_dest->accept(visitor);
392 
393    for (auto src : m_src)
394       src->accept(visitor);
395 
396    reg->add_use(this);
397 }
398 
399 bool
can_propagate_dest() const400 AluInstr::can_propagate_dest() const
401 {
402    if (!can_copy_propagate()) {
403       return false;
404    }
405 
406    auto src_reg = m_src[0]->as_register();
407    if (!src_reg) {
408       return false;
409    }
410 
411    assert(m_dest);
412 
413    if (src_reg->pin() == pin_fully) {
414       return false;
415    }
416 
417    if (!src_reg->has_flag(Register::ssa))
418       return false;
419 
420    if (!m_dest->has_flag(Register::ssa))
421       return false;
422 
423    if (src_reg->pin() == pin_chan)
424       return m_dest->pin() == pin_none || m_dest->pin() == pin_free ||
425              ((m_dest->pin() == pin_chan || m_dest->pin() == pin_group) &&
426               src_reg->chan() == m_dest->chan());
427 
428    return (src_reg->pin() == pin_none || src_reg->pin() == pin_free);
429 }
430 
431 bool
can_copy_propagate() const432 AluInstr::can_copy_propagate() const
433 {
434    if (m_opcode != op1_mov)
435       return false;
436 
437    if (has_source_mod(0, mod_abs) || has_source_mod(0, mod_neg) ||
438        has_alu_flag(alu_dst_clamp))
439       return false;
440 
441    return has_alu_flag(alu_write);
442 }
443 
444 bool
replace_source(PRegister old_src,PVirtualValue new_src)445 AluInstr::replace_source(PRegister old_src, PVirtualValue new_src)
446 {
447    if (!can_replace_source(old_src, new_src))
448       return false;
449 
450    return do_replace_source(old_src, new_src);
451 }
452 
do_replace_source(PRegister old_src,PVirtualValue new_src)453 bool AluInstr::do_replace_source(PRegister old_src, PVirtualValue new_src)
454 {
455    bool process = false;
456 
457    for (unsigned i = 0; i < m_src.size(); ++i) {
458       if (old_src->equal_to(*m_src[i])) {
459          m_src[i] = new_src;
460          process = true;
461       }
462    }
463    if (process) {
464       auto r = new_src->as_register();
465       if (r)
466          r->add_use(this);
467       old_src->del_use(this);
468    }
469 
470    return process;
471 }
472 
replace_src(int i,PVirtualValue new_src,uint32_t to_set,SourceMod to_clear)473 bool AluInstr::replace_src(int i, PVirtualValue new_src, uint32_t to_set,
474                            SourceMod to_clear)
475 {
476    auto old_src = m_src[i]->as_register();
477    assert(old_src);
478 
479    if (!can_replace_source(old_src, new_src))
480       return false;
481 
482    assert(old_src);
483    old_src->del_use(this);
484 
485    m_src[i] = new_src;
486 
487    auto r = new_src->as_register();
488    if (r)
489       r->add_use(this);
490 
491    m_source_modifiers |= to_set << (2 * i);
492    m_source_modifiers &= ~(to_clear  << (2 * i));
493 
494    return true;
495 }
496 
497 
can_replace_source(PRegister old_src,PVirtualValue new_src)498 bool AluInstr::can_replace_source(PRegister old_src, PVirtualValue new_src)
499 {
500    if (!check_readport_validation(old_src, new_src))
501       return false;
502 
503    /* If the old or new source is an array element, we assume that there
504     * might have been an (untracked) indirect access, so don't replace
505     * this source */
506    if (old_src->pin() == pin_array && new_src->pin() == pin_array)
507       return false;
508 
509    auto [addr, dummy, index] = indirect_addr();
510    auto addr_reg = addr ?  addr->as_register() : nullptr;
511    auto index_reg = index ? index->as_register() : nullptr;
512 
513    if (auto u = new_src->as_uniform()) {
514       if (u && u->buf_addr()) {
515 
516          /* Don't mix indirect buffer and indirect registers, because the
517           * scheduler can't handle it yet. */
518          if (addr_reg)
519             return false;
520 
521          /* Don't allow two different index registers, can't deal with that yet */
522          if (index_reg && !index_reg->equal_to(*u->buf_addr()))
523             return false;
524       }
525    }
526 
527    if (auto new_addr = new_src->get_addr()) {
528       auto new_addr_reg = new_addr->as_register();
529       bool new_addr_lowered = new_addr_reg &&
530                               new_addr_reg->has_flag(Register::addr_or_idx);
531 
532       if (addr_reg) {
533          if (!addr_reg->equal_to(*new_addr) || new_addr_lowered ||
534              addr_reg->has_flag(Register::addr_or_idx))
535             return false;
536       }
537       if (m_dest->has_flag(Register::addr_or_idx)) {
538          if (new_src->pin() == pin_array) {
539             auto s = static_cast<const LocalArrayValue *>(new_src)->addr();
540             if (!s->as_inline_const() || !s->as_literal())
541                return false;
542          }
543       }
544    }
545    return true;
546 }
547 
548 void
set_sources(SrcValues src)549 AluInstr::set_sources(SrcValues src)
550 {
551    for (auto& s : m_src) {
552       auto r = s->as_register();
553       if (r)
554          r->del_use(this);
555    }
556    m_src.swap(src);
557    for (auto& s : m_src) {
558       auto r = s->as_register();
559       if (r)
560          r->add_use(this);
561    }
562 }
563 
allowed_src_chan_mask() const564 uint8_t AluInstr::allowed_src_chan_mask() const
565 {
566    if (m_alu_slots < 2)
567       return 0xf;
568 
569    int chan_use_count[4] = {0};
570 
571    for (auto s : m_src) {
572        auto r = s->as_register();
573        if (r)
574            ++chan_use_count[r->chan()];
575    }
576    /* Each channel can only be loaded in one of three cycles,
577     * so if a channel is already used three times, we can't
578     * add another source withthis channel.
579     * Since we want to move away from one channel to another, it
580     * is not important to know which is the old channel that will
581     * be freed by the channel switch.*/
582    int mask = 0;
583 
584    /* Be conservative about channel use when using more than two
585     * slots. Currently a constellatioon of
586     *
587     *  ALU d.x = f(r0.x, r1.y)
588     *  ALU _.y = f(r2.y, r3.x)
589     *  ALU _.z = f(r4.x, r5.y)
590     *
591     * will fail to be split. To get constellations like this to be scheduled
592     * properly will need some work on the bank swizzle check.
593     */
594    int maxuse = m_alu_slots > 2 ? 2 : 3;
595    for (int i = 0; i < 4; ++i) {
596        if (chan_use_count[i] < maxuse)
597            mask |= 1 << i;
598    }
599    return mask;
600 }
601 
602 bool
replace_dest(PRegister new_dest,AluInstr * move_instr)603 AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
604 {
605    if (m_dest->equal_to(*new_dest))
606       return false;
607 
608    if (m_dest->uses().size() > 1)
609       return false;
610 
611    if (new_dest->pin() == pin_array)
612       return false;
613 
614    /* Currently we bail out when an array write should be moved, because
615     * declaring an array write is currently not well defined. The
616     * Whole "backwards" copy propagation should dprobably be replaced by some
617     * forward peep holew optimization */
618    /*
619    if (new_dest->pin() == pin_array) {
620       auto dav = static_cast<const LocalArrayValue *>(new_dest)->addr();
621       for (auto s: m_src) {
622          if (s->pin() == pin_array) {
623             auto sav = static_cast<const LocalArrayValue *>(s)->addr();
624             if (dav && sav && dav->as_register() &&  !dav->equal_to(*sav))
625                return false;
626          }
627       }
628    }
629    */
630 
631    if (m_dest->pin() == pin_chan && new_dest->chan() != m_dest->chan())
632       return false;
633 
634    if (m_dest->pin() == pin_chan) {
635       if (new_dest->pin() == pin_group)
636          new_dest->set_pin(pin_chgr);
637       else if (new_dest->pin() != pin_chgr)
638          new_dest->set_pin(pin_chan);
639    }
640 
641    m_dest = new_dest;
642    if (!move_instr->has_alu_flag(alu_last_instr))
643       reset_alu_flag(alu_last_instr);
644 
645    if (has_alu_flag(alu_is_cayman_trans)) {
646       /* Copy propagation puts an instruction into the w channel, but we
647        * don't have the slots for a w channel */
648       if (m_dest->chan() == 3 && m_alu_slots < 4) {
649          m_alu_slots = 4;
650          assert(m_src.size() == 3);
651          m_src.push_back(m_src[0]);
652       }
653    }
654 
655    return true;
656 }
657 
658 void
pin_sources_to_chan()659 AluInstr::pin_sources_to_chan()
660 {
661    for (auto s : m_src) {
662       auto r = s->as_register();
663       if (r) {
664          if (r->pin() == pin_free)
665             r->set_pin(pin_chan);
666          else if (r->pin() == pin_group)
667             r->set_pin(pin_chgr);
668       }
669    }
670 }
671 
672 bool
check_readport_validation(PRegister old_src,PVirtualValue new_src) const673 AluInstr::check_readport_validation(PRegister old_src, PVirtualValue new_src) const
674 {
675    if (m_src.size() < 3)
676       return true;
677 
678    bool success = true;
679    AluReadportReservation rpr_sum;
680 
681    unsigned nsrc = alu_ops.at(m_opcode).nsrc;
682    assert(nsrc * m_alu_slots == m_src.size());
683 
684    for (int s = 0; s < m_alu_slots && success; ++s) {
685       PVirtualValue src[3];
686       auto ireg = m_src.begin() + s * nsrc;
687 
688       for (unsigned i = 0; i < nsrc; ++i, ++ireg)
689          src[i] = old_src->equal_to(**ireg) ? new_src : *ireg;
690 
691       AluBankSwizzle bs = alu_vec_012;
692       while (bs != alu_vec_unknown) {
693          AluReadportReservation rpr = rpr_sum;
694          if (rpr.schedule_vec_src(src, nsrc, bs)) {
695             rpr_sum = rpr;
696             break;
697          }
698          ++bs;
699       }
700 
701       if (bs == alu_vec_unknown)
702          success = false;
703    }
704    return success;
705 }
706 
707 void
add_extra_dependency(PVirtualValue value)708 AluInstr::add_extra_dependency(PVirtualValue value)
709 {
710    auto reg = value->as_register();
711    if (reg)
712       m_extra_dependencies.insert(reg);
713 }
714 
715 bool
is_equal_to(const AluInstr & lhs) const716 AluInstr::is_equal_to(const AluInstr& lhs) const
717 {
718    if (lhs.m_opcode != m_opcode || lhs.m_bank_swizzle != m_bank_swizzle ||
719        lhs.m_cf_type != m_cf_type || lhs.m_alu_flags != m_alu_flags) {
720       return false;
721    }
722 
723    if (m_dest) {
724       if (!lhs.m_dest) {
725          return false;
726       } else {
727          if (has_alu_flag(alu_write)) {
728             if (!m_dest->equal_to(*lhs.m_dest))
729                return false;
730          } else {
731             if (m_dest->chan() != lhs.m_dest->chan())
732                return false;
733          }
734       }
735    } else {
736       if (lhs.m_dest)
737          return false;
738    }
739 
740    if (m_src.size() != lhs.m_src.size())
741       return false;
742 
743    for (unsigned i = 0; i < m_src.size(); ++i) {
744       if (!m_src[i]->equal_to(*lhs.m_src[i]))
745          return false;
746    }
747 
748    return true;
749 }
750 
751 class ResolveIndirectArrayAddr : public ConstRegisterVisitor {
752 public:
visit(const Register & value)753    void visit(const Register& value) { (void)value; }
visit(const LocalArray & value)754    void visit(const LocalArray& value)
755    {
756       (void)value;
757       unreachable("An array can't be used as address");
758    }
759    void visit(const LocalArrayValue& value);
760    void visit(const UniformValue& value);
visit(const LiteralConstant & value)761    void visit(const LiteralConstant& value) { (void)value; }
visit(const InlineConstant & value)762    void visit(const InlineConstant& value) { (void)value; }
763 
764    PRegister addr{nullptr};
765    PRegister index{nullptr};
766    bool addr_is_for_dest{false};
767 };
768 
769 void
visit(const LocalArrayValue & value)770 ResolveIndirectArrayAddr::visit(const LocalArrayValue& value)
771 {
772    auto a = value.addr();
773    if (a) {
774       addr = a->as_register();
775       assert(!addr_is_for_dest);
776    }
777 }
778 
779 void
visit(const UniformValue & value)780 ResolveIndirectArrayAddr::visit(const UniformValue& value)
781 {
782    auto a = value.buf_addr();
783    if (a) {
784       index = a->as_register();
785    }
786 }
787 
788 std::tuple<PRegister, bool, PRegister>
indirect_addr() const789 AluInstr::indirect_addr() const
790 {
791    ResolveIndirectArrayAddr visitor;
792 
793    if (m_dest) {
794       m_dest->accept(visitor);
795       if (visitor.addr)
796           visitor.addr_is_for_dest = true;
797    }
798 
799    for (auto s : m_src) {
800       s->accept(visitor);
801    }
802    return {visitor.addr, visitor.addr_is_for_dest, visitor.index};
803 }
804 
805 AluGroup *
split(ValueFactory & vf)806 AluInstr::split(ValueFactory& vf)
807 {
808    if (m_alu_slots == 1)
809       return nullptr;
810 
811    sfn_log << SfnLog::instr << "Split " << *this << "\n";
812 
813    auto group = new AluGroup();
814 
815    m_dest->del_parent(this);
816 
817    int start_slot = 0;
818    bool is_dot = m_opcode == op2_dot_ieee;
819    auto last_opcode = m_opcode;
820 
821    if (is_dot) {
822       start_slot = m_dest->chan();
823       last_opcode = op2_mul_ieee;
824    }
825 
826 
827    for (int k = 0; k < m_alu_slots; ++k) {
828       int s = k + start_slot;
829 
830       PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s);
831       if (dst->pin() != pin_chgr) {
832          auto pin = pin_chan;
833          if (dst->pin() == pin_group && m_dest->chan() == s)
834             pin = pin_chgr;
835          dst->set_pin(pin);
836       }
837 
838       SrcValues src;
839       int nsrc = alu_ops.at(m_opcode).nsrc;
840       for (int i = 0; i < nsrc; ++i) {
841          auto old_src = m_src[k * nsrc + i];
842          // Make it easy for the scheduler and pin the register to the
843          // channel, otherwise scheduler would have to check whether a
844          // channel switch is possible
845          auto r = old_src->as_register();
846          if (r) {
847             if (r->pin() == pin_free || r->pin() == pin_none)
848                r->set_pin(pin_chan);
849             else if (r->pin() == pin_group)
850                r->set_pin(pin_chgr);
851          }
852          src.push_back(old_src);
853       }
854 
855       auto opcode = k < m_alu_slots -1 ? m_opcode : last_opcode;
856 
857 
858       auto instr = new AluInstr(opcode, dst, src, {}, 1);
859       instr->set_blockid(block_id(), index());
860 
861       if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
862          if (has_source_mod(nsrc * k + 0, mod_neg))
863             instr->set_source_mod(0, mod_neg);
864          if (has_source_mod(nsrc * k + 1, mod_neg))
865             instr->set_source_mod(1, mod_neg);
866          if (has_source_mod(nsrc * k + 2, mod_neg))
867             instr->set_source_mod(2, mod_neg);
868          if (has_source_mod(nsrc * k + 0, mod_abs))
869             instr->set_source_mod(0, mod_abs);
870          if (has_source_mod(nsrc * k + 1, mod_abs))
871             instr->set_source_mod(1, mod_abs);
872       }
873       if (has_alu_flag(alu_dst_clamp))
874          instr->set_alu_flag(alu_dst_clamp);
875 
876       if (s == m_dest->chan())
877          instr->set_alu_flag(alu_write);
878 
879       m_dest->add_parent(instr);
880       sfn_log << SfnLog::instr << "   " << *instr << "\n";
881 
882       if (!group->add_instruction(instr)) {
883          std::cerr << "Unable to schedule '" << *instr << "' into\n" << *group << "\n";
884 
885          unreachable("Invalid group instruction");
886       }
887    }
888    group->set_blockid(block_id(), index());
889 
890    for (auto s : m_src) {
891       auto r = s->as_register();
892       if (r) {
893          r->del_use(this);
894       }
895    }
896    group->set_origin(this);
897 
898    return group;
899 }
900 
901 /* Alu instructions that have SSA dest registers increase the  regietsr
902  * pressure Alu instructions that read from SSA registers may decresase the
903  * register pressure hency evaluate a priorityx values based on register
904  * pressure change */
905 int
register_priority() const906 AluInstr::register_priority() const
907 {
908    int priority = 0;
909    if (!has_alu_flag(alu_no_schedule_bias)) {
910 
911       if (m_dest) {
912          if (m_dest->has_flag(Register::ssa) && has_alu_flag(alu_write)) {
913             if (m_dest->pin() != pin_group && m_dest->pin() != pin_chgr &&
914                 !m_dest->addr())
915                priority--;
916          } else {
917             // Arrays and registers are pre-allocated, hence scheduling
918             // assignments early is unlikely to increase register pressure
919             priority++;
920          }
921       }
922 
923       for (const auto s : m_src) {
924          auto r = s->as_register();
925          if (r) {
926             if (r->has_flag(Register::ssa)) {
927                int pending = 0;
928                for (auto b : r->uses()) {
929                   if (!b->is_scheduled())
930                      ++pending;
931                }
932                if (pending == 1)
933                   ++priority;
934             }
935             if (r->addr() && r->addr()->as_register())
936                priority += 2;
937          }
938          if (s->as_uniform())
939             ++priority;
940       }
941    }
942    return priority;
943 }
944 
945 bool
propagate_death()946 AluInstr::propagate_death()
947 {
948    if (!m_dest)
949       return true;
950 
951    if (m_dest->pin() == pin_group || m_dest->pin() == pin_chan) {
952       switch (m_opcode) {
953       case op2_interp_x:
954       case op2_interp_xy:
955       case op2_interp_z:
956       case op2_interp_zw:
957          reset_alu_flag(alu_write);
958          return false;
959       default:;
960       }
961    }
962 
963    if (m_dest->pin() == pin_array)
964       return false;
965 
966    /* We assume that nir does a good job in eliminating all ALU results that
967     * are not needed, and we don't let copy propagation doesn't make the
968     * instruction obsolete, so just keep all */
969    if (has_alu_flag(alu_is_cayman_trans))
970       return false;
971 
972    for (auto& src : m_src) {
973       auto reg = src->as_register();
974       if (reg)
975          reg->del_use(this);
976    }
977    return true;
978 }
979 
980 bool
has_lds_access() const981 AluInstr::has_lds_access() const
982 {
983    return has_alu_flag(alu_is_lds) || has_lds_queue_read();
984 }
985 
986 bool
has_lds_queue_read() const987 AluInstr::has_lds_queue_read() const
988 {
989    for (auto& s : m_src) {
990       auto ic = s->as_inline_const();
991       if (!ic)
992          continue;
993 
994       if (ic->sel() == ALU_SRC_LDS_OQ_A_POP || ic->sel() == ALU_SRC_LDS_OQ_B_POP)
995          return true;
996    }
997    return false;
998 }
999 
/* Result of resolving an opcode name in AluInstr::from_string.
 *
 * Exactly one member of the union is meaningful: alu_opcode for regular
 * ALU instructions, lds_opcode for instructions prefixed with "LDS".
 * nsrc is the number of source operands per slot; from_string uses -1 as
 * the "not found" marker. */
struct OpDescr {
   union {
      EAluOp alu_opcode;
      ESDOp lds_opcode;
   };
   int nsrc;
};
1007 
/* Lookup tables from opcode name to opcode descriptor that are consulted by
 * AluInstr::from_string before it falls back to a linear search of the
 * opcode description tables; one table for ALU and one for LDS opcodes. */
static std::map<std::string, OpDescr> s_alu_map_by_name;
static std::map<std::string, OpDescr> s_lds_map_by_name;
1010 
1011 Instr::Pointer
from_string(istream & is,ValueFactory & value_factory,AluGroup * group,bool is_cayman)1012 AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group, bool is_cayman)
1013 {
1014    vector<string> tokens;
1015 
1016    while (is.good() && !is.eof()) {
1017       string t;
1018       is >> t;
1019       if (t.length() > 0) {
1020          tokens.push_back(t);
1021       }
1022    }
1023 
1024    std::set<AluModifiers> flags;
1025    auto t = tokens.begin();
1026 
1027    bool is_lds = false;
1028 
1029    if (*t == "LDS") {
1030       is_lds = true;
1031       t++;
1032    }
1033 
1034    string opstr = *t++;
1035    string deststr = *t++;
1036 
1037    if (deststr == "CLAMP") {
1038       flags.insert(alu_dst_clamp);
1039       deststr = *t++;
1040    }
1041 
1042    assert(*t == ":");
1043    OpDescr op_descr = {{op_invalid}, -1};
1044 
1045    if (is_lds) {
1046       auto op = s_lds_map_by_name.find(opstr);
1047       if (op == s_lds_map_by_name.end()) {
1048          for (auto [opcode, opdescr] : lds_ops) {
1049             if (opstr == opdescr.name) {
1050                op_descr.lds_opcode = opcode;
1051                op_descr.nsrc = opdescr.nsrc;
1052                s_alu_map_by_name[opstr] = op_descr;
1053                break;
1054             }
1055          }
1056 
1057          if (op_descr.nsrc == -1) {
1058             std::cerr << "'" << opstr << "'";
1059             unreachable("Unknown opcode");
1060             return nullptr;
1061          }
1062       } else {
1063          op_descr = op->second;
1064       }
1065    } else {
1066       auto op = s_alu_map_by_name.find(opstr);
1067       if (op == s_alu_map_by_name.end()) {
1068          for (auto [opcode, opdescr] : alu_ops) {
1069             if (opstr == opdescr.name) {
1070                op_descr = {{opcode}, opdescr.nsrc};
1071                s_alu_map_by_name[opstr] = op_descr;
1072                break;
1073             }
1074          }
1075 
1076          if (op_descr.nsrc == -1) {
1077             std::cerr << "'" << opstr << "'";
1078             unreachable("Unknown opcode");
1079             return nullptr;
1080          }
1081       } else {
1082          op_descr = op->second;
1083       }
1084       if (is_cayman) {
1085          switch (op_descr.alu_opcode) {
1086          case op1_cos:
1087          case op1_exp_ieee:
1088          case op1_log_clamped:
1089          case op1_recip_ieee:
1090          case op1_recipsqrt_ieee1:
1091          case op1_sqrt_ieee:
1092          case op1_sin:
1093          case op2_mullo_int:
1094          case op2_mulhi_int:
1095          case op2_mulhi_uint:
1096             flags.insert(alu_is_cayman_trans);
1097          default:
1098          ;
1099          }
1100       }
1101    }
1102 
1103    int slots = 0;
1104 
1105    uint32_t src_mods = 0;
1106    SrcValues sources;
1107    do {
1108       ++t;
1109       for (int i = 0; i < op_descr.nsrc; ++i) {
1110          string srcstr = *t++;
1111 
1112          if (srcstr[0] == '-') {
1113             src_mods |= AluInstr::mod_neg << (2 * sources.size());
1114             srcstr = srcstr.substr(1);
1115          }
1116 
1117          if (srcstr[0] == '|') {
1118             assert(srcstr[srcstr.length() - 1] == '|');
1119             src_mods |= AluInstr::mod_abs << (2 * sources.size());
1120             srcstr = srcstr.substr(1, srcstr.length() - 2);
1121          }
1122 
1123          auto src = value_factory.src_from_string(srcstr);
1124          if (!src) {
1125             std::cerr << "Unable to create src[" << i << "] from " << srcstr << "\n";
1126             assert(src);
1127          }
1128          sources.push_back(src);
1129       }
1130       ++slots;
1131    } while (t != tokens.end() && *t == "+");
1132 
1133    AluBankSwizzle bank_swizzle = alu_vec_unknown;
1134    ECFAluOpCode cf = cf_alu;
1135 
1136    while (t != tokens.end()) {
1137 
1138       switch ((*t)[0]) {
1139       case '{': {
1140          auto iflag = t->begin() + 1;
1141          while (iflag != t->end()) {
1142             if (*iflag == '}')
1143                break;
1144 
1145             switch (*iflag) {
1146             case 'L':
1147                flags.insert(alu_last_instr);
1148                break;
1149             case 'W':
1150                flags.insert(alu_write);
1151                break;
1152             case 'E':
1153                flags.insert(alu_update_exec);
1154                break;
1155             case 'P':
1156                flags.insert(alu_update_pred);
1157                break;
1158             }
1159             ++iflag;
1160          }
1161       } break;
1162 
1163       case 'V': {
1164          string bs = *t;
1165          if (bs == "VEC_012")
1166             bank_swizzle = alu_vec_012;
1167          else if (bs == "VEC_021")
1168             bank_swizzle = alu_vec_021;
1169          else if (bs == "VEC_102")
1170             bank_swizzle = alu_vec_102;
1171          else if (bs == "VEC_120")
1172             bank_swizzle = alu_vec_120;
1173          else if (bs == "VEC_201")
1174             bank_swizzle = alu_vec_201;
1175          else if (bs == "VEC_210")
1176             bank_swizzle = alu_vec_210;
1177          else {
1178             std::cerr << "'" << bs << "': ";
1179             unreachable("Unknowe bankswizzle given");
1180          }
1181       } break;
1182 
1183       default: {
1184          string cf_str = *t;
1185          if (cf_str == "PUSH_BEFORE")
1186             cf = cf_alu_push_before;
1187          else if (cf_str == "POP_AFTER")
1188             cf = cf_alu_pop_after;
1189          else if (cf_str == "POP2_AFTER")
1190             cf = cf_alu_pop2_after;
1191          else if (cf_str == "EXTENDED")
1192             cf = cf_alu_extended;
1193          else if (cf_str == "BREAK")
1194             cf = cf_alu_break;
1195          else if (cf_str == "CONT")
1196             cf = cf_alu_continue;
1197          else if (cf_str == "ELSE_AFTER")
1198             cf = cf_alu_else_after;
1199          else {
1200             std::cerr << " '" << cf_str << "' ";
1201             unreachable("Unknown tocken in ALU instruction");
1202          }
1203       }
1204       }
1205       ++t;
1206    }
1207 
1208    PRegister dest = nullptr;
1209    // construct instruction
1210    if (deststr != "(null)")
1211       dest = value_factory.dest_from_string(deststr);
1212 
1213    AluInstr *retval = nullptr;
1214    if (is_lds)
1215       retval = new AluInstr(op_descr.lds_opcode, sources, flags);
1216    else
1217       retval = new AluInstr(op_descr.alu_opcode, dest, sources, flags, slots);
1218 
1219    retval->m_source_modifiers = src_mods;
1220    retval->set_bank_swizzle(bank_swizzle);
1221    retval->set_cf_type(cf);
1222    if (group) {
1223       group->add_instruction(retval);
1224       retval = nullptr;
1225    }
1226    return retval;
1227 }
1228 
1229 bool
do_ready() const1230 AluInstr::do_ready() const
1231 {
1232    /* Alu instructions are shuffled by the scheduler, so
1233     * we have to make sure that required ops are already
1234     * scheduled before marking this one ready */
1235    for (auto i : required_instr()) {
1236       if (i->is_dead())
1237          continue;
1238 
1239       bool is_older_instr = i->block_id() <= block_id() &&
1240                             i->index() < index();
1241       bool is_lds = i->as_alu() && i->as_alu()->has_lds_access();
1242       if (!i->is_scheduled() && (is_older_instr || is_lds))
1243          return false;
1244    }
1245 
1246    for (auto s : m_src) {
1247       auto r = s->as_register();
1248       if (r) {
1249          if (!r->ready(block_id(), index()))
1250             return false;
1251       }
1252       auto u = s->as_uniform();
1253       if (u && u->buf_addr() && u->buf_addr()->as_register()) {
1254          if (!u->buf_addr()->as_register()->ready(block_id(), index()))
1255             return false;
1256       }
1257    }
1258 
1259    if (m_dest && !m_dest->has_flag(Register::ssa)) {
1260       if (m_dest->pin() == pin_array) {
1261          auto av = static_cast<const LocalArrayValue *>(m_dest);
1262          auto addr = av->addr();
1263          /* For true indiect dest access we have to make sure that all
1264           * instructions that write the value before are schedukled */
1265          if (addr && (!addr->ready(block_id(), index()) ||
1266                       !m_dest->ready(block_id(), index() - 1)))
1267             return false;
1268       }
1269 
1270       /* If a register is updates, we have to make sure that uses before that
1271        * update are scheduled, otherwise we may use the updated value when we
1272        * shouldn't */
1273       for (auto u : m_dest->uses()) {
1274          /* TODO: This is working around some sloppy use updates, dead instrzuctions
1275           * should remove themselves from uses. */
1276          if (u->is_dead())
1277             continue;
1278          if (!u->is_scheduled() &&
1279              u->block_id() <= block_id() &&
1280              u->index() < index()) {
1281             return false;
1282          }
1283       }
1284    }
1285 
1286    for (auto& r : m_extra_dependencies) {
1287       if (!r->ready(block_id(), index()))
1288          return false;
1289    }
1290 
1291    return true;
1292 }
1293 
1294 void
visit(AluGroup * instr)1295 AluInstrVisitor::visit(AluGroup *instr)
1296 {
1297    for (auto& i : *instr) {
1298       if (i)
1299          i->accept(*this);
1300    }
1301 }
1302 
1303 void
visit(Block * instr)1304 AluInstrVisitor::visit(Block *instr)
1305 {
1306    for (auto& i : *instr)
1307       i->accept(*this);
1308 }
1309 
1310 void
visit(IfInstr * instr)1311 AluInstrVisitor::visit(IfInstr *instr)
1312 {
1313    instr->predicate()->accept(*this);
1314 }
1315 
is_kill() const1316 bool AluInstr::is_kill() const
1317 {
1318    if (has_alu_flag(alu_is_lds))
1319       return false;
1320 
1321    switch (m_opcode) {
1322    case op2_kille:
1323    case op2_kille_int:
1324    case op2_killne:
1325    case op2_killne_int:
1326    case op2_killge:
1327    case op2_killge_int:
1328    case op2_killge_uint:
1329    case op2_killgt:
1330    case op2_killgt_int:
1331    case op2_killgt_uint:
1332       return true;
1333    default:
1334       return false;
1335    }
1336 }
1337 
/* Optional modifier that can be passed to emit_alu_op1. AluInstr::from_nir
 * passes mod_src0_abs for nir_op_fabs, mod_src0_neg for nir_op_fneg and
 * mod_dest_clamp for nir_op_fsat, each combined with op1_mov. */
enum AluMods {
   mod_none,
   mod_src0_abs,
   mod_src0_neg,
   mod_dest_clamp,
};
1344 
/* Forward declarations of the file-local helpers that AluInstr::from_nir
 * dispatches to. Each helper receives the NIR ALU instruction and the
 * shader and returns a bool that from_nir passes on to its caller
 * (presumably true on success - confirm against the definitions). */

/* Used by from_nir for nir_op_b2f32 / nir_op_b2i32 with the inline constant
 * ALU_SRC_1 / ALU_SRC_1_INT as mask. */
static bool
emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader);



/* Single-source ops, and the helpers from_nir selects when a source or
 * destination of the instruction is 64 bit wide. */
static bool
emit_alu_op1(const nir_alu_instr& alu,
             EAluOp opcode,
             Shader& shader,
             AluMods mod = mod_none);
static bool
emit_alu_op1_64bit(const nir_alu_instr& alu,
                   EAluOp opcode,
                   Shader& shader,
                   bool switch_chan);
static bool
emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_neg(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_op2_64bit(const nir_alu_instr& alu,
                   EAluOp opcode,
                   Shader& shader,
                   bool switch_order);
static bool
emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu,
                           EAluOp opcode,
                           Shader& shader,
                           bool switch_order);
static bool
emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
static bool
emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader);

/* Two- and three-source ops and the any/all vector comparisons. */
static bool
emit_alu_op2(const nir_alu_instr& alu,
             EAluOp opcode,
             Shader& shader,
             AluInstr::Op2Options opts = AluInstr::op2_opt_none);
static bool
emit_alu_op2_int(const nir_alu_instr& alu,
                 EAluOp opcode,
                 Shader& shader,
                 AluInstr::Op2Options opts = AluInstr::op2_opt_none);
static bool
emit_alu_op3(const nir_alu_instr& alu,
             EAluOp opcode,
             Shader& shader,
             const std::array<int, 3>& src_shuffle = {0, 1, 2});
static bool
emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_any_all_fcomp(
   const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
static bool
emit_any_all_icomp(
   const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);

/* emit_alu_comb_with_zero is used by from_nir for nir_op_ineg (with
 * op2_sub_int); the remaining helpers handle the pack/unpack ops. */
static bool
emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader);
static bool
emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader);
static bool
emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader);

static bool
emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader);
static bool
emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader);

/* Dot products (from_nir uses emit_dot only for chip classes starting at
 * ISA_CC_EVERGREEN and emit_dot4 otherwise) and vecN construction. */
static bool
emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
static bool
emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader);
static bool
emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);

/* from_nir uses the *_cayman variants when the chip class is ISA_CC_CAYMAN
 * and the *_eg variants for all other chip classes. */
static bool
emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);

static bool
emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);

/* Used by from_nir for nir_op_f2i32 / nir_op_f2u32 when the chip class is
 * ISA_CC_EVERGREEN. */
static bool
emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);

/* Used by from_nir for the fddx/fddy derivative ops with the texture
 * opcodes get_gradient_h / get_gradient_v. */
static bool
emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader);

static bool
emit_alu_cube(const nir_alu_instr& alu, Shader& shader);

static bool
emit_fdph(const nir_alu_instr& alu, Shader& shader);
1463 
1464 static bool
check_64_bit_op_src(nir_src * src,void * state)1465 check_64_bit_op_src(nir_src *src, void *state)
1466 {
1467    if (nir_src_bit_size(*src) == 64) {
1468       *(bool *)state = true;
1469       return false;
1470    }
1471    return true;
1472 }
1473 
1474 static bool
check_64_bit_op_def(nir_def * def,void * state)1475 check_64_bit_op_def(nir_def *def, void *state)
1476 {
1477    if (def->bit_size == 64) {
1478       *(bool *)state = true;
1479       return false;
1480    }
1481    return true;
1482 }
1483 
1484 bool
from_nir(nir_alu_instr * alu,Shader & shader)1485 AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
1486 {
1487    bool is_64bit_op = false;
1488    nir_foreach_src(&alu->instr, check_64_bit_op_src, &is_64bit_op);
1489    if (!is_64bit_op)
1490       nir_foreach_def(&alu->instr, check_64_bit_op_def, &is_64bit_op);
1491 
1492    if (is_64bit_op) {
1493       switch (alu->op) {
1494       case nir_op_pack_64_2x32:
1495       case nir_op_unpack_64_2x32:
1496       case nir_op_pack_64_2x32_split:
1497       case nir_op_pack_half_2x16_split:
1498       case nir_op_unpack_64_2x32_split_x:
1499       case nir_op_unpack_64_2x32_split_y:
1500          break;
1501       case nir_op_mov:
1502          return emit_alu_mov_64bit(*alu, shader);
1503       case nir_op_fneg:
1504          return emit_alu_neg(*alu, shader);
1505       case nir_op_fsat:
1506          return emit_alu_fsat64(*alu, shader);
1507       case nir_op_ffract:
1508          return emit_alu_op1_64bit(*alu, op1_fract_64, shader, true);
1509       case nir_op_feq32:
1510          return emit_alu_op2_64bit_one_dst(*alu, op2_sete_64, shader, false);
1511       case nir_op_fge32:
1512          return emit_alu_op2_64bit_one_dst(*alu, op2_setge_64, shader, false);
1513       case nir_op_flt32:
1514          return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true);
1515       case nir_op_fneu32:
1516          return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
1517       case nir_op_ffma:
1518          return emit_alu_fma_64bit(*alu, op3_fma_64, shader);
1519 
1520       case nir_op_fadd:
1521          return emit_alu_op2_64bit(*alu, op2_add_64, shader, false);
1522       case nir_op_fmul:
1523          return emit_alu_op2_64bit(*alu, op2_mul_64, shader, false);
1524       case nir_op_fmax:
1525          return emit_alu_op2_64bit(*alu, op2_max_64, shader, false);
1526       case nir_op_fmin:
1527          return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
1528       case nir_op_b2f64:
1529          return emit_alu_b2f64(*alu, shader);
1530       case nir_op_f2f64:
1531          return emit_alu_f2f64(*alu, shader);
1532       case nir_op_i2f64:
1533          return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
1534       case nir_op_u2f64:
1535          return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
1536       case nir_op_f2f32:
1537          return emit_alu_f2f32(*alu, shader);
1538       case nir_op_fabs:
1539          return emit_alu_abs64(*alu, shader);
1540       case nir_op_fsqrt:
1541          return emit_alu_op1_64bit_trans(*alu, op1_sqrt_64, shader);
1542       case nir_op_frcp:
1543          return emit_alu_op1_64bit_trans(*alu, op1_recip_64, shader);
1544       case nir_op_frsq:
1545          return emit_alu_op1_64bit_trans(*alu, op1_recipsqrt_64, shader);
1546       case nir_op_vec2:
1547          return emit_alu_vec2_64(*alu, shader);
1548       default:
1549          return false;
1550          ;
1551       }
1552    }
1553 
1554    if (shader.chip_class() == ISA_CC_CAYMAN) {
1555       switch (alu->op) {
1556       case nir_op_fcos_amd:
1557          return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
1558       case nir_op_fexp2:
1559          return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
1560       case nir_op_flog2:
1561          return emit_alu_trans_op1_cayman(*alu, op1_log_clamped, shader);
1562       case nir_op_frcp:
1563          return emit_alu_trans_op1_cayman(*alu, op1_recip_ieee, shader);
1564       case nir_op_frsq:
1565          return emit_alu_trans_op1_cayman(*alu, op1_recipsqrt_ieee1, shader);
1566       case nir_op_fsqrt:
1567          return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
1568       case nir_op_fsin_amd:
1569          return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
1570       case nir_op_i2f32:
1571          return emit_alu_op1(*alu, op1_int_to_flt, shader);
1572       case nir_op_u2f32:
1573          return emit_alu_op1(*alu, op1_uint_to_flt, shader);
1574       case nir_op_imul:
1575          return emit_alu_trans_op2_cayman(*alu, op2_mullo_int, shader);
1576       case nir_op_imul_high:
1577          return emit_alu_trans_op2_cayman(*alu, op2_mulhi_int, shader);
1578       case nir_op_umul_high:
1579          return emit_alu_trans_op2_cayman(*alu, op2_mulhi_uint, shader);
1580       case nir_op_f2u32:
1581          return emit_alu_op1(*alu, op1_flt_to_uint, shader);
1582       case nir_op_f2i32:
1583          return emit_alu_op1(*alu, op1_flt_to_int, shader);
1584       case nir_op_ishl:
1585          return emit_alu_op2_int(*alu, op2_lshl_int, shader);
1586       case nir_op_ishr:
1587          return emit_alu_op2_int(*alu, op2_ashr_int, shader);
1588       case nir_op_ushr:
1589          return emit_alu_op2_int(*alu, op2_lshr_int, shader);
1590       default:;
1591       }
1592    } else {
1593       if (shader.chip_class() == ISA_CC_EVERGREEN) {
1594          switch (alu->op) {
1595          case nir_op_f2i32:
1596             return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_int, shader);
1597          case nir_op_f2u32:
1598             return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_uint, shader);
1599          default:;
1600          }
1601       }
1602 
1603       if (shader.chip_class() >= ISA_CC_R700) {
1604          switch (alu->op) {
1605          case nir_op_ishl:
1606             return emit_alu_op2_int(*alu, op2_lshl_int, shader);
1607          case nir_op_ishr:
1608             return emit_alu_op2_int(*alu, op2_ashr_int, shader);
1609          case nir_op_ushr:
1610             return emit_alu_op2_int(*alu, op2_lshr_int, shader);
1611          default:;
1612          }
1613       } else {
1614          switch (alu->op) {
1615          case nir_op_ishl:
1616             return emit_alu_trans_op2_eg(*alu, op2_lshl_int, shader);
1617          case nir_op_ishr:
1618             return emit_alu_trans_op2_eg(*alu, op2_ashr_int, shader);
1619          case nir_op_ushr:
1620             return emit_alu_trans_op2_eg(*alu, op2_lshr_int, shader);
1621          default:;
1622          }
1623       }
1624 
1625       switch (alu->op) {
1626       case nir_op_f2i32:
1627          return emit_alu_trans_op1_eg(*alu, op1_flt_to_int, shader);
1628       case nir_op_f2u32:
1629          return emit_alu_trans_op1_eg(*alu, op1_flt_to_uint, shader);
1630       case nir_op_fcos_amd:
1631          return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
1632       case nir_op_fexp2:
1633          return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
1634       case nir_op_flog2:
1635          return emit_alu_trans_op1_eg(*alu, op1_log_clamped, shader);
1636       case nir_op_frcp:
1637          return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
1638       case nir_op_frsq:
1639          return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
1640       case nir_op_fsin_amd:
1641          return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
1642       case nir_op_fsqrt:
1643          return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
1644       case nir_op_i2f32:
1645          return emit_alu_trans_op1_eg(*alu, op1_int_to_flt, shader);
1646       case nir_op_u2f32:
1647          return emit_alu_trans_op1_eg(*alu, op1_uint_to_flt, shader);
1648       case nir_op_imul:
1649          return emit_alu_trans_op2_eg(*alu, op2_mullo_int, shader);
1650       case nir_op_imul_high:
1651          return emit_alu_trans_op2_eg(*alu, op2_mulhi_int, shader);
1652       case nir_op_umul_high:
1653          return emit_alu_trans_op2_eg(*alu, op2_mulhi_uint, shader);
1654       default:;
1655       }
1656    }
1657 
1658    switch (alu->op) {
1659    case nir_op_b2b1:
1660       return emit_alu_op1(*alu, op1_mov, shader);
1661    case nir_op_b2b32:
1662       return emit_alu_op1(*alu, op1_mov, shader);
1663    case nir_op_b2f32:
1664       return emit_alu_b2x(*alu, ALU_SRC_1, shader);
1665    case nir_op_b2i32:
1666       return emit_alu_b2x(*alu, ALU_SRC_1_INT, shader);
1667 
1668    case nir_op_bfm:
1669       return emit_alu_op2_int(*alu, op2_bfm_int, shader, op2_opt_none);
1670    case nir_op_bit_count:
1671       return emit_alu_op1(*alu, op1_bcnt_int, shader);
1672 
1673    case nir_op_bitfield_reverse:
1674       return emit_alu_op1(*alu, op1_bfrev_int, shader);
1675    case nir_op_bitfield_select:
1676       return emit_alu_op3(*alu, op3_bfi_int, shader);
1677 
1678    case nir_op_b32all_fequal2:
1679       return emit_any_all_fcomp2(*alu, op2_sete_dx10, shader);
1680    case nir_op_b32all_fequal3:
1681       return emit_any_all_fcomp(*alu, op2_sete, 3, true, shader);
1682    case nir_op_b32all_fequal4:
1683       return emit_any_all_fcomp(*alu, op2_sete, 4, true, shader);
1684    case nir_op_b32all_iequal2:
1685       return emit_any_all_icomp(*alu, op2_sete_int, 2, true, shader);
1686    case nir_op_b32all_iequal3:
1687       return emit_any_all_icomp(*alu, op2_sete_int, 3, true, shader);
1688    case nir_op_b32all_iequal4:
1689       return emit_any_all_icomp(*alu, op2_sete_int, 4, true, shader);
1690    case nir_op_b32any_fnequal2:
1691       return emit_any_all_fcomp2(*alu, op2_setne_dx10, shader);
1692    case nir_op_b32any_fnequal3:
1693       return emit_any_all_fcomp(*alu, op2_setne, 3, false, shader);
1694    case nir_op_b32any_fnequal4:
1695       return emit_any_all_fcomp(*alu, op2_setne, 4, false, shader);
1696    case nir_op_b32any_inequal2:
1697       return emit_any_all_icomp(*alu, op2_setne_int, 2, false, shader);
1698    case nir_op_b32any_inequal3:
1699       return emit_any_all_icomp(*alu, op2_setne_int, 3, false, shader);
1700    case nir_op_b32any_inequal4:
1701       return emit_any_all_icomp(*alu, op2_setne_int, 4, false, shader);
1702    case nir_op_b32csel:
1703       return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});
1704 
1705    case nir_op_fabs:
1706       return emit_alu_op1(*alu, op1_mov, shader, mod_src0_abs);
1707    case nir_op_fadd:
1708       return emit_alu_op2(*alu, op2_add, shader);
1709    case nir_op_fceil:
1710       return emit_alu_op1(*alu, op1_ceil, shader);
1711    case nir_op_fcsel:
1712       return emit_alu_op3(*alu, op3_cnde, shader, {0, 2, 1});
1713    case nir_op_fcsel_ge:
1714       return emit_alu_op3(*alu, op3_cndge, shader, {0, 1, 2});
1715    case nir_op_fcsel_gt:
1716       return emit_alu_op3(*alu, op3_cndgt, shader, {0, 1, 2});
1717 
1718    case nir_op_fdph:
1719       return emit_fdph(*alu, shader);
1720    case nir_op_fdot2:
1721       if (shader.chip_class() >= ISA_CC_EVERGREEN)
1722          return emit_dot(*alu, 2, shader);
1723       else
1724          return emit_dot4(*alu, 2, shader);
1725    case nir_op_fdot3:
1726       if (shader.chip_class() >= ISA_CC_EVERGREEN)
1727          return emit_dot(*alu, 3, shader);
1728       else
1729          return emit_dot4(*alu, 3, shader);
1730    case nir_op_fdot4:
1731       return emit_dot4(*alu, 4, shader);
1732 
1733    case nir_op_feq32:
1734    case nir_op_feq:
1735       return emit_alu_op2(*alu, op2_sete_dx10, shader);
1736    case nir_op_ffloor:
1737       return emit_alu_op1(*alu, op1_floor, shader);
1738    case nir_op_ffract:
1739       return emit_alu_op1(*alu, op1_fract, shader);
1740    case nir_op_fge32:
1741       return emit_alu_op2(*alu, op2_setge_dx10, shader);
1742    case nir_op_fge:
1743       return emit_alu_op2(*alu, op2_setge_dx10, shader);
1744    case nir_op_find_lsb:
1745       return emit_alu_op1(*alu, op1_ffbl_int, shader);
1746 
1747    case nir_op_flt32:
1748       return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
1749    case nir_op_flt:
1750       return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
1751    case nir_op_fmax:
1752       return emit_alu_op2(*alu, op2_max_dx10, shader);
1753    case nir_op_fmin:
1754       return emit_alu_op2(*alu, op2_min_dx10, shader);
1755 
1756    case nir_op_fmul:
1757       if (!shader.has_flag(Shader::sh_legacy_math_rules))
1758          return emit_alu_op2(*alu, op2_mul_ieee, shader);
1759       FALLTHROUGH;
1760    case nir_op_fmulz:
1761       return emit_alu_op2(*alu, op2_mul, shader);
1762 
1763    case nir_op_fneg:
1764       return emit_alu_op1(*alu, op1_mov, shader, mod_src0_neg);
1765    case nir_op_fneu32:
1766       return emit_alu_op2(*alu, op2_setne_dx10, shader);
1767    case nir_op_fneu:
1768       return emit_alu_op2(*alu, op2_setne_dx10, shader);
1769 
1770    case nir_op_fround_even:
1771       return emit_alu_op1(*alu, op1_rndne, shader);
1772    case nir_op_fsat:
1773       return emit_alu_op1(*alu, op1_mov, shader, mod_dest_clamp);
1774    case nir_op_fsub:
1775       return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
1776    case nir_op_ftrunc:
1777       return emit_alu_op1(*alu, op1_trunc, shader);
1778    case nir_op_iadd:
1779       return emit_alu_op2_int(*alu, op2_add_int, shader);
1780    case nir_op_iand:
1781       return emit_alu_op2_int(*alu, op2_and_int, shader);
1782    case nir_op_ibfe:
1783       return emit_alu_op3(*alu, op3_bfe_int, shader);
1784    case nir_op_i32csel_ge:
1785       return emit_alu_op3(*alu, op3_cndge_int, shader, {0, 1, 2});
1786    case nir_op_i32csel_gt:
1787       return emit_alu_op3(*alu, op3_cndgt_int, shader, {0, 1, 2});
1788    case nir_op_ieq32:
1789       return emit_alu_op2_int(*alu, op2_sete_int, shader);
1790    case nir_op_ieq:
1791       return emit_alu_op2_int(*alu, op2_sete_int, shader);
1792    case nir_op_ifind_msb_rev:
1793       return emit_alu_op1(*alu, op1_ffbh_int, shader);
1794    case nir_op_ige32:
1795       return emit_alu_op2_int(*alu, op2_setge_int, shader);
1796    case nir_op_ige:
1797       return emit_alu_op2_int(*alu, op2_setge_int, shader);
1798    case nir_op_ilt32:
1799       return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
1800    case nir_op_ilt:
1801       return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
1802    case nir_op_imax:
1803       return emit_alu_op2_int(*alu, op2_max_int, shader);
1804    case nir_op_imin:
1805       return emit_alu_op2_int(*alu, op2_min_int, shader);
1806    case nir_op_ine32:
1807       return emit_alu_op2_int(*alu, op2_setne_int, shader);
1808    case nir_op_ine:
1809       return emit_alu_op2_int(*alu, op2_setne_int, shader);
1810    case nir_op_ineg:
1811       return emit_alu_comb_with_zero(*alu, op2_sub_int, shader);
1812    case nir_op_inot:
1813       return emit_alu_op1(*alu, op1_not_int, shader);
1814    case nir_op_ior:
1815       return emit_alu_op2_int(*alu, op2_or_int, shader);
1816    case nir_op_isub:
1817       return emit_alu_op2_int(*alu, op2_sub_int, shader);
1818    case nir_op_ixor:
1819       return emit_alu_op2_int(*alu, op2_xor_int, shader);
1820    case nir_op_pack_64_2x32:
1821       return emit_pack_64_2x32(*alu, shader);
1822    case nir_op_unpack_64_2x32:
1823       return emit_unpack_64_2x32(*alu, shader);
1824    case nir_op_pack_64_2x32_split:
1825       return emit_pack_64_2x32_split(*alu, shader);
1826    case nir_op_pack_half_2x16_split:
1827       return emit_pack_32_2x16_split(*alu, shader);
1828    case nir_op_slt:
1829       return emit_alu_op2(*alu, op2_setgt, shader, op2_opt_reverse);
1830    case nir_op_sge:
1831       return emit_alu_op2(*alu, op2_setge, shader);
1832    case nir_op_seq:
1833       return emit_alu_op2(*alu, op2_sete, shader);
1834    case nir_op_sne:
1835       return emit_alu_op2(*alu, op2_setne, shader);
1836    case nir_op_ubfe:
1837       return emit_alu_op3(*alu, op3_bfe_uint, shader);
1838    case nir_op_ufind_msb_rev:
1839       return emit_alu_op1(*alu, op1_ffbh_uint, shader);
1840    case nir_op_uge32:
1841       return emit_alu_op2_int(*alu, op2_setge_uint, shader);
1842    case nir_op_uge:
1843       return emit_alu_op2_int(*alu, op2_setge_uint, shader);
1844    case nir_op_ult32:
1845       return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
1846    case nir_op_ult:
1847       return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
1848    case nir_op_umad24:
1849       return emit_alu_op3(*alu, op3_muladd_uint24, shader, {0, 1, 2});
1850    case nir_op_umax:
1851       return emit_alu_op2_int(*alu, op2_max_uint, shader);
1852    case nir_op_umin:
1853       return emit_alu_op2_int(*alu, op2_min_uint, shader);
1854    case nir_op_umul24:
1855       return emit_alu_op2(*alu, op2_mul_uint24, shader);
1856    case nir_op_unpack_64_2x32_split_x:
1857       return emit_unpack_64_2x32_split(*alu, 0, shader);
1858    case nir_op_unpack_64_2x32_split_y:
1859       return emit_unpack_64_2x32_split(*alu, 1, shader);
1860    case nir_op_unpack_half_2x16_split_x:
1861       return emit_unpack_32_2x16_split_x(*alu, shader);
1862    case nir_op_unpack_half_2x16_split_y:
1863       return emit_unpack_32_2x16_split_y(*alu, shader);
1864 
1865    case nir_op_ffma:
1866       if (!shader.has_flag(Shader::sh_legacy_math_rules))
1867          return emit_alu_op3(*alu, op3_muladd_ieee, shader);
1868       FALLTHROUGH;
1869    case nir_op_ffmaz:
1870       return emit_alu_op3(*alu, op3_muladd, shader);
1871 
1872    case nir_op_mov:
1873       return emit_alu_op1(*alu, op1_mov, shader);
1874    case nir_op_f2i32:
1875       return emit_alu_op1(*alu, op1_flt_to_int, shader);
1876    case nir_op_vec2:
1877       return emit_create_vec(*alu, 2, shader);
1878    case nir_op_vec3:
1879       return emit_create_vec(*alu, 3, shader);
1880    case nir_op_vec4:
1881       return emit_create_vec(*alu, 4, shader);
1882 
1883    case nir_op_fddx:
1884    case nir_op_fddx_coarse:
1885       return emit_tex_fdd(*alu, TexInstr::get_gradient_h, false, shader);
1886    case nir_op_fddx_fine:
1887       return emit_tex_fdd(*alu, TexInstr::get_gradient_h, true, shader);
1888    case nir_op_fddy:
1889    case nir_op_fddy_coarse:
1890       return emit_tex_fdd(*alu, TexInstr::get_gradient_v, false, shader);
1891    case nir_op_fddy_fine:
1892       return emit_tex_fdd(*alu, TexInstr::get_gradient_v, true, shader);
1893    case nir_op_cube_amd:
1894       return emit_alu_cube(*alu, shader);
1895    default:
1896       fprintf(stderr, "Unknown instruction '");
1897       nir_print_instr(&alu->instr, stderr);
1898       fprintf(stderr, "'\n");
1899       assert(0);
1900       return false;
1901    }
1902 }
1903 
1904 static Pin
pin_for_components(const nir_alu_instr & alu)1905 pin_for_components(const nir_alu_instr& alu)
1906 {
1907    return alu.def.num_components == 1 ? pin_free : pin_none;
1908 }
1909 
1910 static bool
emit_alu_op1_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_chan)1911 emit_alu_op1_64bit(const nir_alu_instr& alu,
1912                    EAluOp opcode,
1913                    Shader& shader,
1914                    bool switch_chan)
1915 {
1916    auto& value_factory = shader.value_factory();
1917 
1918    auto group = new AluGroup();
1919 
1920    AluInstr *ir = nullptr;
1921 
1922    int swz[2] = {0, 1};
1923    if (switch_chan) {
1924       swz[0] = 1;
1925       swz[1] = 0;
1926    }
1927 
1928    for (unsigned i = 0; i < alu.def.num_components; ++i) {
1929       ir = new AluInstr(opcode,
1930                         value_factory.dest(alu.def, 2 * i, pin_chan),
1931                         value_factory.src64(alu.src[0], i, swz[0]),
1932                         {alu_write});
1933       group->add_instruction(ir);
1934 
1935       ir = new AluInstr(opcode,
1936                         value_factory.dest(alu.def, 2 * i + 1, pin_chan),
1937                         value_factory.src64(alu.src[0], i, swz[1]),
1938                         {alu_write});
1939       group->add_instruction(ir);
1940    }
1941    if (ir)
1942       ir->set_alu_flag(alu_last_instr);
1943    shader.emit_instruction(group);
1944    return true;
1945 }
1946 
1947 static bool
emit_alu_mov_64bit(const nir_alu_instr & alu,Shader & shader)1948 emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader)
1949 {
1950    auto& value_factory = shader.value_factory();
1951 
1952    AluInstr *ir = nullptr;
1953 
1954    for (unsigned i = 0; i < alu.def.num_components; ++i) {
1955       for (unsigned c = 0; c < 2; ++c) {
1956          ir = new AluInstr(op1_mov,
1957                            value_factory.dest(alu.def, 2 * i + c, pin_free),
1958                            value_factory.src64(alu.src[0], i, c),
1959                            {alu_write});
1960          shader.emit_instruction(ir);
1961       }
1962    }
1963    if (ir)
1964       ir->set_alu_flag(alu_last_instr);
1965    return true;
1966 }
1967 
1968 static bool
emit_alu_neg(const nir_alu_instr & alu,Shader & shader)1969 emit_alu_neg(const nir_alu_instr& alu, Shader& shader)
1970 {
1971    auto& value_factory = shader.value_factory();
1972 
1973    AluInstr *ir = nullptr;
1974 
1975    for (unsigned i = 0; i < alu.def.num_components; ++i) {
1976       for (unsigned c = 0; c < 2; ++c) {
1977          ir = new AluInstr(op1_mov,
1978                            value_factory.dest(alu.def, 2 * i + c, pin_chan),
1979                            value_factory.src64(alu.src[0], i, c),
1980                            {alu_write});
1981          shader.emit_instruction(ir);
1982       }
1983       ir->set_source_mod(0, AluInstr::mod_neg);
1984    }
1985    if (ir)
1986       ir->set_alu_flag(alu_last_instr);
1987 
1988    return true;
1989 }
1990 
1991 static bool
emit_alu_abs64(const nir_alu_instr & alu,Shader & shader)1992 emit_alu_abs64(const nir_alu_instr& alu, Shader& shader)
1993 {
1994    auto& value_factory = shader.value_factory();
1995 
1996    assert(alu.def.num_components == 1);
1997 
1998    shader.emit_instruction(new AluInstr(op1_mov,
1999                                         value_factory.dest(alu.def, 0, pin_chan),
2000                                         value_factory.src64(alu.src[0], 0, 0),
2001                                         AluInstr::write));
2002 
2003    auto ir = new AluInstr(op1_mov,
2004                           value_factory.dest(alu.def, 1, pin_chan),
2005                           value_factory.src64(alu.src[0], 0, 1),
2006                           AluInstr::last_write);
2007    ir->set_source_mod(0, AluInstr::mod_abs);
2008    shader.emit_instruction(ir);
2009    return true;
2010 }
2011 
2012 static bool
try_propagat_fsat64(const nir_alu_instr & alu,Shader & shader)2013 try_propagat_fsat64(const nir_alu_instr& alu, Shader& shader)
2014 {
2015    auto& value_factory = shader.value_factory();
2016    auto src0 = value_factory.src64(alu.src[0], 0, 0);
2017    auto reg0 = src0->as_register();
2018    if (!reg0)
2019       return false;
2020 
2021    if (!reg0->has_flag(Register::ssa))
2022       return false;
2023 
2024    if (reg0->parents().size() != 1)
2025       return false;
2026 
2027    if (!reg0->uses().empty())
2028       return false;
2029 
2030    auto parent = (*reg0->parents().begin())->as_alu();
2031    if (!parent)
2032       return false;
2033 
2034    auto opinfo = alu_ops.at(parent->opcode());
2035    if (!opinfo.can_clamp)
2036       return false;
2037 
2038    parent->set_alu_flag(alu_dst_clamp);
2039    return true;
2040 }
2041 
2042 
2043 static bool
emit_alu_fsat64(const nir_alu_instr & alu,Shader & shader)2044 emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader)
2045 {
2046    auto& value_factory = shader.value_factory();
2047 
2048    assert(alu.def.num_components == 1);
2049 
2050    if (try_propagat_fsat64(alu, shader)) {
2051       auto ir = new AluInstr(op1_mov,
2052                              value_factory.dest(alu.def, 0, pin_chan),
2053                              value_factory.src64(alu.src[0], 0, 0),
2054                              AluInstr::write);
2055       shader.emit_instruction(ir);
2056 
2057       shader.emit_instruction(new AluInstr(op1_mov,
2058                                            value_factory.dest(alu.def, 1, pin_chan),
2059                                            value_factory.src64(alu.src[0], 0, 1),
2060                               AluInstr::last_write));
2061    } else {
2062 
2063       /* dest clamp doesn't work on plain 64 bit move, so add a zero
2064        * to apply the modifier */
2065 
2066       auto group = new AluGroup();
2067       auto ir = new AluInstr(op2_add_64,
2068                              value_factory.dest(alu.def, 0, pin_chan),
2069                              value_factory.src64(alu.src[0], 0, 1),
2070                              value_factory.literal(0),
2071                              AluInstr::write);
2072       ir->set_alu_flag(alu_dst_clamp);
2073       group->add_instruction(ir);
2074 
2075       group->add_instruction(new AluInstr(op2_add_64,
2076                                           value_factory.dest(alu.def, 1, pin_chan),
2077                                           value_factory.src64(alu.src[0], 0, 0),
2078                                           value_factory.literal(0),
2079                                           AluInstr::last_write));
2080       shader.emit_instruction(group);
2081 
2082    }
2083    return true;
2084 }
2085 
2086 
2087 static bool
emit_alu_op2_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_src)2088 emit_alu_op2_64bit(const nir_alu_instr& alu,
2089                    EAluOp opcode,
2090                    Shader& shader,
2091                    bool switch_src)
2092 {
2093    auto& value_factory = shader.value_factory();
2094    auto group = new AluGroup();
2095    AluInstr *ir = nullptr;
2096    int order[2] = {0, 1};
2097    if (switch_src) {
2098       order[0] = 1;
2099       order[1] = 0;
2100    }
2101 
2102    int num_emit0 = opcode == op2_mul_64 ? 3 : 1;
2103 
2104    assert(num_emit0 == 1 || alu.def.num_components == 1);
2105 
2106    for (unsigned k = 0; k < alu.def.num_components; ++k) {
2107       int i = 0;
2108       for (; i < num_emit0; ++i) {
2109          auto dest = i < 2 ? value_factory.dest(alu.def, i, pin_chan)
2110                            : value_factory.dummy_dest(i);
2111 
2112          ir = new AluInstr(opcode,
2113                            dest,
2114                            value_factory.src64(alu.src[order[0]], k, 1),
2115                            value_factory.src64(alu.src[order[1]], k, 1),
2116                            i < 2 ? AluInstr::write : AluInstr::empty);
2117          group->add_instruction(ir);
2118       }
2119 
2120       auto dest =
2121          i == 1 ? value_factory.dest(alu.def, i, pin_chan) : value_factory.dummy_dest(i);
2122 
2123       ir = new AluInstr(opcode,
2124                         dest,
2125                         value_factory.src64(alu.src[order[0]], k, 0),
2126                         value_factory.src64(alu.src[order[1]], k, 0),
2127                         i == 1 ? AluInstr::write : AluInstr::empty);
2128       group->add_instruction(ir);
2129    }
2130    if (ir)
2131       ir->set_alu_flag(alu_last_instr);
2132 
2133    shader.emit_instruction(group);
2134    return true;
2135 }
2136 
2137 static bool
emit_alu_op2_64bit_one_dst(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_order)2138 emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu,
2139                            EAluOp opcode,
2140                            Shader& shader,
2141                            bool switch_order)
2142 {
2143    auto& value_factory = shader.value_factory();
2144    AluInstr *ir = nullptr;
2145    int order[2] = {0, 1};
2146    if (switch_order) {
2147       order[0] = 1;
2148       order[1] = 0;
2149    }
2150 
2151    AluInstr::SrcValues src(4);
2152 
2153    for (unsigned k = 0; k < alu.def.num_components; ++k) {
2154       auto dest = value_factory.dest(alu.def, 2 * k, pin_chan);
2155       src[0] = value_factory.src64(alu.src[order[0]], k, 1);
2156       src[1] = value_factory.src64(alu.src[order[1]], k, 1);
2157       src[2] = value_factory.src64(alu.src[order[0]], k, 0);
2158       src[3] = value_factory.src64(alu.src[order[1]], k, 0);
2159 
2160       ir = new AluInstr(opcode, dest, src, AluInstr::write, 2);
2161       ir->set_alu_flag(alu_64bit_op);
2162 
2163       shader.emit_instruction(ir);
2164    }
2165    if (ir)
2166       ir->set_alu_flag(alu_last_instr);
2167 
2168    return true;
2169 }
2170 
2171 static bool
emit_alu_op1_64bit_trans(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2172 emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2173 {
2174    auto& value_factory = shader.value_factory();
2175    auto group = new AluGroup();
2176    AluInstr *ir = nullptr;
2177    for (unsigned i = 0; i < 3; ++i) {
2178       ir = new AluInstr(opcode,
2179                         i < 2 ? value_factory.dest(alu.def, i, pin_chan)
2180                               : value_factory.dummy_dest(i),
2181                         value_factory.src64(alu.src[0], 0, 1),
2182                         value_factory.src64(alu.src[0], 0, 0),
2183                         i < 2 ? AluInstr::write : AluInstr::empty);
2184 
2185       if (opcode == op1_sqrt_64)
2186          ir->set_source_mod(0, AluInstr::mod_abs);
2187       group->add_instruction(ir);
2188    }
2189    if (ir)
2190       ir->set_alu_flag(alu_last_instr);
2191    shader.emit_instruction(group);
2192    return true;
2193 }
2194 
2195 static bool
emit_alu_fma_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2196 emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2197 {
2198    auto& value_factory = shader.value_factory();
2199    auto group = new AluGroup();
2200    AluInstr *ir = nullptr;
2201    for (unsigned i = 0; i < 4; ++i) {
2202 
2203       int chan = i < 3 ? 1 : 0;
2204       auto dest =
2205          i < 2 ? value_factory.dest(alu.def, i, pin_chan) : value_factory.dummy_dest(i);
2206 
2207       ir = new AluInstr(opcode,
2208                         dest,
2209                         value_factory.src64(alu.src[0], 0, chan),
2210                         value_factory.src64(alu.src[1], 0, chan),
2211                         value_factory.src64(alu.src[2], 0, chan),
2212                         i < 2 ? AluInstr::write : AluInstr::empty);
2213       group->add_instruction(ir);
2214    }
2215    if (ir)
2216       ir->set_alu_flag(alu_last_instr);
2217    shader.emit_instruction(group);
2218    return true;
2219 }
2220 
2221 static bool
emit_alu_b2f64(const nir_alu_instr & alu,Shader & shader)2222 emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader)
2223 {
2224    auto& value_factory = shader.value_factory();
2225    auto group = new AluGroup();
2226    AluInstr *ir = nullptr;
2227 
2228    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2229       ir = new AluInstr(op2_and_int,
2230                         value_factory.dest(alu.def, 2 * i, pin_group),
2231                         value_factory.src(alu.src[0], i),
2232                         value_factory.zero(),
2233                         {alu_write});
2234       group->add_instruction(ir);
2235 
2236       ir = new AluInstr(op2_and_int,
2237                         value_factory.dest(alu.def, 2 * i + 1, pin_group),
2238                         value_factory.src(alu.src[0], i),
2239                         value_factory.literal(0x3ff00000),
2240                         {alu_write});
2241       group->add_instruction(ir);
2242    }
2243    if (ir)
2244       ir->set_alu_flag(alu_last_instr);
2245    shader.emit_instruction(group);
2246    return true;
2247 }
2248 
2249 static bool
emit_alu_i2f64(const nir_alu_instr & alu,EAluOp op,Shader & shader)2250 emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
2251 {
2252    /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
2253    auto& value_factory = shader.value_factory();
2254    auto group = new AluGroup();
2255    AluInstr *ir = nullptr;
2256 
2257    assert(alu.def.num_components == 1);
2258 
2259    auto tmpx = value_factory.temp_register();
2260    shader.emit_instruction(new AluInstr(op2_and_int,
2261                                         tmpx,
2262                                         value_factory.src(alu.src[0], 0),
2263                                         value_factory.literal(0xffffff00),
2264                                         AluInstr::write));
2265    auto tmpy = value_factory.temp_register();
2266    shader.emit_instruction(new AluInstr(op2_and_int,
2267                                         tmpy,
2268                                         value_factory.src(alu.src[0], 0),
2269                                         value_factory.literal(0xff),
2270                                         AluInstr::last_write));
2271 
2272    auto tmpx2 = value_factory.temp_register();
2273    auto tmpy2 = value_factory.temp_register();
2274    shader.emit_instruction(new AluInstr(op, tmpx2, tmpx, AluInstr::last_write));
2275    shader.emit_instruction(new AluInstr(op, tmpy2, tmpy, AluInstr::last_write));
2276 
2277    auto tmpx3 = value_factory.temp_register(0);
2278    auto tmpy3 = value_factory.temp_register(1);
2279    auto tmpz3 = value_factory.temp_register(2);
2280    auto tmpw3 = value_factory.temp_register(3);
2281 
2282    ir = new AluInstr(op1_flt32_to_flt64, tmpx3, tmpx2, AluInstr::write);
2283    group->add_instruction(ir);
2284    ir = new AluInstr(op1_flt32_to_flt64, tmpy3, value_factory.zero(), AluInstr::write);
2285    group->add_instruction(ir);
2286    ir = new AluInstr(op1_flt32_to_flt64, tmpz3, tmpy2, AluInstr::write);
2287    group->add_instruction(ir);
2288    ir =
2289       new AluInstr(op1_flt32_to_flt64, tmpw3, value_factory.zero(), AluInstr::last_write);
2290    group->add_instruction(ir);
2291    shader.emit_instruction(group);
2292 
2293    group = new AluGroup();
2294 
2295    ir = new AluInstr(op2_add_64,
2296                      value_factory.dest(alu.def, 0, pin_chan),
2297                      tmpy3,
2298                      tmpw3,
2299                      AluInstr::write);
2300    group->add_instruction(ir);
2301    ir = new AluInstr(op2_add_64,
2302                      value_factory.dest(alu.def, 1, pin_chan),
2303                      tmpx3,
2304                      tmpz3,
2305                      AluInstr::write);
2306    group->add_instruction(ir);
2307    shader.emit_instruction(group);
2308 
2309    return true;
2310 }
2311 
2312 static bool
emit_alu_f2f64(const nir_alu_instr & alu,Shader & shader)2313 emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
2314 {
2315    auto& value_factory = shader.value_factory();
2316    auto group = new AluGroup();
2317    AluInstr *ir = nullptr;
2318 
2319    assert(alu.def.num_components == 1);
2320 
2321    ir = new AluInstr(op1_flt32_to_flt64,
2322                      value_factory.dest(alu.def, 0, pin_chan),
2323                      value_factory.src(alu.src[0], 0),
2324                      AluInstr::write);
2325    group->add_instruction(ir);
2326    ir = new AluInstr(op1_flt32_to_flt64,
2327                      value_factory.dest(alu.def, 1, pin_chan),
2328                      value_factory.zero(),
2329                      AluInstr::last_write);
2330    group->add_instruction(ir);
2331    shader.emit_instruction(group);
2332    return true;
2333 }
2334 
2335 static bool
emit_alu_f2f32(const nir_alu_instr & alu,Shader & shader)2336 emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader)
2337 {
2338    auto& value_factory = shader.value_factory();
2339    auto group = new AluGroup();
2340    AluInstr *ir = nullptr;
2341 
2342    ir = new AluInstr(op1v_flt64_to_flt32,
2343                      value_factory.dest(alu.def, 0, pin_chan),
2344                      value_factory.src64(alu.src[0], 0, 1),
2345                      {alu_write});
2346    group->add_instruction(ir);
2347    ir = new AluInstr(op1v_flt64_to_flt32,
2348                      value_factory.dummy_dest(1),
2349                      value_factory.src64(alu.src[0], 0, 0),
2350                      AluInstr::last);
2351    group->add_instruction(ir);
2352    shader.emit_instruction(group);
2353    return true;
2354 }
2355 
2356 static bool
emit_alu_b2x(const nir_alu_instr & alu,AluInlineConstants mask,Shader & shader)2357 emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader)
2358 {
2359    auto& value_factory = shader.value_factory();
2360    AluInstr *ir = nullptr;
2361    auto pin = pin_for_components(alu);
2362 
2363    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2364       auto src = value_factory.src(alu.src[0], i);
2365       ir = new AluInstr(op2_and_int,
2366                         value_factory.dest(alu.def, i, pin),
2367                         src,
2368                         value_factory.inline_const(mask, 0),
2369                         {alu_write});
2370       shader.emit_instruction(ir);
2371    }
2372    if (ir)
2373       ir->set_alu_flag(alu_last_instr);
2374    return true;
2375 }
2376 
2377 static bool
emit_alu_op1(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluMods mod)2378 emit_alu_op1(const nir_alu_instr& alu,
2379              EAluOp opcode,
2380              Shader& shader,
2381              AluMods mod)
2382 {
2383    auto& value_factory = shader.value_factory();
2384 
2385    AluInstr *ir = nullptr;
2386    auto pin = pin_for_components(alu);
2387 
2388    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2389       ir = new AluInstr(opcode,
2390                         value_factory.dest(alu.def, i, pin),
2391                         value_factory.src(alu.src[0], i),
2392                         {alu_write});
2393       switch (mod) {
2394       case mod_src0_abs:
2395          ir->set_source_mod(0, AluInstr::mod_abs); break;
2396       case mod_src0_neg:
2397          ir->set_source_mod(0, AluInstr::mod_neg); break;
2398       case mod_dest_clamp:
2399          ir->set_alu_flag(alu_dst_clamp);
2400          default:;
2401       }
2402       shader.emit_instruction(ir);
2403    }
2404    if (ir)
2405       ir->set_alu_flag(alu_last_instr);
2406    return true;
2407 }
2408 
2409 static bool
emit_alu_op2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)2410 emit_alu_op2(const nir_alu_instr& alu,
2411              EAluOp opcode,
2412              Shader& shader,
2413              AluInstr::Op2Options opts)
2414 {
2415    auto& value_factory = shader.value_factory();
2416    const nir_alu_src *src0 = &alu.src[0];
2417    const nir_alu_src *src1 = &alu.src[1];
2418 
2419    int idx0 = 0;
2420    int idx1 = 1;
2421    if (opts & AluInstr::op2_opt_reverse) {
2422       std::swap(src0, src1);
2423       std::swap(idx0, idx1);
2424    }
2425 
2426    bool src1_negate = (opts & AluInstr::op2_opt_neg_src1);
2427 
2428    auto pin = pin_for_components(alu);
2429    AluInstr *ir = nullptr;
2430    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2431       ir = new AluInstr(opcode,
2432                         value_factory.dest(alu.def, i, pin),
2433                         value_factory.src(*src0, i),
2434                         value_factory.src(*src1, i),
2435                         {alu_write});
2436       if (src1_negate)
2437          ir->set_source_mod(1, AluInstr::mod_neg);
2438       shader.emit_instruction(ir);
2439    }
2440    if (ir)
2441       ir->set_alu_flag(alu_last_instr);
2442    return true;
2443 }
2444 
2445 static bool
emit_alu_op2_int(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)2446 emit_alu_op2_int(const nir_alu_instr& alu,
2447                  EAluOp opcode,
2448                  Shader& shader,
2449                  AluInstr::Op2Options opts)
2450 {
2451    return emit_alu_op2(alu, opcode, shader, opts);
2452 }
2453 
2454 static bool
emit_alu_op3(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,const std::array<int,3> & src_shuffle)2455 emit_alu_op3(const nir_alu_instr& alu,
2456              EAluOp opcode,
2457              Shader& shader,
2458              const std::array<int, 3>& src_shuffle)
2459 {
2460    auto& value_factory = shader.value_factory();
2461    const nir_alu_src *src[3];
2462    src[0] = &alu.src[src_shuffle[0]];
2463    src[1] = &alu.src[src_shuffle[1]];
2464    src[2] = &alu.src[src_shuffle[2]];
2465 
2466    auto pin = pin_for_components(alu);
2467    AluInstr *ir = nullptr;
2468    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2469       ir = new AluInstr(opcode,
2470                         value_factory.dest(alu.def, i, pin),
2471                         value_factory.src(*src[0], i),
2472                         value_factory.src(*src[1], i),
2473                         value_factory.src(*src[2], i),
2474                         {alu_write});
2475       ir->set_alu_flag(alu_write);
2476       shader.emit_instruction(ir);
2477    }
2478    if (ir)
2479       ir->set_alu_flag(alu_last_instr);
2480    return true;
2481 }
2482 
2483 static bool
emit_any_all_fcomp2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2484 emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2485 {
2486    AluInstr *ir = nullptr;
2487    auto& value_factory = shader.value_factory();
2488 
2489    PRegister tmp[2];
2490    tmp[0] = value_factory.temp_register();
2491    tmp[1] = value_factory.temp_register();
2492 
2493    for (unsigned i = 0; i < 2; ++i) {
2494       ir = new AluInstr(opcode,
2495                         tmp[i],
2496                         value_factory.src(alu.src[0], i),
2497                         value_factory.src(alu.src[1], i),
2498                         {alu_write});
2499       shader.emit_instruction(ir);
2500    }
2501    ir->set_alu_flag(alu_last_instr);
2502 
2503    opcode = (opcode == op2_setne_dx10) ? op2_or_int : op2_and_int;
2504    ir = new AluInstr(opcode,
2505                      value_factory.dest(alu.def, 0, pin_free),
2506                      tmp[0],
2507                      tmp[1],
2508                      AluInstr::last_write);
2509    shader.emit_instruction(ir);
2510    return true;
2511 }
2512 
2513 static bool
emit_any_all_fcomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2514 emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2515 {
2516    /* This should probabyl be lowered in nir */
2517    auto& value_factory = shader.value_factory();
2518 
2519    AluInstr *ir = nullptr;
2520    RegisterVec4 v = value_factory.temp_vec4(pin_group);
2521    AluInstr::SrcValues s;
2522 
2523    for (int i = 0; i < nc; ++i) {
2524       s.push_back(v[i]);
2525    }
2526 
2527    for (int i = nc; i < 4; ++i)
2528       s.push_back(value_factory.inline_const(all ? ALU_SRC_1 : ALU_SRC_0, 0));
2529 
2530    for (int i = 0; i < nc; ++i) {
2531       ir = new AluInstr(op,
2532                         v[i],
2533                         value_factory.src(alu.src[0], i),
2534                         value_factory.src(alu.src[1], i),
2535                         {alu_write});
2536       shader.emit_instruction(ir);
2537    }
2538    if (ir)
2539       ir->set_alu_flag(alu_last_instr);
2540 
2541    auto max_val = value_factory.temp_register();
2542 
2543    ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4);
2544 
2545    if (all) {
2546       ir->set_source_mod(0, AluInstr::mod_neg);
2547       ir->set_source_mod(1, AluInstr::mod_neg);
2548       ir->set_source_mod(2, AluInstr::mod_neg);
2549       ir->set_source_mod(3, AluInstr::mod_neg);
2550    }
2551 
2552    shader.emit_instruction(ir);
2553 
2554    if (all)
2555       op = (op == op2_sete) ? op2_sete_dx10 : op2_setne_dx10;
2556    else
2557       op = (op == op2_sete) ? op2_setne_dx10 : op2_sete_dx10;
2558 
2559    ir = new AluInstr(op,
2560                      value_factory.dest(alu.def, 0, pin_free),
2561                      max_val,
2562                      value_factory.inline_const(ALU_SRC_1, 0),
2563                      AluInstr::last_write);
2564    if (all)
2565       ir->set_source_mod(1, AluInstr::mod_neg);
2566    shader.emit_instruction(ir);
2567 
2568    return true;
2569 }
2570 
2571 static bool
emit_any_all_icomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2572 emit_any_all_icomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2573 {
2574    /* This should probabyl be lowered in nir */
2575    auto& value_factory = shader.value_factory();
2576 
2577    AluInstr *ir = nullptr;
2578    PRegister v[6];
2579 
2580    auto dest = value_factory.dest(alu.def, 0, pin_free);
2581 
2582    for (int i = 0; i < nc + nc / 2; ++i)
2583       v[i] = value_factory.temp_register();
2584 
2585    EAluOp combine = all ? op2_and_int : op2_or_int;
2586 
2587    for (int i = 0; i < nc; ++i) {
2588       ir = new AluInstr(op,
2589                         v[i],
2590                         value_factory.src(alu.src[0], i),
2591                         value_factory.src(alu.src[1], i),
2592                         AluInstr::write);
2593       shader.emit_instruction(ir);
2594    }
2595    if (ir)
2596       ir->set_alu_flag(alu_last_instr);
2597 
2598    if (nc == 2) {
2599       ir = new AluInstr(combine, dest, v[0], v[1], AluInstr::last_write);
2600       shader.emit_instruction(ir);
2601       return true;
2602    }
2603 
2604    if (nc == 3) {
2605       ir = new AluInstr(combine, v[3], v[0], v[1], AluInstr::last_write);
2606       shader.emit_instruction(ir);
2607       ir = new AluInstr(combine, dest, v[3], v[2], AluInstr::last_write);
2608       shader.emit_instruction(ir);
2609       return true;
2610    }
2611 
2612    if (nc == 4) {
2613       ir = new AluInstr(combine, v[4], v[0], v[1], AluInstr::write);
2614       shader.emit_instruction(ir);
2615       ir = new AluInstr(combine, v[5], v[2], v[3], AluInstr::last_write);
2616       shader.emit_instruction(ir);
2617       ir = new AluInstr(combine, dest, v[4], v[5], AluInstr::last_write);
2618       shader.emit_instruction(ir);
2619       return true;
2620    }
2621 
2622    return false;
2623 }
2624 
2625 static bool
emit_dot(const nir_alu_instr & alu,int n,Shader & shader)2626 emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
2627 {
2628    auto& value_factory = shader.value_factory();
2629    const nir_alu_src& src0 = alu.src[0];
2630    const nir_alu_src& src1 = alu.src[1];
2631 
2632    auto dest = value_factory.dest(alu.def, 0, pin_chan);
2633 
2634    AluInstr::SrcValues srcs(2 * n);
2635 
2636    for (int i = 0; i < n; ++i) {
2637       srcs[2 * i] = value_factory.src(src0, i);
2638       srcs[2 * i + 1] = value_factory.src(src1, i);
2639    }
2640 
2641    AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::last_write, n);
2642 
2643    shader.emit_instruction(ir);
2644    shader.set_flag(Shader::sh_disble_sb);
2645 
2646    return true;
2647 }
2648 
2649 static bool
emit_dot4(const nir_alu_instr & alu,int nelm,Shader & shader)2650 emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader)
2651 {
2652    auto& value_factory = shader.value_factory();
2653    const nir_alu_src& src0 = alu.src[0];
2654    const nir_alu_src& src1 = alu.src[1];
2655 
2656    auto dest = value_factory.dest(alu.def, 0, pin_free);
2657 
2658    AluInstr::SrcValues srcs(8);
2659 
2660    for (int i = 0; i < nelm; ++i) {
2661       srcs[2 * i] = value_factory.src(src0, i);
2662       srcs[2 * i + 1] = value_factory.src(src1, i);
2663    }
2664 
2665    for (int i = nelm; i < 4; ++i) {
2666        srcs[2 * i] = value_factory.zero();
2667        srcs[2 * i + 1] = value_factory.zero();
2668    }
2669 
2670    AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2671 
2672    shader.emit_instruction(ir);
2673    return true;
2674 }
2675 
2676 static bool
emit_fdph(const nir_alu_instr & alu,Shader & shader)2677 emit_fdph(const nir_alu_instr& alu, Shader& shader)
2678 {
2679    auto& value_factory = shader.value_factory();
2680    const nir_alu_src& src0 = alu.src[0];
2681    const nir_alu_src& src1 = alu.src[1];
2682 
2683    auto dest = value_factory.dest(alu.def, 0, pin_free);
2684 
2685    AluInstr::SrcValues srcs(8);
2686 
2687    for (int i = 0; i < 3; ++i) {
2688       srcs[2 * i] = value_factory.src(src0, i);
2689       srcs[2 * i + 1] = value_factory.src(src1, i);
2690    }
2691 
2692    srcs[6] = value_factory.one();
2693    srcs[7] = value_factory.src(src1, 3);
2694 
2695    AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2696    shader.emit_instruction(ir);
2697    return true;
2698 }
2699 
2700 static bool
emit_create_vec(const nir_alu_instr & instr,unsigned nc,Shader & shader)2701 emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader)
2702 {
2703    auto& value_factory = shader.value_factory();
2704    AluInstr *ir = nullptr;
2705 
2706    for (unsigned i = 0; i < nc; ++i) {
2707       auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]);
2708       auto dst = value_factory.dest(instr.def, i, pin_none);
2709       shader.emit_instruction(new AluInstr(op1_mov, dst, src, {alu_write}));
2710    }
2711 
2712    if (ir)
2713       ir->set_alu_flag(alu_last_instr);
2714    return true;
2715 }
2716 
2717 static bool
emit_alu_comb_with_zero(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2718 emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2719 {
2720    auto& value_factory = shader.value_factory();
2721    AluInstr *ir = nullptr;
2722    auto pin = pin_for_components(alu);
2723    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2724       ir = new AluInstr(opcode,
2725                         value_factory.dest(alu.def, i, pin),
2726                         value_factory.zero(),
2727                         value_factory.src(alu.src[0], i),
2728                         AluInstr::write);
2729       shader.emit_instruction(ir);
2730    }
2731    if (ir)
2732       ir->set_alu_flag(alu_last_instr);
2733 
2734    return true;
2735 }
2736 
2737 static bool
emit_pack_64_2x32_split(const nir_alu_instr & alu,Shader & shader)2738 emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader)
2739 {
2740    auto& value_factory = shader.value_factory();
2741    AluInstr *ir = nullptr;
2742    for (unsigned i = 0; i < 2; ++i) {
2743       ir = new AluInstr(op1_mov,
2744                         value_factory.dest(alu.def, i, pin_none),
2745                         value_factory.src(alu.src[i], 0),
2746                         AluInstr::write);
2747       shader.emit_instruction(ir);
2748    }
2749    ir->set_alu_flag(alu_last_instr);
2750    return true;
2751 }
2752 
2753 static bool
emit_pack_64_2x32(const nir_alu_instr & alu,Shader & shader)2754 emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2755 {
2756    auto& value_factory = shader.value_factory();
2757    AluInstr *ir = nullptr;
2758    for (unsigned i = 0; i < 2; ++i) {
2759       ir = new AluInstr(op1_mov,
2760                         value_factory.dest(alu.def, i, pin_none),
2761                         value_factory.src(alu.src[0], i),
2762                         AluInstr::write);
2763       shader.emit_instruction(ir);
2764    }
2765    ir->set_alu_flag(alu_last_instr);
2766    return true;
2767 }
2768 
2769 static bool
emit_unpack_64_2x32(const nir_alu_instr & alu,Shader & shader)2770 emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2771 {
2772    auto& value_factory = shader.value_factory();
2773    AluInstr *ir = nullptr;
2774    for (unsigned i = 0; i < 2; ++i) {
2775       ir = new AluInstr(op1_mov,
2776                         value_factory.dest(alu.def, i, pin_none),
2777                         value_factory.src64(alu.src[0], 0, i),
2778                         AluInstr::write);
2779       shader.emit_instruction(ir);
2780    }
2781    ir->set_alu_flag(alu_last_instr);
2782    return true;
2783 }
2784 
2785 bool
emit_alu_vec2_64(const nir_alu_instr & alu,Shader & shader)2786 emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader)
2787 {
2788    auto& value_factory = shader.value_factory();
2789    AluInstr *ir = nullptr;
2790    for (unsigned i = 0; i < 2; ++i) {
2791       ir = new AluInstr(op1_mov,
2792                         value_factory.dest(alu.def, i, pin_chan),
2793                         value_factory.src64(alu.src[0], 0, i),
2794                         AluInstr::write);
2795       shader.emit_instruction(ir);
2796    }
2797    for (unsigned i = 0; i < 2; ++i) {
2798       ir = new AluInstr(op1_mov,
2799                         value_factory.dest(alu.def, i + 2, pin_chan),
2800                         value_factory.src64(alu.src[1], 1, i),
2801                         AluInstr::write);
2802       shader.emit_instruction(ir);
2803    }
2804    ir->set_alu_flag(alu_last_instr);
2805    return true;
2806 }
2807 
2808 static bool
emit_pack_32_2x16_split(const nir_alu_instr & alu,Shader & shader)2809 emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader)
2810 {
2811    auto& value_factory = shader.value_factory();
2812 
2813    auto x = value_factory.temp_register();
2814    auto y = value_factory.temp_register();
2815    auto yy = value_factory.temp_register();
2816 
2817    shader.emit_instruction(new AluInstr(
2818       op1_flt32_to_flt16, x, value_factory.src(alu.src[0], 0), AluInstr::last_write));
2819 
2820    shader.emit_instruction(new AluInstr(
2821       op1_flt32_to_flt16, y, value_factory.src(alu.src[1], 0), AluInstr::last_write));
2822 
2823    shader.emit_instruction(
2824       new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write));
2825 
2826    shader.emit_instruction(new AluInstr(op2_or_int,
2827                                         value_factory.dest(alu.def, 0, pin_free),
2828                                         x,
2829                                         yy,
2830                                         AluInstr::last_write));
2831    return true;
2832 }
2833 
2834 static bool
emit_unpack_64_2x32_split(const nir_alu_instr & alu,int comp,Shader & shader)2835 emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader)
2836 {
2837    auto& value_factory = shader.value_factory();
2838    shader.emit_instruction(new AluInstr(op1_mov,
2839                                         value_factory.dest(alu.def, 0, pin_free),
2840                                         value_factory.src64(alu.src[0], 0, comp),
2841                                         AluInstr::last_write));
2842    return true;
2843 }
2844 
2845 static bool
emit_unpack_32_2x16_split_x(const nir_alu_instr & alu,Shader & shader)2846 emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader)
2847 {
2848    auto& value_factory = shader.value_factory();
2849    shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2850                                         value_factory.dest(alu.def, 0, pin_free),
2851                                         value_factory.src(alu.src[0], 0),
2852                                         AluInstr::last_write));
2853    return true;
2854 }
2855 static bool
emit_unpack_32_2x16_split_y(const nir_alu_instr & alu,Shader & shader)2856 emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader)
2857 {
2858    auto& value_factory = shader.value_factory();
2859    auto tmp = value_factory.temp_register();
2860    shader.emit_instruction(new AluInstr(op2_lshr_int,
2861                                         tmp,
2862                                         value_factory.src(alu.src[0], 0),
2863                                         value_factory.literal(16),
2864                                         AluInstr::last_write));
2865 
2866    shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2867                                         value_factory.dest(alu.def, 0, pin_free),
2868                                         tmp,
2869                                         AluInstr::last_write));
2870    return true;
2871 }
2872 
2873 static bool
emit_alu_trans_op1_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2874 emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2875 {
2876    auto& value_factory = shader.value_factory();
2877    const nir_alu_src& src0 = alu.src[0];
2878 
2879    AluInstr *ir = nullptr;
2880    auto pin = pin_for_components(alu);
2881 
2882    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2883       ir = new AluInstr(opcode,
2884                         value_factory.dest(alu.def, i, pin),
2885                         value_factory.src(src0, i),
2886                         AluInstr::last_write);
2887       ir->set_alu_flag(alu_is_trans);
2888       shader.emit_instruction(ir);
2889    }
2890 
2891    return true;
2892 }
2893 
2894 static bool
emit_alu_f2i32_or_u32_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2895 emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2896 {
2897    auto& value_factory = shader.value_factory();
2898    AluInstr *ir = nullptr;
2899 
2900    PRegister reg[4];
2901 
2902    int num_comp = alu.def.num_components;
2903 
2904    for (int i = 0; i < num_comp; ++i) {
2905       reg[i] = value_factory.temp_register();
2906       ir = new AluInstr(op1_trunc,
2907                         reg[i],
2908                         value_factory.src(alu.src[0], i),
2909                         AluInstr::last_write);
2910       shader.emit_instruction(ir);
2911    }
2912 
2913    auto pin = pin_for_components(alu);
2914    for (int i = 0; i < num_comp; ++i) {
2915       ir = new AluInstr(opcode,
2916                         value_factory.dest(alu.def, i, pin),
2917                         reg[i],
2918                         AluInstr::write);
2919       if (opcode == op1_flt_to_uint) {
2920          ir->set_alu_flag(alu_is_trans);
2921          ir->set_alu_flag(alu_last_instr);
2922       }
2923       shader.emit_instruction(ir);
2924    }
2925    ir->set_alu_flag(alu_last_instr);
2926    return true;
2927 }
2928 
2929 static bool
emit_alu_trans_op1_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2930 emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2931 {
2932    auto& value_factory = shader.value_factory();
2933    const nir_alu_src& src0 = alu.src[0];
2934 
2935    auto pin = pin_for_components(alu);
2936 
2937    const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
2938 
2939    for (unsigned j = 0; j < alu.def.num_components; ++j) {
2940       unsigned ncomp =  j == 3 ? 4 : 3;
2941 
2942       AluInstr::SrcValues srcs(ncomp);
2943       PRegister dest = value_factory.dest(alu.def, j, pin, (1 << ncomp) - 1);
2944 
2945       for (unsigned i = 0; i < ncomp; ++i)
2946          srcs[i] = value_factory.src(src0, j);
2947 
2948       auto ir = new AluInstr(opcode, dest, srcs, flags, ncomp);
2949       shader.emit_instruction(ir);
2950    }
2951    return true;
2952 }
2953 
2954 static bool
emit_alu_trans_op2_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2955 emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2956 {
2957    auto& value_factory = shader.value_factory();
2958 
2959    const nir_alu_src& src0 = alu.src[0];
2960    const nir_alu_src& src1 = alu.src[1];
2961 
2962    AluInstr *ir = nullptr;
2963 
2964    auto pin = pin_for_components(alu);
2965    for (unsigned i = 0; i < alu.def.num_components; ++i) {
2966       ir = new AluInstr(opcode,
2967                         value_factory.dest(alu.def, i, pin),
2968                         value_factory.src(src0, i),
2969                         value_factory.src(src1, i),
2970                         AluInstr::last_write);
2971       ir->set_alu_flag(alu_is_trans);
2972       shader.emit_instruction(ir);
2973    }
2974    return true;
2975 }
2976 
2977 static bool
emit_alu_trans_op2_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2978 emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2979 {
2980    auto& value_factory = shader.value_factory();
2981 
2982    const nir_alu_src& src0 = alu.src[0];
2983    const nir_alu_src& src1 = alu.src[1];
2984 
2985    unsigned last_slot = 4;
2986 
2987    const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
2988 
2989    for (unsigned k = 0; k < alu.def.num_components; ++k) {
2990       AluInstr::SrcValues srcs(2 * last_slot);
2991       PRegister dest = value_factory.dest(alu.def, k, pin_free);
2992 
2993       for (unsigned i = 0; i < last_slot; ++i) {
2994          srcs[2 * i] = value_factory.src(src0, k);
2995          srcs[2 * i + 1] = value_factory.src(src1, k);
2996       }
2997 
2998       auto ir = new AluInstr(opcode, dest, srcs, flags, last_slot);
2999       ir->set_alu_flag(alu_is_cayman_trans);
3000       shader.emit_instruction(ir);
3001    }
3002    return true;
3003 }
3004 
3005 static bool
emit_tex_fdd(const nir_alu_instr & alu,TexInstr::Opcode opcode,bool fine,Shader & shader)3006 emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader)
3007 {
3008    auto& value_factory = shader.value_factory();
3009 
3010    int ncomp = alu.def.num_components;
3011    RegisterVec4::Swizzle src_swz = {7, 7, 7, 7};
3012    RegisterVec4::Swizzle tmp_swz = {7, 7, 7, 7};
3013    for (auto i = 0; i < ncomp; ++i) {
3014       src_swz[i] = alu.src[0].swizzle[i];
3015       tmp_swz[i] = i;
3016    }
3017 
3018    auto src = value_factory.src_vec4(alu.src[0].src, pin_none, src_swz);
3019 
3020    auto tmp = value_factory.temp_vec4(pin_group, tmp_swz);
3021    AluInstr *mv = nullptr;
3022    for (int i = 0; i < ncomp; ++i) {
3023       mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write);
3024       shader.emit_instruction(mv);
3025    }
3026    if (mv)
3027       mv->set_alu_flag(alu_last_instr);
3028 
3029    auto dst = value_factory.dest_vec4(alu.def, pin_group);
3030    RegisterVec4::Swizzle dst_swz = {7, 7, 7, 7};
3031    for (auto i = 0; i < ncomp; ++i) {
3032       dst_swz[i] = i;
3033    }
3034 
3035    auto tex = new TexInstr(opcode, dst, dst_swz, tmp, R600_MAX_CONST_BUFFERS, nullptr);
3036 
3037    if (fine)
3038       tex->set_tex_flag(TexInstr::grad_fine);
3039 
3040    shader.emit_instruction(tex);
3041 
3042    return true;
3043 }
3044 
3045 static bool
emit_alu_cube(const nir_alu_instr & alu,Shader & shader)3046 emit_alu_cube(const nir_alu_instr& alu, Shader& shader)
3047 {
3048    auto& value_factory = shader.value_factory();
3049    AluInstr *ir = nullptr;
3050 
3051    const uint16_t src0_chan[4] = {2, 2, 0, 1};
3052    const uint16_t src1_chan[4] = {1, 0, 2, 2};
3053 
3054    auto group = new AluGroup();
3055 
3056    for (int i = 0; i < 4; ++i) {
3057 
3058       ir = new AluInstr(op2_cube,
3059                         value_factory.dest(alu.def, i, pin_chan),
3060                         value_factory.src(alu.src[0], src0_chan[i]),
3061                         value_factory.src(alu.src[0], src1_chan[i]),
3062                         AluInstr::write);
3063       group->add_instruction(ir);
3064    }
3065    ir->set_alu_flag(alu_last_instr);
3066    shader.emit_instruction(group);
3067    return true;
3068 }
3069 
3070 const std::set<AluModifiers> AluInstr::empty;
3071 const std::set<AluModifiers> AluInstr::write({alu_write});
3072 const std::set<AluModifiers> AluInstr::last({alu_last_instr});
3073 const std::set<AluModifiers> AluInstr::last_write({alu_write, alu_last_instr});
3074 
3075 } // namespace r600
3076