1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_instr_alu.h"
28 #include "sfn_instr_alugroup.h"
29 #include "sfn_instr_tex.h"
30 #include "sfn_shader.h"
31 #include "sfn_debug.h"
32
33 #include <algorithm>
34 #include <sstream>
35
36
37 namespace r600 {
38
39 using std::string;
40 using std::istream;
41 using std::vector;
42
AluInstr(EAluOp opcode,PRegister dest,SrcValues src,const std::set<AluModifiers> & flags,int slots)43 AluInstr::AluInstr(EAluOp opcode, PRegister dest,
44 SrcValues src,
45 const std::set<AluModifiers>& flags, int slots):
46 m_opcode(opcode),
47 m_dest(dest),
48 m_bank_swizzle(alu_vec_unknown),
49 m_cf_type(cf_alu),
50 m_alu_slots(slots)
51 {
52 m_src.swap(src);
53
54 if (m_src.size() == 3)
55 m_alu_flags.set(alu_op3);
56
57 for(auto f : flags)
58 m_alu_flags.set(f);
59
60 ASSERT_OR_THROW(m_src.size() == static_cast<size_t>(alu_ops.at(opcode).nsrc * m_alu_slots),
61 "Unexpected number of source values");
62
63 if (m_alu_flags.test(alu_write))
64 ASSERT_OR_THROW(dest, "Write flag is set, but no destination register is given");
65
66 update_uses();
67
68 }
69
AluInstr(EAluOp opcode)70 AluInstr::AluInstr(EAluOp opcode):
71 AluInstr(opcode, nullptr, SrcValues(alu_ops.at(opcode).nsrc), {}, 1)
72 {
73 }
74
AluInstr(EAluOp opcode,int chan)75 AluInstr::AluInstr(EAluOp opcode, int chan):
76 AluInstr(opcode, nullptr, SrcValues(), {}, 1)
77 {
78 m_fallback_chan = chan;
79 }
80
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,const std::set<AluModifiers> & m_flags)81 AluInstr::AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0,
82 const std::set<AluModifiers>& m_flags):
83 AluInstr(opcode, dest, SrcValues{src0}, m_flags, 1)
84 {
85
86 }
87
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,const std::set<AluModifiers> & m_flags)88 AluInstr::AluInstr(EAluOp opcode, PRegister dest,
89 PVirtualValue src0, PVirtualValue src1,
90 const std::set<AluModifiers>& m_flags):
91 AluInstr(opcode, dest, SrcValues{src0, src1}, m_flags, 1)
92 {
93
94 }
95
AluInstr(EAluOp opcode,PRegister dest,PVirtualValue src0,PVirtualValue src1,PVirtualValue src2,const std::set<AluModifiers> & m_flags)96 AluInstr::AluInstr(EAluOp opcode, PRegister dest, PVirtualValue src0, PVirtualValue src1,
97 PVirtualValue src2,
98 const std::set<AluModifiers>& m_flags):
99 AluInstr(opcode, dest, SrcValues{src0, src1, src2}, m_flags, 1)
100 {
101
102 }
103
AluInstr(ESDOp op,PVirtualValue src0,PVirtualValue src1,PVirtualValue address)104 AluInstr::AluInstr(ESDOp op, PVirtualValue src0, PVirtualValue src1, PVirtualValue address):
105 m_lds_opcode(op)
106 {
107 set_alu_flag(alu_is_lds);
108
109 m_src.push_back(address);
110 if (src0) {
111 m_src.push_back(src0);
112 if (src1)
113 m_src.push_back(src1);
114 }
115 update_uses();
116 }
117
AluInstr(ESDOp op,const SrcValues & src,const std::set<AluModifiers> & flags)118 AluInstr::AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags):
119 m_lds_opcode(op),
120 m_src(src)
121 {
122 for(auto f : flags)
123 set_alu_flag(f);
124
125 set_alu_flag(alu_is_lds);
126 update_uses();
127 }
128
update_uses()129 void AluInstr::update_uses()
130 {
131 for (auto& s : m_src) {
132 auto r = s->as_register();
133 if (r) {
134 r->add_use(this);
135 // move this to add_use
136 if (r->pin() == pin_array) {
137 auto array_elm = static_cast<LocalArrayValue *>(r);
138 auto addr = array_elm->addr();
139 if (addr && addr->as_register())
140 addr->as_register()->add_use(this);
141 }
142 }
143 auto u = s->as_uniform();
144 if (u && u->buf_addr() && u->buf_addr()->as_register())
145 u->buf_addr()->as_register()->add_use(this);
146 }
147
148 if (m_dest && has_alu_flag(alu_write)) {
149 m_dest->add_parent(this);
150
151 if (m_dest->pin() == pin_array) {
152 // move this to add_parent
153 auto array_elm = static_cast<LocalArrayValue *>(m_dest);
154 auto addr = array_elm->addr();
155 if (addr && addr->as_register())
156 addr->as_register()->add_use(this);
157 }
158 }
159 }
160
accept(ConstInstrVisitor & visitor) const161 void AluInstr::accept(ConstInstrVisitor& visitor) const
162 {
163 visitor.visit(*this);
164 }
165
accept(InstrVisitor & visitor)166 void AluInstr::accept(InstrVisitor& visitor)
167 {
168 visitor.visit(this);
169 }
170
171 const std::map<ECFAluOpCode, std::string> AluInstr::cf_map = {
172 {cf_alu_break, "BREAK"},
173 {cf_alu_continue, "CONT"},
174 {cf_alu_else_after, "ELSE_AFTER"},
175 {cf_alu_extended, "EXTENDED"},
176 {cf_alu_pop_after, "POP_AFTER"},
177 {cf_alu_pop2_after, "POP2_AFTER"},
178 {cf_alu_push_before, "PUSH_BEFORE"}
179 };
180
181 const std::map<AluBankSwizzle, std::string> AluInstr::bank_swizzle_map = {
182 {alu_vec_012, "VEC_012"},
183 {alu_vec_021, "VEC_021"},
184 {alu_vec_102, "VEC_102"},
185 {alu_vec_120, "VEC_120"},
186 {alu_vec_201, "VEC_201"},
187 {alu_vec_210, "VEC_210"}
188 };
189
190 const AluModifiers AluInstr::src_abs_flags[2] =
191 {alu_src0_abs, alu_src1_abs};
192 const AluModifiers AluInstr::src_neg_flags[3] =
193 {alu_src0_neg, alu_src1_neg, alu_src2_neg};
194 const AluModifiers AluInstr::src_rel_flags[3] =
195 {alu_src0_rel, alu_src1_rel, alu_src2_rel};
196
/* Bundle of an index mode and a bit mask describing how a value is to be
 * printed. The bits of the mask are given by the constants below. */
struct ValuePrintFlags {
   static constexpr int is_rel = 1 << 0;
   static constexpr int has_abs = 1 << 1;
   static constexpr int has_neg = 1 << 2;
   static constexpr int literal_is_float = 1 << 3;
   static constexpr int index_ar = 1 << 4;
   static constexpr int index_loopidx = 1 << 5;

   ValuePrintFlags(int im, int f):
       index_mode(im),
       flags(f)
   {
   }

   int index_mode = 0;
   int flags = 0;
};
211
do_print(std::ostream & os) const212 void AluInstr::do_print(std::ostream& os) const
213 {
214 const char swzchar[] = "xyzw01?_";
215
216 unsigned i = 0;
217
218 os << "ALU ";
219
220 if (has_alu_flag(alu_is_lds)) {
221 os << "LDS " << lds_ops.at(m_lds_opcode).name;
222 os << " __.x : ";
223 } else {
224
225 os << alu_ops.at(m_opcode).name;
226 if (has_alu_flag(alu_dst_clamp))
227 os << " CLAMP";
228
229 if (m_dest) {
230 if (has_alu_flag(alu_write))
231 os << " " << *m_dest;
232 else
233 os << " __" << "." << swzchar[m_dest->chan()];
234 if (!has_alu_flag(alu_write) && m_dest->pin() != pin_none)
235 os << "@" << m_dest->pin();
236 os << " : ";
237 } else {
238 os << "__." << swzchar[dest_chan()] << " : ";
239 }
240 }
241
242 const int n_source_per_slot = has_alu_flag(alu_is_lds) ?
243 m_src.size() : alu_ops.at(m_opcode).nsrc;
244
245 for (int s = 0; s < m_alu_slots; ++s) {
246
247 if (s > 0)
248 os << " +";
249
250 for (int k = 0; k < n_source_per_slot; ++k) {
251 int pflags = 0;
252 if (i)
253 os << ' ';
254 if (has_alu_flag(src_neg_flags[k])) pflags |= ValuePrintFlags::has_neg;
255 if (has_alu_flag(src_rel_flags[k])) pflags |= ValuePrintFlags::is_rel;
256 if (i < 2)
257 if (has_alu_flag(src_abs_flags[k])) pflags |= ValuePrintFlags::has_abs;
258
259 if (pflags & ValuePrintFlags::has_neg) os << '-';
260 if (pflags & ValuePrintFlags::has_abs) os << '|';
261 os << *m_src[i];
262 if (pflags & ValuePrintFlags::has_abs) os << '|';
263 ++i;
264 }
265 }
266
267 os << " {";
268 if (has_alu_flag(alu_write)) os << 'W';
269 if (has_alu_flag(alu_last_instr)) os << 'L';
270 if (has_alu_flag(alu_update_exec)) os << 'E';
271 if (has_alu_flag(alu_update_pred)) os << 'P';
272 os << "}";
273
274 auto bs_name = bank_swizzle_map.find(m_bank_swizzle);
275 if (bs_name != bank_swizzle_map.end())
276 os << ' ' << bs_name->second;
277
278 auto cf_name = cf_map.find(m_cf_type);
279 if (cf_name != cf_map.end())
280 os << ' ' << cf_name->second;
281 }
282
can_propagate_src() const283 bool AluInstr::can_propagate_src() const
284 {
285 /* We can use the source in the next instruction */
286 if (!can_copy_propagate())
287 return false;
288
289 auto src_reg = m_src[0]->as_register();
290 if (!src_reg)
291 return true;
292
293 assert(m_dest);
294
295
296
297 if (!m_dest->is_ssa()) {
298 return false;
299 }
300
301 if (m_dest->pin() == pin_fully)
302 return m_dest->equal_to(*src_reg);
303
304 if (m_dest->pin() == pin_chan)
305 return src_reg->pin() == pin_none ||
306 (src_reg->pin() == pin_chan &&
307 src_reg->chan() == m_dest->chan());
308
309 return m_dest->pin() == pin_none || m_dest->pin() == pin_free;
310 }
311
can_propagate_dest() const312 bool AluInstr::can_propagate_dest() const
313 {
314 if (!can_copy_propagate()){
315 return false;
316 }
317
318 auto src_reg = m_src[0]->as_register();
319 if (!src_reg) {
320 return false;
321 }
322
323 assert(m_dest);
324
325 if (src_reg->pin() == pin_fully ||
326 src_reg->pin() == pin_group) {
327 return false;
328 }
329
330 if (!src_reg->is_ssa())
331 return false;
332
333 if (src_reg->pin() == pin_chan)
334 return m_dest->pin() == pin_none ||
335 m_dest->pin() == pin_free ||
336 ((m_dest->pin() == pin_chan ||
337 m_dest->pin() == pin_group) &&
338 src_reg->chan() == m_dest->chan());
339
340 return (src_reg->pin() == pin_none ||
341 src_reg->pin() == pin_free);
342 }
343
can_copy_propagate() const344 bool AluInstr::can_copy_propagate() const
345 {
346 if (m_opcode != op1_mov)
347 return false;
348
349 if (has_alu_flag(alu_src0_abs) ||
350 has_alu_flag(alu_src0_neg) ||
351 has_alu_flag(alu_dst_clamp))
352 return false;
353
354 return has_alu_flag(alu_write);
355 }
356
replace_source(PRegister old_src,PVirtualValue new_src)357 bool AluInstr::replace_source(PRegister old_src, PVirtualValue new_src)
358 {
359 bool process = false;
360
361 if (!check_readport_validation(old_src, new_src))
362 return false;
363
364 /* If the old source is an array element, we assume that there
365 * might have been an (untracked) indirect access, so don't replace
366 * this source */
367 if (old_src->pin() == pin_array)
368 return false;
369
370 if (new_src->get_addr()) {
371 for (auto& s : m_src) {
372 auto addr = s->get_addr();
373 /* can't have two differen't indirect addresses in the same instr */
374 if (addr && !addr->equal_to(*new_src->get_addr()))
375 return false;
376 }
377 }
378
379 if (m_dest) {
380 /* We don't allow src and dst with rel and different indirect register addresses */
381 if (m_dest->pin() == pin_array && new_src->pin() == pin_array) {
382 auto dav = static_cast<const LocalArrayValue *>(m_dest)->addr();
383 auto sav = static_cast<const LocalArrayValue *>(new_src)->addr();
384 if (dav && sav && dav->as_register() && !dav->equal_to(*sav))
385 return false;
386 }
387 }
388
389 /* Check the readports */
390 if (m_alu_slots * alu_ops.at(m_opcode).nsrc > 2 || m_parent_group) {
391 AluReadportReservation read_port_check =
392 !m_parent_group ?
393 AluReadportReservation() :
394 m_parent_group->readport_reserer();
395
396 int nsrc = alu_ops.at(m_opcode).nsrc;
397 PVirtualValue src[3];
398
399 for (int s = 0; s < m_alu_slots; ++s) {
400 for (int i = 0; i < nsrc; ++i) {
401 auto old_s = m_src[i + nsrc * s];
402 src[i] = old_s->equal_to(*old_src) ? new_src : old_s;
403 }
404 AluBankSwizzle bs = alu_vec_012;
405 while (bs != alu_vec_unknown) {
406 if (read_port_check.schedule_vec_src(src, nsrc, bs)) {
407 break;
408 }
409 ++bs;
410 }
411 if (bs == alu_vec_unknown)
412 return false;
413 }
414 if (m_parent_group)
415 m_parent_group->set_readport_reserer(read_port_check);
416 }
417
418 for (unsigned i = 0; i < m_src.size(); ++i) {
419 if (old_src->equal_to(*m_src[i])) {
420 m_src[i] = new_src;
421 process = true;
422 }
423 }
424 if (process) {
425 auto r = new_src->as_register();
426 if (r)
427 r->add_use(this);
428 old_src->del_use(this);
429 }
430 return process;
431 }
432
set_sources(SrcValues src)433 void AluInstr::set_sources(SrcValues src)
434 {
435 for (auto& s : m_src) {
436 auto r = s->as_register();
437 if (r)
438 r->del_use(this);
439 }
440 m_src.swap(src);
441 for (auto& s : m_src) {
442 auto r = s->as_register();
443 if (r)
444 r->add_use(this);
445 }
446 }
447
replace_dest(PRegister new_dest,AluInstr * move_instr)448 bool AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
449 {
450 if (m_dest->equal_to(*new_dest))
451 return false;
452
453 if (m_dest->uses().size() > 1)
454 return false;
455
456 if (new_dest->pin() == pin_array)
457 return false;
458
459 /* Currently we bail out when an array write should be moved, because
460 * decalring an array write is currently not well defined. The
461 * Whole "backwards" copy propagation shoul dprobably be replaced by some
462 * forward peep holew optimization */
463 /*
464 if (new_dest->pin() == pin_array) {
465 auto dav = static_cast<const LocalArrayValue *>(new_dest)->addr();
466 for (auto s: m_src) {
467 if (s->pin() == pin_array) {
468 auto sav = static_cast<const LocalArrayValue *>(s)->addr();
469 if (dav && sav && dav->as_register() && !dav->equal_to(*sav))
470 return false;
471 }
472 }
473 }
474 */
475
476 if (m_dest->pin() == pin_chan &&
477 new_dest->chan() != m_dest->chan())
478 return false;
479
480
481 if (m_dest->pin() == pin_chan) {
482 if (new_dest->pin() == pin_group)
483 new_dest->set_pin(pin_chgr);
484 else
485 new_dest->set_pin(pin_chan);
486 }
487
488 m_dest = new_dest;
489 if (!move_instr->has_alu_flag(alu_last_instr))
490 reset_alu_flag(alu_last_instr);
491
492 if (has_alu_flag(alu_is_cayman_trans)) {
493 /* Copy propagation puts an instruction into the w channel, but we
494 * don't have the slots for a w channel */
495 if (m_dest->chan() == 3 && m_alu_slots < 4) {
496 m_alu_slots = 4;
497 assert(m_src.size() == 3);
498 m_src.push_back(m_src[0]);
499 }
500 }
501
502 return true;
503 }
504
pin_sources_to_chan()505 void AluInstr::pin_sources_to_chan()
506 {
507 for (auto s: m_src) {
508 auto r = s->as_register();
509 if (r) {
510 if (r->pin() == pin_free)
511 r->set_pin(pin_chan);
512 else if (r->pin() == pin_group)
513 r->set_pin(pin_chgr);
514 }
515 }
516 }
517
check_readport_validation(PRegister old_src,PVirtualValue new_src) const518 bool AluInstr::check_readport_validation(PRegister old_src, PVirtualValue new_src) const
519 {
520 bool success = true;
521 AluReadportReservation rpr_sum;
522
523 if (m_src.size() < 3)
524 return true;
525
526 unsigned nsrc = alu_ops.at(m_opcode).nsrc;
527 assert(nsrc * m_alu_slots == m_src.size());
528
529
530 for (int s = 0; s < m_alu_slots && success; ++s) {
531 for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
532 auto ireg = m_src.begin() + s * nsrc;
533
534 AluReadportReservation rpr = rpr_sum;
535 PVirtualValue s[3];
536
537 for (unsigned i = 0; i < nsrc; ++i, ++ireg)
538 s[i] = old_src->equal_to(**ireg) ? new_src : *ireg;
539
540 if (rpr.schedule_vec_src(s, nsrc, i)) {
541 rpr_sum = rpr;
542 break;
543 } else {
544 success = false;
545 }
546 }
547 }
548 return success;
549 }
550
add_extra_dependency(PVirtualValue value)551 void AluInstr::add_extra_dependency(PVirtualValue value)
552 {
553 auto reg = value->as_register();
554 if (reg)
555 m_extra_dependencies.insert(reg);
556 }
557
558
is_equal_to(const AluInstr & lhs) const559 bool AluInstr::is_equal_to(const AluInstr& lhs) const
560 {
561 if (lhs.m_opcode != m_opcode ||
562 lhs.m_bank_swizzle != m_bank_swizzle ||
563 lhs.m_cf_type != m_cf_type ||
564 lhs.m_alu_flags != m_alu_flags) {
565 return false;
566 }
567
568 if (m_dest) {
569 if (!lhs.m_dest) {
570 return false;
571 } else {
572 if (has_alu_flag(alu_write)) {
573 if (!m_dest->equal_to(*lhs.m_dest))
574 return false;
575 } else {
576 if (m_dest->chan() != lhs.m_dest->chan())
577 return false;
578 }
579 }
580 } else {
581 if (lhs.m_dest)
582 return false;
583 }
584
585 if (m_src.size() != lhs.m_src.size())
586 return false;
587
588 for (unsigned i = 0; i < m_src.size(); ++i) {
589 if (!m_src[i]->equal_to(*lhs.m_src[i]))
590 return false;
591 }
592
593 return true;
594 }
595
596 class ResolveIndirectArrayAddr: public ConstRegisterVisitor {
597 public:
visit(const Register & value)598 void visit(const Register& value){(void) value;}
visit(const LocalArray & value)599 void visit(const LocalArray& value) {(void)value; unreachable("An array can't be used as address");}
600 void visit(const LocalArrayValue& value);
601 void visit(const UniformValue& value);
visit(const LiteralConstant & value)602 void visit(const LiteralConstant& value) {(void)value;}
visit(const InlineConstant & value)603 void visit(const InlineConstant& value) {(void)value;}
604
605 PRegister addr{nullptr};
606 bool is_index{false};
607 };
608
visit(const LocalArrayValue & value)609 void ResolveIndirectArrayAddr::visit(const LocalArrayValue& value)
610 {
611 auto a = value.addr();
612 if (a)
613 addr = a->as_register();
614 }
615
visit(const UniformValue & value)616 void ResolveIndirectArrayAddr::visit(const UniformValue& value)
617 {
618 auto a = value.buf_addr();
619 if (a) {
620 addr = a->as_register();
621 is_index = true;
622 }
623 }
624
indirect_addr() const625 std::tuple<PRegister, bool, bool> AluInstr::indirect_addr() const
626 {
627 ResolveIndirectArrayAddr visitor;
628
629 if (m_dest) {
630 m_dest->accept(visitor);
631 if (visitor.addr)
632 return {visitor.addr, false, false};
633 }
634
635 for (auto s: m_src) {
636 s->accept(visitor);
637 if (visitor.addr) {
638 return {visitor.addr, !visitor.is_index, visitor.is_index};
639 }
640 }
641 return {nullptr, false, false};
642 }
643
split(ValueFactory & vf)644 AluGroup *AluInstr::split(ValueFactory& vf)
645 {
646 if (m_alu_slots == 1)
647 return nullptr;
648
649 sfn_log << SfnLog::instr << "Split " << *this << "\n";
650
651 auto group = new AluGroup();
652
653 m_dest->del_parent(this);
654
655 for (int s = 0; s < m_alu_slots; ++s) {
656
657 PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s);
658 if (dst->pin() != pin_chgr) {
659 auto pin = pin_chan;
660 if (dst->pin() == pin_group && m_dest->chan() == s)
661 pin = pin_chgr;
662 dst->set_pin(pin);
663 }
664
665 SrcValues src;
666 for (int i = 0; i < alu_ops.at(m_opcode).nsrc; ++i) {
667 auto old_src = m_src[s * alu_ops.at(m_opcode).nsrc + i];
668 // Make it easy for the scheduler and pin the register to the
669 // channel, otherwise scheduler would have to check whether a
670 // channel switch is possible
671 auto r = old_src->as_register();
672 if (r) {
673 if (r->pin() == pin_free || r->pin() == pin_none)
674 r->set_pin(pin_chan);
675 else if (r->pin() == pin_group)
676 r->set_pin(pin_chgr);
677 }
678 src.push_back(old_src);
679 }
680
681 auto instr = new AluInstr(m_opcode, dst, src, {}, 1);
682 instr->set_blockid(block_id(), index());
683
684 if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
685 if (has_alu_flag(alu_src0_neg))
686 instr->set_alu_flag(alu_src0_neg);
687 if (has_alu_flag(alu_src1_neg))
688 instr->set_alu_flag(alu_src1_neg);
689 if (has_alu_flag(alu_src2_neg))
690 instr->set_alu_flag(alu_src2_neg);
691 if (has_alu_flag(alu_src0_abs))
692 instr->set_alu_flag(alu_src0_abs);
693 if (has_alu_flag(alu_src1_abs))
694 instr->set_alu_flag(alu_src1_abs);
695 }
696 if (has_alu_flag(alu_dst_clamp))
697 instr->set_alu_flag(alu_dst_clamp);
698
699 if (s == m_dest->chan())
700 instr->set_alu_flag(alu_write);
701
702 m_dest->add_parent(instr);
703 sfn_log << SfnLog::instr << " " << *instr << "\n";
704
705 if (!group->add_instruction(instr)) {
706 std::cerr << "Unable to schedule '" << *instr << "' into\n"
707 << *group << "\n";
708
709 unreachable("Invalid group instruction");
710 }
711 }
712 group->set_blockid(block_id(), index());
713
714 for (auto s : m_src) {
715 auto r = s->as_register();
716 if (r) {
717 r->del_use(this);
718 }
719 }
720
721 return group;
722 }
723
724 /* Alu instructions that have SSA dest registers increase the regietsr pressure
725 * Alu instructions that read from SSA registers may decresase the register pressure
726 * hency evaluate a priorityx values based on register pressure change */
register_priority() const727 int AluInstr::register_priority() const
728 {
729 int priority = 0;
730 if (!has_alu_flag(alu_no_schedule_bias)) {
731
732 if (m_dest && m_dest->is_ssa() && has_alu_flag(alu_write)) {
733 if (m_dest->pin() != pin_group && m_dest->pin() != pin_chgr)
734 priority--;
735 }
736
737 for (const auto s : m_src) {
738 auto r = s->as_register();
739 if (r && r->is_ssa()) {
740 int pending = 0;
741 for (auto b : r->uses()) {
742 if (!b->is_scheduled())
743 ++pending;
744 }
745 if (pending == 1)
746 ++priority;
747 }
748 }
749 }
750 return priority;
751 }
752
propagate_death()753 bool AluInstr::propagate_death()
754 {
755 if (!m_dest)
756 return true;
757
758 if (m_dest->pin() == pin_group ||
759 m_dest->pin() == pin_chan) {
760 switch (m_opcode) {
761 case op2_interp_x:
762 case op2_interp_xy:
763 case op2_interp_z:
764 case op2_interp_zw:
765 reset_alu_flag(alu_write);
766 return false;
767 default:
768 ;
769 }
770 }
771
772 if (m_dest->pin() == pin_array)
773 return false;
774
775 /* We assume that nir does a good job in eliminating all ALU results that
776 * are not needed, and we don't let copy propagation doesn't make the
777 * instruction obsolte, so just keep all */
778 if (has_alu_flag(alu_is_cayman_trans))
779 return false;
780
781 for (auto& src: m_src) {
782 auto reg = src->as_register();
783 if (reg)
784 reg->del_use(this);
785 }
786 return true;
787 }
788
has_lds_access() const789 bool AluInstr::has_lds_access() const
790 {
791 return has_alu_flag(alu_is_lds) || has_lds_queue_read();
792 }
793
has_lds_queue_read() const794 bool AluInstr::has_lds_queue_read() const
795 {
796 for (auto& s : m_src) {
797 auto ic = s->as_inline_const();
798 if (!ic)
799 continue;
800
801 if (ic->sel() == ALU_SRC_LDS_OQ_A_POP ||
802 ic->sel() == ALU_SRC_LDS_OQ_B_POP)
803 return true;
804 }
805 return false;
806 }
807
808 struct OpDescr {
809 union {
810 EAluOp alu_opcode;
811 ESDOp lds_opcode;
812 };
813 int nsrc;
814 };
815
816 static std::map<std::string, OpDescr> s_alu_map_by_name;
817 static std::map<std::string, OpDescr> s_lds_map_by_name;
818
from_string(istream & is,ValueFactory & value_factory,AluGroup * group)819 Instr::Pointer AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group)
820 {
821 vector<string> tokens;
822
823 while (is.good() && !is.eof()) {
824 string t;
825 is >> t;
826 if (t.length() > 0) {
827 tokens.push_back(t);
828 }
829 }
830
831 std::set<AluModifiers> flags;
832 auto t = tokens.begin();
833
834 bool is_lds = false;
835
836 if (*t == "LDS") {
837 is_lds = true;
838 t++;
839 }
840
841 string opstr = *t++;
842 string deststr = *t++;
843
844 if (deststr == "CLAMP") {
845 flags.insert(alu_dst_clamp);
846 deststr = *t++;
847 }
848
849 assert(*t == ":");
850 OpDescr op_descr = {{op_invalid}, -1};
851
852 if (is_lds) {
853 auto op = s_lds_map_by_name.find(opstr);
854 if (op == s_lds_map_by_name.end()) {
855 for(auto [opcode, opdescr] : lds_ops ) {
856 if (opstr == opdescr.name) {
857 op_descr.lds_opcode = opcode;
858 op_descr.nsrc = opdescr.nsrc;
859 s_alu_map_by_name[opstr] = op_descr;
860 break;
861 }
862 }
863
864 if (op_descr.nsrc == -1) {
865 std::cerr << "'" << opstr << "'";
866 unreachable("Unknown opcode");
867 return nullptr;
868 }
869 } else {
870 op_descr = op->second;
871 }
872 } else {
873 auto op = s_alu_map_by_name.find(opstr);
874 if (op == s_alu_map_by_name.end()) {
875 for(auto [opcode, opdescr] : alu_ops ) {
876 if (opstr == opdescr.name) {
877 op_descr = {{opcode}, opdescr.nsrc};
878 s_alu_map_by_name[opstr] = op_descr;
879 break;
880 }
881 }
882
883 if (op_descr.nsrc == -1) {
884 std::cerr << "'" << opstr << "'";
885 unreachable("Unknown opcode");
886 return nullptr;
887 }
888 } else {
889 op_descr = op->second;
890 }
891 }
892
893 int slots = 0;
894
895 SrcValues sources;
896 do {
897 ++t;
898 for (int i = 0; i < op_descr.nsrc; ++i) {
899 string srcstr = *t++;
900
901 if (srcstr[0] == '-') {
902 if (!slots)
903 flags.insert(AluInstr::src_neg_flags[i]);
904 else
905 assert(flags.find(AluInstr::src_neg_flags[i]) != flags.end());
906 srcstr = srcstr.substr(1);
907 }
908
909 if (srcstr[0] == '|') {
910 assert(srcstr[srcstr.length() - 1] == '|');
911 if (!slots)
912 flags.insert(AluInstr::src_abs_flags[i]);
913 else
914 assert(flags.find(AluInstr::src_abs_flags[i]) != flags.end());
915 srcstr = srcstr.substr(1, srcstr.length() - 2);
916 }
917
918 auto src = value_factory.src_from_string(srcstr);
919 if (!src) {
920 std::cerr << "Unable to create src[" << i << "] from " << srcstr << "\n";
921 assert(src);
922 }
923 sources.push_back(src);
924 }
925 ++slots;
926 } while (t != tokens.end() && *t == "+");
927
928 AluBankSwizzle bank_swizzle = alu_vec_unknown;
929 ECFAluOpCode cf = cf_alu;
930
931 while (t != tokens.end()) {
932
933 switch ((*t)[0]) {
934 case '{': {
935 auto iflag = t->begin() + 1;
936 while (iflag != t->end()) {
937 if (*iflag == '}')
938 break;
939
940 switch (*iflag) {
941 case 'L': flags.insert(alu_last_instr); break;
942 case 'W': flags.insert(alu_write); break;
943 case 'E': flags.insert(alu_update_exec); break;
944 case 'P': flags.insert(alu_update_pred); break;
945 }
946 ++iflag;
947 }
948 }
949 break;
950
951 case 'V': {
952 string bs = *t;
953 if (bs == "VEC_012")
954 bank_swizzle = alu_vec_012;
955 else if (bs == "VEC_021")
956 bank_swizzle = alu_vec_021;
957 else if (bs == "VEC_102")
958 bank_swizzle = alu_vec_102;
959 else if (bs == "VEC_120")
960 bank_swizzle = alu_vec_120;
961 else if (bs == "VEC_201")
962 bank_swizzle = alu_vec_201;
963 else if (bs == "VEC_210")
964 bank_swizzle = alu_vec_210;
965 else {
966 std::cerr << "'" << bs << "': ";
967 unreachable("Unknowe bankswizzle given");
968 }
969 }
970 break;
971
972 default: {
973 string cf_str = *t;
974 if (cf_str == "PUSH_BEFORE")
975 cf = cf_alu_push_before;
976 else if (cf_str == "POP_AFTER")
977 cf = cf_alu_pop_after;
978 else if (cf_str == "POP2_AFTER")
979 cf = cf_alu_pop2_after;
980 else if (cf_str == "EXTENDED")
981 cf = cf_alu_extended;
982 else if (cf_str == "BREAK")
983 cf = cf_alu_break;
984 else if (cf_str == "CONT")
985 cf = cf_alu_continue;
986 else if (cf_str == "ELSE_AFTER")
987 cf = cf_alu_else_after;
988 else {
989 std::cerr << " '" << cf_str << "' ";
990 unreachable("Unknown tocken in ALU instruction");
991 }
992 }
993 }
994 ++t;
995 }
996
997 PRegister dest = nullptr;
998 // construct instruction
999 if (deststr != "(null)")
1000 dest = value_factory.dest_from_string(deststr);
1001
1002 AluInstr *retval = nullptr;
1003 if (is_lds)
1004 retval = new AluInstr(op_descr.lds_opcode, sources, flags);
1005 else
1006 retval = new AluInstr(op_descr.alu_opcode, dest, sources, flags, slots);
1007
1008 retval->set_bank_swizzle(bank_swizzle);
1009 retval->set_cf_type(cf);
1010 if (group) {
1011 group->add_instruction(retval);
1012 retval= nullptr;
1013 }
1014 return retval;
1015 }
1016
do_ready() const1017 bool AluInstr::do_ready() const
1018 {
1019 /* Alu instructions are shuffled by the scheduler, so
1020 * we have to make sure that required ops are already
1021 * scheduled before marking this one ready */
1022 for (auto i: required_instr()) {
1023 if (!i->is_scheduled())
1024 return false;
1025 }
1026
1027 for (auto s : m_src) {
1028 auto r = s->as_register();
1029 if (r) {
1030 if (!r->ready(block_id(), index()))
1031 return false;
1032 }
1033 auto u = s->as_uniform();
1034 if (u && u->buf_addr() && u->buf_addr()->as_register()) {
1035 if (!u->buf_addr()->as_register()->ready(block_id(), index()))
1036 return false;
1037 }
1038 }
1039
1040 if (m_dest && !m_dest->is_ssa()) {
1041 if (m_dest->pin() == pin_array) {
1042 auto av = static_cast<const LocalArrayValue *>(m_dest);
1043 auto addr = av->addr();
1044 /* For true indiect dest access we have to make sure that all
1045 * instructions that write the value before are schedukled */
1046 if (addr && (!addr->ready(block_id(), index()) ||
1047 !m_dest->ready(block_id(), index() - 1)))
1048 return false;
1049 }
1050
1051 /* If a register is updates, we have to make sure that uses before that
1052 * update are scheduled, otherwise we may use the updated value when we
1053 * shouldn't */
1054 for (auto u : m_dest->uses()) {
1055 if (u->block_id() <= block_id() && u->index() < index() &&
1056 !u->is_scheduled()) {
1057 return false;
1058 }
1059 }
1060 }
1061
1062 for (auto& r : m_extra_dependencies) {
1063 if (!r->ready(block_id(), index()))
1064 return false;
1065 }
1066
1067 return true;
1068 }
1069
visit(AluGroup * instr)1070 void AluInstrVisitor::visit(AluGroup *instr)
1071 {
1072 for (auto& i : *instr) {
1073 if (i)
1074 i->accept(*this);
1075 }
1076 }
1077
visit(Block * instr)1078 void AluInstrVisitor::visit(Block *instr)
1079 {
1080 for (auto& i : *instr)
1081 i->accept(*this);
1082 }
1083
visit(IfInstr * instr)1084 void AluInstrVisitor::visit(IfInstr *instr)
1085 {
1086 instr->predicate()->accept(*this);
1087 }
1088
1089 static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader);
1090
1091 static bool emit_alu_op1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1092 const AluOpFlags& flags = 0);
1093 static bool emit_alu_op1_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_chan);
1094 static bool emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader);
1095 static bool emit_alu_neg(const nir_alu_instr& alu, Shader& shader);
1096 static bool emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1097 static bool emit_alu_op2_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_order);
1098 static bool emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_order);
1099 static bool emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1100 static bool emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader);
1101 static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
1102 static bool emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
1103 static bool emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
1104 static bool emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
1105
1106 static bool emit_alu_op2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1107 AluInstr::Op2Options opts = AluInstr::op2_opt_none);
1108 static bool emit_alu_op2_int(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1109 AluInstr::Op2Options opts = AluInstr::op2_opt_none);
1110 static bool emit_alu_op3(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1111 const std::array<int, 3>& src_shuffle = {0,1,2});
1112 static bool emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1113 static bool emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
1114 static bool emit_any_all_icomp(const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
1115
1116 static bool emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1117
1118 static bool emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1119 static bool emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader);
1120 static bool emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader);
1121 static bool emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader);
1122 static bool emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader);
1123 static bool emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader);
1124 static bool emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader);
1125
1126 static bool emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader);
1127 static bool emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader);
1128
1129 static bool emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
1130 static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);
1131
1132 static bool emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1133 static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1134
1135 static bool emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1136 static bool emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1137
1138 static bool emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
1139
1140 static bool emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader);
1141
1142 static bool emit_alu_cube(const nir_alu_instr& alu, Shader& shader);
1143
1144 static bool emit_fdph(const nir_alu_instr& alu, Shader& shader);
1145
check_64_bit_op_src(nir_src * src,void * state)1146 static bool check_64_bit_op_src(nir_src *src, void *state)
1147 {
1148 if (nir_src_bit_size(*src) == 64) {
1149 *(bool*)state = true;
1150 return false;
1151 }
1152 return true;
1153 }
1154
check_64_bit_op_dest(nir_dest * dest,void * state)1155 static bool check_64_bit_op_dest(nir_dest *dest, void *state)
1156 {
1157 if (nir_dest_bit_size(*dest) == 64) {
1158 *(bool*)state = true;
1159 return false;
1160 }
1161 return true;
1162 }
1163
from_nir(nir_alu_instr * alu,Shader & shader)1164 bool AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
1165 {
1166 bool is_64bit_op = false;
1167 nir_foreach_src(&alu->instr, check_64_bit_op_src, &is_64bit_op);
1168 if (!is_64bit_op)
1169 nir_foreach_dest(&alu->instr, check_64_bit_op_dest, &is_64bit_op);
1170
1171
1172 if (is_64bit_op) {
1173 switch (alu->op) {
1174 case nir_op_pack_64_2x32:
1175 case nir_op_unpack_64_2x32:
1176 case nir_op_pack_64_2x32_split:
1177 case nir_op_pack_half_2x16_split:
1178 case nir_op_unpack_64_2x32_split_x:
1179 case nir_op_unpack_64_2x32_split_y: break;
1180 case nir_op_mov: return emit_alu_mov_64bit(*alu, shader);
1181 case nir_op_fneg: return emit_alu_neg(*alu, shader);
1182 case nir_op_ffract: return emit_alu_op1_64bit(*alu, op1_fract_64, shader, true);
1183 case nir_op_feq32: return emit_alu_op2_64bit_one_dst(*alu, op2_sete_64, shader, false);
1184 case nir_op_fge32: return emit_alu_op2_64bit_one_dst(*alu, op2_setge_64, shader, false);
1185 case nir_op_flt32: return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true);
1186 case nir_op_fneu32: return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
1187 case nir_op_ffma: return emit_alu_fma_64bit(*alu, op3_fma_64, shader);
1188
1189 case nir_op_fadd: return emit_alu_op2_64bit(*alu, op2_add_64, shader, false);
1190 case nir_op_fmul: return emit_alu_op2_64bit(*alu, op2_mul_64, shader, false);
1191 case nir_op_fmax: return emit_alu_op2_64bit(*alu, op2_max_64, shader, false);
1192 case nir_op_fmin: return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
1193 case nir_op_b2f64: return emit_alu_b2f64(*alu, shader);
1194 case nir_op_f2f64: return emit_alu_f2f64(*alu, shader);
1195 case nir_op_i2f64: return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
1196 case nir_op_u2f64: return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
1197 case nir_op_f2f32: return emit_alu_f2f32(*alu, shader);
1198 case nir_op_fabs: return emit_alu_abs64(*alu, shader);
1199 case nir_op_fsqrt: return emit_alu_op1_64bit_trans(*alu, op1_sqrt_64, shader);
1200 case nir_op_frcp: return emit_alu_op1_64bit_trans(*alu, op1_recip_64, shader);
1201 case nir_op_frsq: return emit_alu_op1_64bit_trans(*alu, op1_recipsqrt_64, shader);
1202 case nir_op_vec2: return emit_alu_vec2_64(*alu, shader);
1203 default:
1204 return false;
1205 ;
1206 }
1207 }
1208
1209
1210 if (shader.chip_class() == ISA_CC_CAYMAN) {
1211 switch (alu->op) {
1212 case nir_op_fcos_amd: return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
1213 case nir_op_fexp2: return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
1214 case nir_op_flog2: return emit_alu_trans_op1_cayman(*alu, op1_log_clamped, shader);
1215 case nir_op_frcp: return emit_alu_trans_op1_cayman(*alu, op1_recip_ieee, shader);
1216 case nir_op_frsq: return emit_alu_trans_op1_cayman(*alu, op1_recipsqrt_ieee1, shader);
1217 case nir_op_fsqrt: return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
1218 case nir_op_fsin_amd: return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
1219 case nir_op_i2f32: return emit_alu_op1(*alu, op1_int_to_flt, shader);
1220 case nir_op_u2f32: return emit_alu_op1(*alu, op1_uint_to_flt, shader);
1221 case nir_op_imul: return emit_alu_trans_op2_cayman(*alu, op2_mullo_int, shader);
1222 case nir_op_imul_high: return emit_alu_trans_op2_cayman(*alu, op2_mulhi_int, shader);
1223 case nir_op_umul_high: return emit_alu_trans_op2_cayman(*alu, op2_mulhi_uint, shader);
1224 case nir_op_f2u32: return emit_alu_op1(*alu, op1_flt_to_uint, shader);
1225 case nir_op_f2i32: return emit_alu_op1(*alu, op1_flt_to_int, shader);
1226 case nir_op_ishl: return emit_alu_op2_int(*alu, op2_lshl_int, shader);
1227 case nir_op_ishr: return emit_alu_op2_int(*alu, op2_ashr_int, shader);
1228 case nir_op_ushr: return emit_alu_op2_int(*alu, op2_lshr_int, shader);
1229 default:
1230 ;
1231 }
1232 } else {
1233 if (shader.chip_class() == ISA_CC_EVERGREEN) {
1234 switch (alu->op) {
1235 case nir_op_f2i32: return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_int, shader);
1236 case nir_op_f2u32: return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_uint, shader);
1237 default:
1238 ;
1239 }
1240 }
1241
1242 if (shader.chip_class() >= ISA_CC_R700) {
1243 switch (alu->op) {
1244 case nir_op_ishl: return emit_alu_op2_int(*alu, op2_lshl_int, shader);
1245 case nir_op_ishr: return emit_alu_op2_int(*alu, op2_ashr_int, shader);
1246 case nir_op_ushr: return emit_alu_op2_int(*alu, op2_lshr_int, shader);
1247 default:
1248 ;
1249 }
1250 } else {
1251 switch (alu->op) {
1252 case nir_op_ishl: return emit_alu_trans_op2_eg(*alu, op2_lshl_int, shader);
1253 case nir_op_ishr: return emit_alu_trans_op2_eg(*alu, op2_ashr_int, shader);
1254 case nir_op_ushr: return emit_alu_trans_op2_eg(*alu, op2_lshr_int, shader);
1255 default:
1256 ;
1257 }
1258 }
1259
1260 switch (alu->op) {
1261 case nir_op_f2i32: return emit_alu_trans_op1_eg(*alu, op1_flt_to_int, shader);
1262 case nir_op_f2u32: return emit_alu_trans_op1_eg(*alu, op1_flt_to_uint, shader);
1263 case nir_op_fcos_amd: return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
1264 case nir_op_fexp2: return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
1265 case nir_op_flog2: return emit_alu_trans_op1_eg(*alu, op1_log_clamped, shader);
1266 case nir_op_frcp: return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
1267 case nir_op_frsq: return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
1268 case nir_op_fsin_amd: return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
1269 case nir_op_fsqrt: return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
1270 case nir_op_i2f32: return emit_alu_trans_op1_eg(*alu, op1_int_to_flt, shader);
1271 case nir_op_u2f32: return emit_alu_trans_op1_eg(*alu, op1_uint_to_flt, shader);
1272 case nir_op_imul: return emit_alu_trans_op2_eg(*alu, op2_mullo_int, shader);
1273 case nir_op_imul_high: return emit_alu_trans_op2_eg(*alu, op2_mulhi_int, shader);
1274 case nir_op_umul_high: return emit_alu_trans_op2_eg(*alu, op2_mulhi_uint, shader);
1275 default:
1276 ;
1277 }
1278 }
1279
1280 switch (alu->op) {
1281 case nir_op_b2b1: return emit_alu_op1(*alu, op1_mov, shader);
1282 case nir_op_b2b32: return emit_alu_op1(*alu, op1_mov, shader);
1283 case nir_op_b2f32: return emit_alu_b2x(*alu, ALU_SRC_1, shader);
1284 case nir_op_b2i32: return emit_alu_b2x(*alu, ALU_SRC_1_INT, shader);
1285
1286 case nir_op_bfm: return emit_alu_op2_int(*alu, op2_bfm_int, shader, op2_opt_none);
1287 case nir_op_bit_count: return emit_alu_op1(*alu, op1_bcnt_int, shader);
1288
1289 case nir_op_bitfield_reverse: return emit_alu_op1(*alu, op1_bfrev_int, shader);
1290 case nir_op_bitfield_select: return emit_alu_op3(*alu, op3_bfi_int, shader);
1291
1292 case nir_op_b32all_fequal2: return emit_any_all_fcomp2(*alu, op2_sete_dx10, shader);
1293 case nir_op_b32all_fequal3: return emit_any_all_fcomp(*alu, op2_sete, 3, true, shader);
1294 case nir_op_b32all_fequal4: return emit_any_all_fcomp(*alu, op2_sete, 4, true, shader);
1295 case nir_op_b32all_iequal2: return emit_any_all_icomp(*alu, op2_sete_int, 2, true, shader);
1296 case nir_op_b32all_iequal3: return emit_any_all_icomp(*alu, op2_sete_int, 3, true, shader);
1297 case nir_op_b32all_iequal4: return emit_any_all_icomp(*alu, op2_sete_int, 4, true, shader);
1298 case nir_op_b32any_fnequal2: return emit_any_all_fcomp2(*alu, op2_setne_dx10, shader);
1299 case nir_op_b32any_fnequal3: return emit_any_all_fcomp(*alu, op2_setne, 3, false, shader);
1300 case nir_op_b32any_fnequal4: return emit_any_all_fcomp(*alu, op2_setne, 4, false, shader);
1301 case nir_op_b32any_inequal2: return emit_any_all_icomp(*alu, op2_setne_int, 2, false, shader);
1302 case nir_op_b32any_inequal3: return emit_any_all_icomp(*alu, op2_setne_int, 3, false, shader);
1303 case nir_op_b32any_inequal4: return emit_any_all_icomp(*alu, op2_setne_int, 4, false, shader);
1304 case nir_op_b32csel: return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});
1305
1306 case nir_op_f2b32: return emit_alu_comb_with_zero(*alu, op2_setne_dx10, shader);
1307 case nir_op_fabs: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_abs});
1308 case nir_op_fadd: return emit_alu_op2(*alu, op2_add, shader);
1309 case nir_op_fceil: return emit_alu_op1(*alu, op1_ceil, shader);
1310 case nir_op_fcsel: return emit_alu_op3(*alu, op3_cnde, shader, {0, 2, 1});
1311 case nir_op_fcsel_ge: return emit_alu_op3(*alu, op3_cndge, shader, {0, 1, 2});
1312 case nir_op_fcsel_gt: return emit_alu_op3(*alu, op3_cndgt, shader, {0, 1, 2});
1313
1314 case nir_op_fdph: return emit_fdph(*alu, shader);
1315 case nir_op_fdot2: return emit_dot(*alu, 2, shader);
1316 case nir_op_fdot3: return emit_dot(*alu, 3, shader);
1317 case nir_op_fdot4: return emit_dot(*alu, 4, shader);
1318
1319 case nir_op_feq32:
1320 case nir_op_feq: return emit_alu_op2(*alu, op2_sete_dx10, shader);
1321 case nir_op_ffloor: return emit_alu_op1(*alu, op1_floor, shader);
1322 case nir_op_ffract: return emit_alu_op1(*alu, op1_fract, shader);
1323 case nir_op_fge32: return emit_alu_op2(*alu, op2_setge_dx10, shader);
1324 case nir_op_fge: return emit_alu_op2(*alu, op2_setge_dx10, shader);
1325 case nir_op_find_lsb: return emit_alu_op1(*alu, op1_ffbl_int, shader);
1326
1327 case nir_op_flt32: return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
1328 case nir_op_flt: return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
1329 case nir_op_fmax: return emit_alu_op2(*alu, op2_max_dx10, shader);
1330 case nir_op_fmin: return emit_alu_op2(*alu, op2_min_dx10, shader);
1331
1332 case nir_op_fmul:
1333 if (!shader.has_flag(Shader::sh_legacy_math_rules))
1334 return emit_alu_op2(*alu, op2_mul_ieee, shader);
1335 FALLTHROUGH;
1336 case nir_op_fmulz: return emit_alu_op2(*alu, op2_mul, shader);
1337
1338 case nir_op_fneg: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_src0_neg});
1339 case nir_op_fneu32: return emit_alu_op2(*alu, op2_setne_dx10, shader);
1340 case nir_op_fneu: return emit_alu_op2(*alu, op2_setne_dx10, shader);
1341
1342 case nir_op_fround_even: return emit_alu_op1(*alu, op1_rndne, shader);
1343 case nir_op_fsat: return emit_alu_op1(*alu, op1_mov, shader, {1 << alu_dst_clamp});
1344 case nir_op_fsub: return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
1345 case nir_op_ftrunc: return emit_alu_op1(*alu, op1_trunc, shader);
1346 case nir_op_i2b1:
1347 case nir_op_i2b32: return emit_alu_i2orf2_b1(*alu, op2_setne_int, shader);
1348 case nir_op_iadd: return emit_alu_op2_int(*alu, op2_add_int, shader);
1349 case nir_op_iand: return emit_alu_op2_int(*alu, op2_and_int, shader);
1350 case nir_op_ibfe: return emit_alu_op3(*alu, op3_bfe_int, shader);
1351 case nir_op_i32csel_ge: return emit_alu_op3(*alu, op3_cndge_int, shader, {0, 1, 2});
1352 case nir_op_i32csel_gt: return emit_alu_op3(*alu, op3_cndgt_int, shader, {0, 1, 2});
1353 case nir_op_ieq32: return emit_alu_op2_int(*alu, op2_sete_int, shader);
1354 case nir_op_ieq: return emit_alu_op2_int(*alu, op2_sete_int, shader);
1355 case nir_op_ifind_msb_rev: return emit_alu_op1(*alu, op1_ffbh_int, shader);
1356 case nir_op_ige32: return emit_alu_op2_int(*alu, op2_setge_int, shader);
1357 case nir_op_ige: return emit_alu_op2_int(*alu, op2_setge_int, shader);
1358 case nir_op_ilt32: return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
1359 case nir_op_ilt: return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
1360 case nir_op_imax: return emit_alu_op2_int(*alu, op2_max_int, shader);
1361 case nir_op_imin: return emit_alu_op2_int(*alu, op2_min_int, shader);
1362 case nir_op_ine32: return emit_alu_op2_int(*alu, op2_setne_int, shader);
1363 case nir_op_ine: return emit_alu_op2_int(*alu, op2_setne_int, shader);
1364 case nir_op_ineg: return emit_alu_comb_with_zero(*alu, op2_sub_int, shader);
1365 case nir_op_inot: return emit_alu_op1(*alu, op1_not_int, shader);
1366 case nir_op_ior: return emit_alu_op2_int(*alu, op2_or_int, shader);
1367 case nir_op_isub: return emit_alu_op2_int(*alu, op2_sub_int, shader);
1368 case nir_op_ixor: return emit_alu_op2_int(*alu, op2_xor_int, shader);
1369 case nir_op_pack_64_2x32: return emit_pack_64_2x32(*alu, shader);
1370 case nir_op_unpack_64_2x32: return emit_unpack_64_2x32(*alu, shader);
1371 case nir_op_pack_64_2x32_split: return emit_pack_64_2x32_split(*alu, shader);
1372 case nir_op_pack_half_2x16_split: return emit_pack_32_2x16_split(*alu, shader);
1373 case nir_op_slt: return emit_alu_op2(*alu, op2_setgt, shader, op2_opt_reverse);
1374 case nir_op_sge: return emit_alu_op2(*alu, op2_setge, shader);
1375 case nir_op_ubfe: return emit_alu_op3(*alu, op3_bfe_uint, shader);
1376 case nir_op_ufind_msb_rev: return emit_alu_op1(*alu, op1_ffbh_uint, shader);
1377 case nir_op_uge32: return emit_alu_op2_int(*alu, op2_setge_uint, shader);
1378 case nir_op_uge: return emit_alu_op2_int(*alu, op2_setge_uint, shader);
1379 case nir_op_ult32: return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
1380 case nir_op_ult: return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
1381 case nir_op_umad24: return emit_alu_op3(*alu, op3_muladd_uint24, shader, {0, 1, 2});
1382 case nir_op_umax: return emit_alu_op2_int(*alu, op2_max_uint, shader);
1383 case nir_op_umin: return emit_alu_op2_int(*alu, op2_min_uint, shader);
1384 case nir_op_umul24: return emit_alu_op2(*alu, op2_mul_uint24, shader);
1385 case nir_op_unpack_64_2x32_split_x: return emit_unpack_64_2x32_split(*alu, 0, shader);
1386 case nir_op_unpack_64_2x32_split_y: return emit_unpack_64_2x32_split(*alu, 1, shader);
1387 case nir_op_unpack_half_2x16_split_x: return emit_unpack_32_2x16_split_x(*alu, shader);
1388 case nir_op_unpack_half_2x16_split_y: return emit_unpack_32_2x16_split_y(*alu, shader);
1389
1390 case nir_op_ffma:
1391 if (!shader.has_flag(Shader::sh_legacy_math_rules))
1392 return emit_alu_op3(*alu, op3_muladd_ieee, shader);
1393 FALLTHROUGH;
1394 case nir_op_ffmaz: return emit_alu_op3(*alu, op3_muladd, shader);
1395
1396 case nir_op_mov: return emit_alu_op1(*alu, op1_mov, shader);
1397 case nir_op_f2i32: return emit_alu_op1(*alu, op1_flt_to_int, shader);
1398 case nir_op_vec2: return emit_create_vec(*alu, 2, shader);
1399 case nir_op_vec3: return emit_create_vec(*alu, 3, shader);
1400 case nir_op_vec4: return emit_create_vec(*alu, 4, shader);
1401
1402 case nir_op_fddx:
1403 case nir_op_fddx_coarse: return emit_tex_fdd(*alu, TexInstr::get_gradient_h, false, shader);
1404 case nir_op_fddx_fine: return emit_tex_fdd(*alu, TexInstr::get_gradient_h, true, shader);
1405 case nir_op_fddy:
1406 case nir_op_fddy_coarse: return emit_tex_fdd(*alu,TexInstr::get_gradient_v, false, shader);
1407 case nir_op_fddy_fine: return emit_tex_fdd(*alu, TexInstr::get_gradient_v, true, shader);
1408 case nir_op_cube_r600: return emit_alu_cube(*alu, shader);
1409 default:
1410 fprintf(stderr, "Unknown instruction '");
1411 nir_print_instr(&alu->instr, stderr);
1412 fprintf(stderr, "'\n");
1413 assert(0);
1414 return false;
1415 }
1416 }
1417
pin_for_components(const nir_alu_instr & alu)1418 static Pin pin_for_components(const nir_alu_instr& alu)
1419 {
1420 return (alu.dest.dest.is_ssa &&
1421 (nir_dest_num_components(alu.dest.dest) == 1)) ? pin_free : pin_none;
1422
1423 }
1424
emit_alu_op1_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_chan)1425 static bool emit_alu_op1_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_chan)
1426 {
1427 auto& value_factory = shader.value_factory();
1428
1429 auto group = new AluGroup();
1430
1431 AluInstr *ir = nullptr;
1432
1433 int swz[2] = {0,1};
1434 if (switch_chan) {
1435 swz[0] = 1;
1436 swz[1] = 0;
1437 }
1438
1439 for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1440 for (unsigned c = 0; c < 2 ; ++c) {
1441 ir = new AluInstr(opcode,
1442 value_factory.dest(alu.dest, 2 * i + c, pin_chan),
1443 value_factory.src64(alu.src[0], i, swz[c]),
1444 {alu_write});
1445 group->add_instruction(ir);
1446 }
1447 if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
1448 if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
1449 }
1450 if (ir)
1451 ir->set_alu_flag(alu_last_instr);
1452 shader.emit_instruction(group);
1453 return true;
1454 }
1455
emit_alu_mov_64bit(const nir_alu_instr & alu,Shader & shader)1456 static bool emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader)
1457 {
1458 auto& value_factory = shader.value_factory();
1459
1460 AluInstr *ir = nullptr;
1461
1462 for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1463 for (unsigned c = 0; c < 2 ; ++c) {
1464 ir = new AluInstr(op1_mov,
1465 value_factory.dest(alu.dest, 2 * i + c, pin_free),
1466 value_factory.src64(alu.src[0], i, c),
1467 {alu_write});
1468 shader.emit_instruction(ir);
1469 }
1470 if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
1471 if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
1472 }
1473 if (ir)
1474 ir->set_alu_flag(alu_last_instr);
1475 return true;
1476 }
1477
emit_alu_neg(const nir_alu_instr & alu,Shader & shader)1478 static bool emit_alu_neg(const nir_alu_instr& alu, Shader& shader)
1479 {
1480 auto& value_factory = shader.value_factory();
1481
1482 auto group = new AluGroup();
1483
1484 AluInstr *ir = nullptr;
1485
1486 for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1487 for (unsigned c = 0; c < 2 ; ++c) {
1488 ir = new AluInstr(op1_mov,
1489 value_factory.dest(alu.dest, 2 * i + c, pin_chan),
1490 value_factory.src64(alu.src[0], i, c),
1491 {alu_write});
1492 group->add_instruction(ir);
1493 }
1494 ir->set_alu_flag(alu_src0_neg);
1495 }
1496 if (ir)
1497 ir->set_alu_flag(alu_last_instr);
1498 shader.emit_instruction(group);
1499 return true;
1500 }
1501
emit_alu_abs64(const nir_alu_instr & alu,Shader & shader)1502 static bool emit_alu_abs64(const nir_alu_instr& alu, Shader& shader)
1503 {
1504 auto& value_factory = shader.value_factory();
1505
1506 assert(nir_dest_num_components(alu.dest.dest) == 1);
1507
1508 shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.dest, 0, pin_chan),
1509 value_factory.src64(alu.src[0], 0, 0),
1510 AluInstr::write));
1511
1512 auto ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, 1, pin_chan),
1513 value_factory.src64(alu.src[0], 0, 1),
1514 AluInstr::last_write);
1515 ir->set_alu_flag(alu_src0_abs);
1516 shader.emit_instruction(ir);
1517 return true;
1518 }
1519
emit_alu_op2_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_src)1520 static bool emit_alu_op2_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, bool switch_src)
1521 {
1522 auto& value_factory = shader.value_factory();
1523 auto group = new AluGroup();
1524 AluInstr *ir = nullptr;
1525 int order[2] = {0, 1};
1526 if (switch_src) {
1527 order[0] = 1;
1528 order[1] = 0;
1529 }
1530
1531 int num_emit0 = opcode == op2_mul_64 ? 3 : 1;
1532
1533 assert(num_emit0 == 1 || nir_dest_num_components(alu.dest.dest) == 1);
1534
1535
1536 for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) {
1537 int i = 0;
1538 for (; i < num_emit0; ++i) {
1539 auto dest = i < 2 ? value_factory.dest(alu.dest, i, pin_chan) :
1540 value_factory.dummy_dest(i);
1541
1542 ir = new AluInstr(opcode,
1543 dest,
1544 value_factory.src64(alu.src[order[0]], k, 1),
1545 value_factory.src64(alu.src[order[1]], k, 1),
1546 i < 2 ? AluInstr::write : AluInstr::empty);
1547
1548 if (alu.src[0].abs) ir->set_alu_flag(switch_src ? alu_src1_abs : alu_src0_abs);
1549 if (alu.src[1].abs) ir->set_alu_flag(switch_src ? alu_src0_abs : alu_src1_abs);
1550 if (alu.src[0].negate) ir->set_alu_flag(switch_src ? alu_src1_neg : alu_src0_neg);
1551 if (alu.src[1].negate) ir->set_alu_flag(switch_src ? alu_src0_neg : alu_src1_neg);
1552 if (alu.dest.saturate && i == 0) {
1553 ir->set_alu_flag(alu_dst_clamp);
1554 }
1555
1556 group->add_instruction(ir);
1557 }
1558
1559 auto dest = i == 1 ? value_factory.dest(alu.dest, i, pin_chan) :
1560 value_factory.dummy_dest(i);
1561
1562 ir = new AluInstr(opcode,
1563 dest,
1564 value_factory.src64(alu.src[order[0]], k, 0),
1565 value_factory.src64(alu.src[order[1]], k, 0),
1566 i == 1 ? AluInstr::write : AluInstr::empty);
1567 group->add_instruction(ir);
1568 }
1569 if (ir)
1570 ir->set_alu_flag(alu_last_instr);
1571
1572 shader.emit_instruction(group);
1573 return true;
1574 }
1575
emit_alu_op2_64bit_one_dst(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_order)1576 static bool emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu, EAluOp opcode,
1577 Shader& shader, bool switch_order)
1578 {
1579 auto& value_factory = shader.value_factory();
1580 AluInstr *ir = nullptr;
1581 int order[2] = {0, 1};
1582 if (switch_order) {
1583 order[0] = 1;
1584 order[1] = 0;
1585 }
1586
1587 AluInstr::SrcValues src(4);
1588
1589 for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) {
1590 auto dest = value_factory.dest(alu.dest, 2 * k, pin_chan);
1591 src[0] = value_factory.src64(alu.src[order[0]], k, 1);
1592 src[1] = value_factory.src64(alu.src[order[1]], k, 1);
1593 src[2] = value_factory.src64(alu.src[order[0]], k, 0);
1594 src[3] = value_factory.src64(alu.src[order[1]], k, 0);
1595
1596 ir = new AluInstr(opcode, dest, src, AluInstr::write, 2);
1597
1598 if (alu.src[0].abs) ir->set_alu_flag(switch_order ? alu_src1_abs : alu_src0_abs);
1599 if (alu.src[1].abs) ir->set_alu_flag(switch_order ? alu_src0_abs : alu_src1_abs);
1600 if (alu.src[0].negate) ir->set_alu_flag(switch_order ? alu_src1_neg : alu_src0_neg);
1601 if (alu.src[1].negate) ir->set_alu_flag(switch_order ? alu_src0_neg : alu_src1_neg);
1602 ir->set_alu_flag(alu_64bit_op);
1603
1604 shader.emit_instruction(ir);
1605 }
1606 if (ir)
1607 ir->set_alu_flag(alu_last_instr);
1608
1609 return true;
1610 }
1611
emit_alu_op1_64bit_trans(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)1612 static bool emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
1613 {
1614 auto& value_factory = shader.value_factory();
1615 auto group = new AluGroup();
1616 AluInstr *ir = nullptr;
1617 for (unsigned i = 0; i < 3; ++i) {
1618 ir = new AluInstr(opcode,
1619 i < 2 ? value_factory.dest(alu.dest, i, pin_chan) :
1620 value_factory.dummy_dest(i),
1621 value_factory.src64(alu.src[0], 0, 1),
1622 value_factory.src64(alu.src[0], 0, 0),
1623 i < 2 ? AluInstr::write : AluInstr::empty);
1624
1625 if (alu.src[0].abs || opcode == op1_sqrt_64) ir->set_alu_flag(alu_src1_abs);
1626 if (alu.src[0].negate) ir->set_alu_flag(alu_src1_neg);
1627
1628 group->add_instruction(ir);
1629 }
1630 if (ir)
1631 ir->set_alu_flag(alu_last_instr);
1632 shader.emit_instruction(group);
1633 return true;
1634
1635 }
1636
emit_alu_fma_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)1637 static bool emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
1638 {
1639 auto& value_factory = shader.value_factory();
1640 auto group = new AluGroup();
1641 AluInstr *ir = nullptr;
1642 for (unsigned i = 0; i < 4 ; ++i) {
1643
1644 int chan = i < 3 ? 1 : 0;
1645 auto dest = i < 2 ? value_factory.dest(alu.dest, i, pin_chan) :
1646 value_factory.dummy_dest(i);
1647
1648 ir = new AluInstr(opcode,
1649 dest,
1650 value_factory.src64(alu.src[0], 0, chan),
1651 value_factory.src64(alu.src[1], 0, chan),
1652 value_factory.src64(alu.src[2], 0, chan),
1653 i < 2 ? AluInstr::write : AluInstr::empty);
1654
1655 if (i < 3) {
1656 if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
1657 if (alu.src[1].negate) ir->set_alu_flag(alu_src1_neg);
1658 if (alu.src[2].negate) ir->set_alu_flag(alu_src2_neg);
1659 }
1660
1661 group->add_instruction(ir);
1662
1663 }
1664 if (ir)
1665 ir->set_alu_flag(alu_last_instr);
1666 shader.emit_instruction(group);
1667 return true;
1668 }
1669
emit_alu_b2f64(const nir_alu_instr & alu,Shader & shader)1670 static bool emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader)
1671 {
1672 auto& value_factory = shader.value_factory();
1673 auto group = new AluGroup();
1674 AluInstr *ir = nullptr;
1675
1676 for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1677 ir = new AluInstr(op2_and_int,
1678 value_factory.dest(alu.dest, 2 * i, pin_group),
1679 value_factory.src(alu.src[0], i),
1680 value_factory.zero(),
1681 {alu_write});
1682 group->add_instruction(ir);
1683
1684 ir = new AluInstr(op2_and_int,
1685 value_factory.dest(alu.dest, 2 * i + 1, pin_group),
1686 value_factory.src(alu.src[0], i),
1687 value_factory.literal(0x3ff00000),
1688 {alu_write});
1689 group->add_instruction(ir);
1690 }
1691 if (ir)
1692 ir->set_alu_flag(alu_last_instr);
1693 shader.emit_instruction(group);
1694 return true;
1695 }
1696
emit_alu_i2f64(const nir_alu_instr & alu,EAluOp op,Shader & shader)1697 static bool emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
1698 {
1699 /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
1700 auto& value_factory = shader.value_factory();
1701 auto group = new AluGroup();
1702 AluInstr *ir = nullptr;
1703
1704 assert(nir_dest_num_components(alu.dest.dest) == 1);
1705
1706 auto tmpx = value_factory.temp_register();
1707 shader.emit_instruction(new AluInstr(op2_and_int, tmpx,
1708 value_factory.src(alu.src[0], 0),
1709 value_factory.literal(0xffffff00),
1710 AluInstr::write));
1711 auto tmpy = value_factory.temp_register();
1712 shader.emit_instruction(new AluInstr(op2_and_int, tmpy,
1713 value_factory.src(alu.src[0], 0),
1714 value_factory.literal(0xff),
1715 AluInstr::last_write));
1716
1717
1718 auto tmpx2 = value_factory.temp_register();
1719 auto tmpy2 = value_factory.temp_register();
1720 shader.emit_instruction(new AluInstr(op, tmpx2, tmpx,
1721 AluInstr::last_write));
1722 shader.emit_instruction(new AluInstr(op, tmpy2, tmpy,
1723 AluInstr::last_write));
1724
1725 auto tmpx3 = value_factory.temp_register(0);
1726 auto tmpy3 = value_factory.temp_register(1);
1727 auto tmpz3 = value_factory.temp_register(2);
1728 auto tmpw3 = value_factory.temp_register(3);
1729
1730
1731 ir = new AluInstr(op1_flt32_to_flt64,
1732 tmpx3,
1733 tmpx2, AluInstr::write);
1734 group->add_instruction(ir);
1735 ir = new AluInstr(op1_flt32_to_flt64,
1736 tmpy3,
1737 value_factory.zero(), AluInstr::write);
1738 group->add_instruction(ir);
1739 ir = new AluInstr(op1_flt32_to_flt64,
1740 tmpz3,
1741 tmpy2, AluInstr::write);
1742 group->add_instruction(ir);
1743 ir = new AluInstr(op1_flt32_to_flt64,
1744 tmpw3,
1745 value_factory.zero(), AluInstr::last_write);
1746 group->add_instruction(ir);
1747 shader.emit_instruction(group);
1748
1749 group = new AluGroup();
1750
1751 ir = new AluInstr(op2_add_64,
1752 value_factory.dest(alu.dest, 0, pin_chan),
1753 tmpy3, tmpw3, AluInstr::write);
1754 group->add_instruction(ir);
1755 ir = new AluInstr(op2_add_64,
1756 value_factory.dest(alu.dest, 1, pin_chan),
1757 tmpx3, tmpz3, AluInstr::write);
1758 group->add_instruction(ir);
1759 shader.emit_instruction(group);
1760
1761 return true;
1762 }
1763
emit_alu_f2f64(const nir_alu_instr & alu,Shader & shader)1764 static bool emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
1765 {
1766 auto& value_factory = shader.value_factory();
1767 auto group = new AluGroup();
1768 AluInstr *ir = nullptr;
1769
1770 assert(nir_dest_num_components(alu.dest.dest) == 1);
1771
1772 ir = new AluInstr(op1_flt32_to_flt64,
1773 value_factory.dest(alu.dest, 0, pin_chan),
1774 value_factory.src(alu.src[0], 0), AluInstr::write);
1775 group->add_instruction(ir);
1776 ir = new AluInstr(op1_flt32_to_flt64,
1777 value_factory.dest(alu.dest, 1, pin_chan),
1778 value_factory.zero(), AluInstr::last_write);
1779 group->add_instruction(ir);
1780 shader.emit_instruction(group);
1781 return true;
1782 }
1783
emit_alu_f2f32(const nir_alu_instr & alu,Shader & shader)1784 static bool emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader)
1785 {
1786 auto& value_factory = shader.value_factory();
1787 auto group = new AluGroup();
1788 AluInstr *ir = nullptr;
1789
1790 ir = new AluInstr(op1v_flt64_to_flt32,
1791 value_factory.dest(alu.dest, 0, pin_chan),
1792 value_factory.src64(alu.src[0], 0, 1), {alu_write});
1793 group->add_instruction(ir);
1794 ir = new AluInstr(op1v_flt64_to_flt32,
1795 value_factory.dummy_dest(1),
1796 value_factory.src64(alu.src[0], 0, 0), AluInstr::last);
1797 group->add_instruction(ir);
1798 shader.emit_instruction(group);
1799 return true;
1800
1801 }
1802
emit_alu_b2x(const nir_alu_instr & alu,AluInlineConstants mask,Shader & shader)1803 static bool emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader)
1804 {
1805 auto& value_factory = shader.value_factory();
1806 AluInstr *ir = nullptr;
1807 auto pin = pin_for_components(alu);
1808
1809 for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1810 if (alu.dest.write_mask & (1 << i)){
1811 ir = new AluInstr(op2_and_int,
1812 value_factory.dest(alu.dest, i, pin),
1813 value_factory.src(alu.src[0], i),
1814 value_factory.inline_const(mask, 0),
1815 {alu_write});
1816 if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
1817 if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
1818 shader.emit_instruction(ir);
1819 }
1820 }
1821 if (ir)
1822 ir->set_alu_flag(alu_last_instr);
1823 return true;
1824 }
1825
emit_alu_op1(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,const AluOpFlags & flags)1826 static bool emit_alu_op1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader, const AluOpFlags& flags)
1827 {
1828 auto& value_factory = shader.value_factory();
1829
1830 AluInstr *ir = nullptr;
1831 auto pin = pin_for_components(alu);
1832
1833 for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1834 if (alu.dest.write_mask & (1 << i)){
1835 ir = new AluInstr(opcode, value_factory.dest(alu.dest, i, pin),
1836 value_factory.src(alu.src[0], i), {alu_write});
1837
1838 if (flags.test(alu_src0_abs) || alu.src[0].abs)
1839 ir->set_alu_flag(alu_src0_abs);
1840
1841 if (alu.src[0].negate ^ flags.test(alu_src0_neg))
1842 ir->set_alu_flag(alu_src0_neg);
1843
1844 if (flags.test(alu_dst_clamp) || alu.dest.saturate)
1845 ir->set_alu_flag(alu_dst_clamp);
1846
1847 shader.emit_instruction(ir);
1848 }
1849 }
1850 if (ir)
1851 ir->set_alu_flag(alu_last_instr);
1852 return true;
1853 }
1854
emit_alu_op2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)1855 static bool emit_alu_op2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1856 AluInstr::Op2Options opts)
1857 {
1858 auto& value_factory = shader.value_factory();
1859 const nir_alu_src *src0 = &alu.src[0];
1860 const nir_alu_src *src1 = &alu.src[1];
1861
1862 int idx0 = 0;
1863 int idx1 = 1;
1864 if (opts & AluInstr::op2_opt_reverse) {
1865 std::swap(src0, src1);
1866 std::swap(idx0, idx1);
1867 }
1868
1869 bool src1_negate = (opts & AluInstr::op2_opt_neg_src1) ^ src1->negate;
1870
1871 auto pin = pin_for_components(alu);
1872 AluInstr *ir = nullptr;
1873 for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1874 if (alu.dest.write_mask & (1 << i)){
1875 ir = new AluInstr(opcode,
1876 value_factory.dest(alu.dest.dest, i, pin),
1877 value_factory.src(*src0, i),
1878 value_factory.src(*src1, i), {alu_write});
1879
1880 if (src0->negate) ir->set_alu_flag(alu_src0_neg);
1881 if (src0->abs) ir->set_alu_flag(alu_src0_abs);
1882 if (src1_negate) ir->set_alu_flag(alu_src1_neg);
1883 if (src1->abs) ir->set_alu_flag(alu_src1_abs);
1884 if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
1885 shader.emit_instruction(ir);
1886 }
1887 }
1888 if (ir)
1889 ir->set_alu_flag(alu_last_instr);
1890 return true;
1891 }
1892
emit_alu_op2_int(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)1893 static bool emit_alu_op2_int(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1894 AluInstr::Op2Options opts)
1895 {
1896 assert(!alu.src[0].abs);
1897 assert(!alu.src[0].negate);
1898 assert(!alu.src[1].abs);
1899 assert(!alu.src[1].negate);
1900
1901 return emit_alu_op2(alu, opcode, shader, opts);
1902 }
1903
emit_alu_op3(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,const std::array<int,3> & src_shuffle)1904 static bool emit_alu_op3(const nir_alu_instr& alu, EAluOp opcode, Shader& shader,
1905 const std::array<int, 3>& src_shuffle)
1906 {
1907 auto& value_factory = shader.value_factory();
1908 const nir_alu_src *src[3];
1909 src[0] = &alu.src[src_shuffle[0]];
1910 src[1] = &alu.src[src_shuffle[1]];
1911 src[2] = &alu.src[src_shuffle[2]];
1912
1913 auto pin = pin_for_components(alu);
1914 AluInstr *ir = nullptr;
1915 for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest) ; ++i) {
1916 if (alu.dest.write_mask & (1 << i)){
1917 ir = new AluInstr(opcode, value_factory.dest(alu.dest.dest, i, pin),
1918 value_factory.src(*src[0], i),
1919 value_factory.src(*src[1], i),
1920 value_factory.src(*src[2], i),
1921 {alu_write});
1922
1923 if (src[0]->negate) ir->set_alu_flag(alu_src0_neg);
1924 if (src[1]->negate) ir->set_alu_flag(alu_src1_neg);
1925 if (src[2]->negate) ir->set_alu_flag(alu_src2_neg);
1926
1927 if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
1928 ir->set_alu_flag(alu_write);
1929 shader.emit_instruction(ir);
1930 }
1931 }
1932 if (ir)
1933 ir->set_alu_flag(alu_last_instr);
1934 return true;
1935 }
1936
emit_any_all_fcomp2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)1937 static bool emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
1938 {
1939 AluInstr *ir = nullptr;
1940 auto& value_factory = shader.value_factory();
1941
1942 PRegister tmp[2];
1943 tmp[0] = value_factory.temp_register();
1944 tmp[1] = value_factory.temp_register();
1945
1946 for (unsigned i = 0; i < 2 ; ++i) {
1947 ir = new AluInstr(opcode, tmp[i],
1948 value_factory.src(alu.src[0], i),
1949 value_factory.src(alu.src[1], i), {alu_write});
1950 if (alu.src[0].abs)
1951 ir->set_alu_flag(alu_src0_abs);
1952 if (alu.src[0].negate)
1953 ir->set_alu_flag(alu_src0_neg);
1954
1955 if (alu.src[1].abs)
1956 ir->set_alu_flag(alu_src1_abs);
1957 if (alu.src[1].negate)
1958 ir->set_alu_flag(alu_src1_neg);
1959
1960 shader.emit_instruction(ir);
1961 }
1962 ir->set_alu_flag(alu_last_instr);
1963
1964 opcode = (opcode == op2_setne_dx10) ? op2_or_int: op2_and_int;
1965 ir = new AluInstr(opcode,
1966 value_factory.dest(alu.dest, 0, pin_free),
1967 tmp[0], tmp[1], AluInstr::last_write);
1968 shader.emit_instruction(ir);
1969 return true;
1970 }
1971
emit_any_all_fcomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)1972 static bool emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
1973 {
1974 /* This should probabyl be lowered in nir */
1975 auto& value_factory = shader.value_factory();
1976
1977 AluInstr *ir = nullptr;
1978 RegisterVec4 v = value_factory.temp_vec4(pin_group);
1979 AluInstr::SrcValues s;
1980
1981 for (int i = 0; i < nc ; ++i) {
1982 s.push_back(v[i]);
1983 }
1984
1985 for (int i = nc; i < 4 ; ++i)
1986 s.push_back(value_factory.inline_const(all ? ALU_SRC_1 : ALU_SRC_0, 0));
1987
1988 for (int i = 0; i < nc ; ++i) {
1989 ir = new AluInstr(op, v[i],
1990 value_factory.src(alu.src[0], i),
1991 value_factory.src(alu.src[1], i), {alu_write});
1992
1993 if (alu.src[0].abs)
1994 ir->set_alu_flag(alu_src0_abs);
1995 if (alu.src[0].negate)
1996 ir->set_alu_flag(alu_src0_neg);
1997
1998 if (alu.src[1].abs)
1999 ir->set_alu_flag(alu_src1_abs);
2000 if (alu.src[1].negate)
2001 ir->set_alu_flag(alu_src1_neg);
2002
2003 shader.emit_instruction(ir);
2004 }
2005 if (ir)
2006 ir->set_alu_flag(alu_last_instr);
2007
2008 auto max_val = value_factory.temp_register();
2009
2010 ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4);
2011
2012 if (all)
2013 ir->set_alu_flag(alu_src0_neg);
2014
2015 shader.emit_instruction(ir);
2016
2017 if (all)
2018 op = (op == op2_sete) ? op2_sete_dx10: op2_setne_dx10;
2019 else
2020 op = (op == op2_sete) ? op2_setne_dx10: op2_sete_dx10;
2021
2022 ir = new AluInstr(op,
2023 value_factory.dest(alu.dest, 0, pin_free),
2024 max_val,
2025 value_factory.inline_const(ALU_SRC_1, 0),
2026 AluInstr::last_write);
2027 if (all)
2028 ir->set_alu_flag(alu_src1_neg);
2029 shader.emit_instruction(ir);
2030
2031 return true;
2032 }
2033
emit_any_all_icomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2034 static bool emit_any_all_icomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2035 {
2036 assert(!alu.src[0].abs);
2037 assert(!alu.src[0].negate);
2038 assert(!alu.src[1].abs);
2039 assert(!alu.src[1].negate);
2040
2041 /* This should probabyl be lowered in nir */
2042 auto& value_factory = shader.value_factory();
2043
2044 AluInstr *ir = nullptr;
2045 PRegister v[6];
2046
2047 auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
2048
2049 for (int i = 0; i < nc + nc/2; ++i)
2050 v[i] = value_factory.temp_register();
2051
2052 EAluOp combine = all ? op2_and_int : op2_or_int;
2053
2054 for (int i = 0; i < nc ; ++i) {
2055 ir = new AluInstr(op, v[i], value_factory.src(alu.src[0], i),
2056 value_factory.src(alu.src[1], i), AluInstr::write);
2057 shader.emit_instruction(ir);
2058 }
2059 if (ir)
2060 ir->set_alu_flag(alu_last_instr);
2061
2062 if (nc ==2) {
2063 ir = new AluInstr(combine, dest, v[0], v[1], AluInstr::last_write);
2064 shader.emit_instruction(ir);
2065 return true;
2066 }
2067
2068 if (nc == 3) {
2069 ir = new AluInstr(combine, v[3], v[0], v[1], AluInstr::last_write);
2070 shader.emit_instruction(ir);
2071 ir = new AluInstr(combine, dest, v[3], v[2], AluInstr::last_write);
2072 shader.emit_instruction(ir);
2073 return true;
2074 }
2075
2076 if (nc == 4) {
2077 ir = new AluInstr(combine, v[4], v[0], v[1], AluInstr::write);
2078 shader.emit_instruction(ir);
2079 ir = new AluInstr(combine, v[5], v[2], v[3], AluInstr::last_write);
2080 shader.emit_instruction(ir);
2081 ir = new AluInstr(combine, dest, v[4], v[5], AluInstr::last_write);
2082 shader.emit_instruction(ir);
2083 return true;
2084 }
2085
2086 return false;
2087 }
2088
emit_dot(const nir_alu_instr & alu,int n,Shader & shader)2089 static bool emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
2090 {
2091 auto& value_factory = shader.value_factory();
2092 const nir_alu_src& src0 = alu.src[0];
2093 const nir_alu_src& src1 = alu.src[1];
2094
2095 auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
2096
2097 AluInstr::SrcValues srcs(8);
2098
2099 for (int i = 0; i < n ; ++i) {
2100 srcs[2 * i ] = value_factory.src(src0, i);
2101 srcs[2 * i + 1] = value_factory.src(src1, i);
2102 }
2103
2104 for (int i = n; i < 4 ; ++i) {
2105 srcs[2 * i ] = value_factory.zero();
2106 srcs[2 * i + 1] = value_factory.zero();
2107 }
2108
2109 auto op = unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ?
2110 op2_dot4 : op2_dot4_ieee;
2111 AluInstr *ir = new AluInstr(op, dest, srcs, AluInstr::last_write, 4);
2112
2113 if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2114 if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2115 if (src1.negate) ir->set_alu_flag(alu_src1_neg);
2116 if (src1.abs) ir->set_alu_flag(alu_src1_abs);
2117
2118 if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2119
2120 shader.emit_instruction(ir);
2121 return true;
2122 }
2123
emit_fdph(const nir_alu_instr & alu,Shader & shader)2124 static bool emit_fdph(const nir_alu_instr& alu, Shader& shader)
2125 {
2126 auto& value_factory = shader.value_factory();
2127 const nir_alu_src& src0 = alu.src[0];
2128 const nir_alu_src& src1 = alu.src[1];
2129
2130 auto dest = value_factory.dest(alu.dest.dest, 0, pin_free);
2131
2132 AluInstr::SrcValues srcs(8);
2133
2134 for (int i = 0; i < 3 ; ++i) {
2135 srcs[2 * i ] = value_factory.src(src0, i);
2136 srcs[2 * i + 1] = value_factory.src(src1, i);
2137 }
2138
2139 srcs[6] = value_factory.one();
2140 srcs[7] = value_factory.src(src1, 3);
2141
2142 auto op = unlikely(shader.has_flag(Shader::sh_legacy_math_rules)) ?
2143 op2_dot4 : op2_dot4_ieee;
2144 AluInstr *ir = new AluInstr(op, dest, srcs, AluInstr::last_write, 4);
2145
2146 if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2147 if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2148 if (src1.negate) ir->set_alu_flag(alu_src1_neg);
2149 if (src1.abs) ir->set_alu_flag(alu_src1_abs);
2150
2151 if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2152
2153 shader.emit_instruction(ir);
2154 return true;
2155 }
2156
emit_create_vec(const nir_alu_instr & instr,unsigned nc,Shader & shader)2157 static bool emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader)
2158 {
2159 auto& value_factory = shader.value_factory();
2160 AluInstr *ir = nullptr;
2161
2162 for(unsigned i = 0; i < nc; ++i) {
2163 if (instr.dest.write_mask & (1 << i)){
2164 auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]);
2165 auto dst = value_factory.dest(instr.dest.dest, i, pin_chan);
2166 ir = new AluInstr(op1_mov, dst, src, {alu_write});
2167
2168 if (instr.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2169 if (instr.src[i].negate) ir->set_alu_flag(alu_src0_neg);
2170 if (instr.src[i].abs) ir->set_alu_flag(alu_src0_abs);
2171
2172 shader.emit_instruction(ir);
2173 }
2174 }
2175
2176 if (ir)
2177 ir->set_alu_flag(alu_last_instr);
2178 return true;
2179 }
2180
emit_alu_i2orf2_b1(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2181 static bool emit_alu_i2orf2_b1(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2182 {
2183 auto& value_factory = shader.value_factory();
2184 AluInstr *ir = nullptr;
2185 Pin pin = nir_dest_num_components(alu.dest.dest) == 1 ? pin_free : pin_none;
2186
2187 for (int i = 0; i < 4 ; ++i) {
2188 if (alu.dest.write_mask & (1 << i)) {
2189 ir = new AluInstr(opcode,
2190 value_factory.dest(alu.dest, i, pin),
2191 value_factory.src(alu.src[0], i),
2192 value_factory.zero(),
2193 AluInstr::write);
2194 shader.emit_instruction(ir);
2195 }
2196 }
2197 if (ir)
2198 ir->set_alu_flag(alu_last_instr);
2199 return true;
2200 }
2201
emit_alu_comb_with_zero(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2202 static bool emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2203 {
2204 auto& value_factory = shader.value_factory();
2205 AluInstr *ir = nullptr;
2206 auto pin = pin_for_components(alu);
2207 for (int i = 0; i < 4 ; ++i) {
2208 if (alu.dest.write_mask & (1 << i)){
2209 ir = new AluInstr(opcode,
2210 value_factory.dest(alu.dest, i, pin),
2211 value_factory.zero(),
2212 value_factory.src(alu.src[0], i),
2213 AluInstr::write);
2214 shader.emit_instruction(ir);
2215 }
2216 }
2217 if (ir)
2218 ir->set_alu_flag(alu_last_instr);
2219
2220 return true;
2221 }
2222
emit_pack_64_2x32_split(const nir_alu_instr & alu,Shader & shader)2223 static bool emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader)
2224 {
2225 auto& value_factory = shader.value_factory();
2226 AluInstr *ir = nullptr;
2227 for (unsigned i = 0; i < 2; ++i) {
2228 ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none),
2229 value_factory.src(alu.src[i], 0), AluInstr::write);
2230 shader.emit_instruction(ir);
2231 }
2232 ir->set_alu_flag(alu_last_instr);
2233 return true;
2234 }
2235
emit_pack_64_2x32(const nir_alu_instr & alu,Shader & shader)2236 static bool emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2237 {
2238 auto& value_factory = shader.value_factory();
2239 AluInstr *ir = nullptr;
2240 for (unsigned i = 0; i < 2; ++i) {
2241 ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none),
2242 value_factory.src(alu.src[0], i), AluInstr::write);
2243 shader.emit_instruction(ir);
2244 }
2245 ir->set_alu_flag(alu_last_instr);
2246 return true;
2247 }
2248
2249
emit_unpack_64_2x32(const nir_alu_instr & alu,Shader & shader)2250 static bool emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2251 {
2252 auto& value_factory = shader.value_factory();
2253 AluInstr *ir = nullptr;
2254 for (unsigned i = 0; i < 2; ++i) {
2255 ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_none),
2256 value_factory.src64(alu.src[0], 0, i), AluInstr::write);
2257 shader.emit_instruction(ir);
2258 }
2259 ir->set_alu_flag(alu_last_instr);
2260 return true;
2261 }
2262
emit_alu_vec2_64(const nir_alu_instr & alu,Shader & shader)2263 bool emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader)
2264 {
2265 auto& value_factory = shader.value_factory();
2266 AluInstr *ir = nullptr;
2267 for (unsigned i = 0; i < 2; ++i) {
2268 ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i, pin_chan),
2269 value_factory.src64(alu.src[0], 0, i), AluInstr::write);
2270 shader.emit_instruction(ir);
2271 }
2272 for (unsigned i = 0; i < 2; ++i) {
2273 ir = new AluInstr(op1_mov, value_factory.dest(alu.dest, i + 2, pin_chan),
2274 value_factory.src64(alu.src[1], 1, i), AluInstr::write);
2275 shader.emit_instruction(ir);
2276 }
2277 ir->set_alu_flag(alu_last_instr);
2278 return true;
2279 }
2280
emit_pack_32_2x16_split(const nir_alu_instr & alu,Shader & shader)2281 static bool emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader)
2282 {
2283 auto& value_factory = shader.value_factory();
2284
2285 auto x = value_factory.temp_register();
2286 auto y = value_factory.temp_register();
2287 auto yy = value_factory.temp_register();
2288
2289 shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, x,
2290 value_factory.src(alu.src[0], 0), AluInstr::last_write));
2291
2292 shader.emit_instruction(new AluInstr(op1_flt32_to_flt16, y,
2293 value_factory.src(alu.src[1], 0), AluInstr::last_write));
2294
2295 shader.emit_instruction(new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write));
2296
2297 shader.emit_instruction(new AluInstr(op2_or_int,
2298 value_factory.dest(alu.dest, 0, pin_free),
2299 x, yy, AluInstr::last_write));
2300 return true;
2301 }
2302
emit_unpack_64_2x32_split(const nir_alu_instr & alu,int comp,Shader & shader)2303 static bool emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader)
2304 {
2305 auto& value_factory = shader.value_factory();
2306 shader.emit_instruction(new AluInstr(op1_mov, value_factory.dest(alu.dest, 0, pin_free),
2307 value_factory.src64(alu.src[0], 0, comp), AluInstr::last_write));
2308 return true;
2309 }
2310
emit_unpack_32_2x16_split_x(const nir_alu_instr & alu,Shader & shader)2311 static bool emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader)
2312 {
2313 auto& value_factory = shader.value_factory();
2314 shader.emit_instruction(new AluInstr(op1_flt16_to_flt32, value_factory.dest(alu.dest, 0, pin_free),
2315 value_factory.src(alu.src[0], 0), AluInstr::last_write));
2316 return true;
2317 }
emit_unpack_32_2x16_split_y(const nir_alu_instr & alu,Shader & shader)2318 static bool emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader)
2319 {
2320 auto& value_factory = shader.value_factory();
2321 auto tmp = value_factory.temp_register();
2322 shader.emit_instruction(new AluInstr(op2_lshr_int, tmp,
2323 value_factory.src(alu.src[0], 0),
2324 value_factory.literal(16),
2325 AluInstr::last_write));
2326
2327 shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2328 value_factory.dest(alu.dest, 0, pin_free),
2329 tmp, AluInstr::last_write));
2330 return true;
2331 }
2332
2333
2334
emit_alu_trans_op1_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2335 static bool emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2336 {
2337 auto& value_factory = shader.value_factory();
2338 const nir_alu_src& src0 = alu.src[0];
2339
2340 AluInstr *ir = nullptr;
2341 auto pin = pin_for_components(alu);
2342
2343 for (unsigned i = 0; i < nir_dest_num_components(alu.dest.dest); ++i) {
2344 if (alu.dest.write_mask & (1 << i)){
2345 ir = new AluInstr(opcode,
2346 value_factory.dest(alu.dest.dest, i, pin),
2347 value_factory.src(src0, i),
2348 AluInstr::last_write);
2349 if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2350 if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2351 if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2352 ir->set_alu_flag(alu_is_trans);
2353 shader.emit_instruction(ir);
2354 }
2355 }
2356
2357 return true;
2358 }
2359
emit_alu_f2i32_or_u32_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2360 static bool emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2361 {
2362 auto& value_factory = shader.value_factory();
2363 AluInstr *ir = nullptr;
2364
2365 PRegister reg[4];
2366
2367 int num_comp = nir_dest_num_components(alu.dest.dest);
2368
2369 for (int i = 0; i < num_comp; ++i) {
2370 reg[i] = value_factory.temp_register();
2371 ir = new AluInstr(op1_trunc, reg[i], value_factory.src(alu.src[0], i), AluInstr::last_write);
2372 if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
2373 if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
2374 shader.emit_instruction(ir);
2375 }
2376
2377 auto pin = pin_for_components(alu);
2378 for (int i = 0; i < num_comp; ++i) {
2379 ir = new AluInstr(opcode,
2380 value_factory.dest(alu.dest, i, pin),
2381 reg[i], AluInstr::write);
2382 if (opcode == op1_flt_to_uint) {
2383 ir->set_alu_flag(alu_is_trans);
2384 ir->set_alu_flag(alu_last_instr);
2385 }
2386 shader.emit_instruction(ir);
2387 }
2388 ir->set_alu_flag(alu_last_instr);
2389 return true;
2390 }
2391
emit_alu_trans_op1_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2392 static bool emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2393 {
2394 auto& value_factory = shader.value_factory();
2395 const nir_alu_src& src0 = alu.src[0];
2396
2397 auto pin = pin_for_components(alu);
2398
2399 /* todo: Actually we need only three channels, but then we have
2400 * to make sure that we don't hava w dest */
2401 for (unsigned j = 0; j < 4; ++j) {
2402 if (alu.dest.write_mask & (1 << j)) {
2403 AluInstr::SrcValues srcs(4);
2404 PRegister dest = value_factory.dest(alu.dest.dest, j, pin);
2405
2406 for (unsigned i = 0; i < 4; ++i)
2407 srcs[i] = value_factory.src(src0, j);
2408
2409 auto ir = new AluInstr(opcode, dest, srcs, AluInstr::last_write, 4);
2410
2411 if (alu.src[0].abs) ir->set_alu_flag(alu_src0_abs);
2412 if (alu.src[0].negate) ir->set_alu_flag(alu_src0_neg);
2413 if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2414
2415 ir->set_alu_flag(alu_is_cayman_trans);
2416
2417
2418 shader.emit_instruction(ir);
2419 }
2420 }
2421 return true;
2422 }
2423
emit_alu_trans_op2_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2424 static bool emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2425 {
2426 auto& value_factory = shader.value_factory();
2427
2428 const nir_alu_src& src0 = alu.src[0];
2429 const nir_alu_src& src1 = alu.src[1];
2430
2431 AluInstr *ir = nullptr;
2432
2433 auto pin = pin_for_components(alu);
2434 for (int i = 0; i < 4 ; ++i) {
2435 if (alu.dest.write_mask & (1 << i)){
2436 ir = new AluInstr(opcode, value_factory.dest(alu.dest.dest, i, pin),
2437 value_factory.src(src0, i),
2438 value_factory.src(src1, i), AluInstr::last_write);
2439 if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2440 if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2441 if (src1.negate) ir->set_alu_flag(alu_src1_neg);
2442 if (src1.abs) ir->set_alu_flag(alu_src1_abs);
2443 if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2444 ir->set_alu_flag(alu_is_trans);
2445 shader.emit_instruction(ir);
2446 }
2447 }
2448 return true;
2449 }
2450
emit_alu_trans_op2_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2451 static bool emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2452 {
2453 auto& value_factory = shader.value_factory();
2454
2455 const nir_alu_src& src0 = alu.src[0];
2456 const nir_alu_src& src1 = alu.src[1];
2457
2458 unsigned last_slot = 4;
2459
2460 for (unsigned k = 0; k < nir_dest_num_components(alu.dest.dest); ++k) {
2461 if (alu.dest.write_mask & (1 << k)) {
2462 AluInstr::SrcValues srcs(2 * last_slot);
2463 PRegister dest = value_factory.dest(alu.dest.dest, k, pin_free);
2464
2465 for (unsigned i = 0; i < last_slot ; ++i) {
2466 srcs[2 * i ] = value_factory.src(src0, k);
2467 srcs[2 * i + 1] = value_factory.src(src1, k);
2468 }
2469
2470 auto ir = new AluInstr(opcode,
2471 dest, srcs, AluInstr::last_write, last_slot);
2472
2473 if (src0.negate) ir->set_alu_flag(alu_src0_neg);
2474 if (src0.abs) ir->set_alu_flag(alu_src0_abs);
2475 if (src1.negate) ir->set_alu_flag(alu_src1_neg);
2476 if (src1.abs) ir->set_alu_flag(alu_src1_abs);
2477 if (alu.dest.saturate) ir->set_alu_flag(alu_dst_clamp);
2478 ir->set_alu_flag(alu_is_cayman_trans);
2479 shader.emit_instruction(ir);
2480 }
2481 }
2482 return true;
2483 }
2484
2485
emit_tex_fdd(const nir_alu_instr & alu,TexInstr::Opcode opcode,bool fine,Shader & shader)2486 static bool emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader)
2487 {
2488 auto& value_factory = shader.value_factory();
2489
2490 int ncomp = nir_dest_num_components(alu.dest.dest);
2491 RegisterVec4::Swizzle src_swz = {7,7,7,7};
2492 for (auto i = 0; i < ncomp; ++i)
2493 src_swz[i] = alu.src[0].swizzle[i];
2494
2495 auto src = value_factory.src_vec4(alu.src[0].src, pin_group, src_swz);
2496
2497 auto tmp = value_factory.temp_vec4(pin_group);
2498 AluInstr *mv = nullptr;
2499 for (int i = 0; i < ncomp; ++i) {
2500 mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write);
2501 if (alu.src[0].abs)
2502 mv->set_alu_flag(alu_src0_abs);
2503 if (alu.src[0].negate)
2504 mv->set_alu_flag(alu_src0_neg);
2505 shader.emit_instruction(mv);
2506 }
2507 if (mv)
2508 mv->set_alu_flag(alu_last_instr);
2509
2510 auto dst = value_factory.dest_vec4(alu.dest.dest, pin_group);
2511 RegisterVec4::Swizzle dst_swz = {7,7,7,7};
2512 for (auto i = 0; i < ncomp; ++i) {
2513 if (alu.dest.write_mask & (1 << i))
2514 dst_swz[i] = i;
2515 }
2516
2517 auto tex = new TexInstr(opcode, dst, dst_swz, tmp, 0, R600_MAX_CONST_BUFFERS);
2518
2519 if (fine)
2520 tex->set_tex_flag(TexInstr::grad_fine);
2521
2522 shader.emit_instruction(tex);
2523
2524 return true;
2525 }
2526
emit_alu_cube(const nir_alu_instr & alu,Shader & shader)2527 static bool emit_alu_cube(const nir_alu_instr& alu, Shader& shader)
2528 {
2529 auto& value_factory = shader.value_factory();
2530 AluInstr *ir = nullptr;
2531
2532 const uint16_t src0_chan[4] = {2, 2, 0, 1};
2533 const uint16_t src1_chan[4] = {1, 0, 2, 2};
2534
2535 auto group = new AluGroup();
2536
2537 for (int i = 0; i < 4; ++i) {
2538
2539
2540 ir = new AluInstr(op2_cube, value_factory.dest(alu.dest.dest, i, pin_chan),
2541 value_factory.src(alu.src[0], src0_chan[i]),
2542 value_factory.src(alu.src[0], src1_chan[i]),
2543 AluInstr::write);
2544 group->add_instruction(ir);
2545 }
2546 ir->set_alu_flag(alu_last_instr);
2547 shader.emit_instruction(group);
2548 return true;
2549 }
2550
2551 const std::set<AluModifiers> AluInstr::empty;
2552 const std::set<AluModifiers> AluInstr::write({alu_write});
2553 const std::set<AluModifiers> AluInstr::last({alu_last_instr});
2554 const std::set<AluModifiers> AluInstr::last_write({alu_write, alu_last_instr});
2555
2556 }
2557