1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_instr_alu.h"
28
29 #include "sfn_alu_defines.h"
30 #include "sfn_debug.h"
31 #include "sfn_instr_alugroup.h"
32 #include "sfn_instr_tex.h"
33 #include "sfn_shader.h"
34 #include "sfn_virtualvalues.h"
35
36 #include <algorithm>
37 #include <sstream>
38
39 namespace r600 {
40
41 using std::istream;
42 using std::string;
43 using std::vector;
44
AluInstr(EAluOp opcode,PRegister dest,SrcValues src,const std::set<AluModifiers> & flags,int slots)45 AluInstr::AluInstr(EAluOp opcode,
46 PRegister dest,
47 SrcValues src,
48 const std::set<AluModifiers>& flags,
49 int slots):
50 m_opcode(opcode),
51 m_dest(dest),
52 m_bank_swizzle(alu_vec_unknown),
53 m_cf_type(cf_alu),
54 m_alu_slots(slots)
55 {
56 m_src.swap(src);
57
58 if (m_src.size() == 3)
59 m_alu_flags.set(alu_op3);
60
61 for (auto f : flags)
62 m_alu_flags.set(f);
63
64 ASSERT_OR_THROW(m_src.size() ==
65 static_cast<size_t>(alu_ops.at(opcode).nsrc * m_alu_slots),
66 "Unexpected number of source values");
67
68 if (m_alu_flags.test(alu_write))
69 ASSERT_OR_THROW(dest, "Write flag is set, but no destination register is given");
70
71 update_uses();
72
73 if (dest && slots > 1) {
74 switch (m_opcode) {
75 case op2_dot_ieee: m_allowed_dest_mask = (1 << (5 - slots)) - 1;
76 break;
77 default:
78 if (has_alu_flag(alu_is_cayman_trans)) {
79 m_allowed_dest_mask = (1 << slots) - 1;
80 }
81 }
82 }
83 assert(!dest || (m_allowed_dest_mask & (1 << dest->chan())));
84 }
85
/* Create a single-slot instruction without destination and without flags.
 * The source list is a SrcValues constructed with alu_ops.at(opcode).nsrc
 * elements.
 * NOTE(review): update_uses() dereferences every source, so this looks only
 * safe for opcodes with nsrc == 0 - confirm against the callers. */
AluInstr::AluInstr(EAluOp opcode):
    AluInstr(opcode, nullptr, SrcValues(alu_ops.at(opcode).nsrc), {}, 1)
{
}
90
/* Create a single-slot instruction with no destination and an empty source
 * list, and remember `chan` as the fallback channel. Because the primary
 * constructor checks the source count against the opcode's nsrc, this is
 * only usable for opcodes that take no sources. */
AluInstr::AluInstr(EAluOp opcode, int chan):
    AluInstr(opcode, nullptr, SrcValues(), {}, 1)
{
   m_fallback_chan = chan;
}
96
/* Convenience constructor for a single-slot instruction with one source;
 * forwards to the primary constructor. */
AluInstr::AluInstr(EAluOp opcode,
                   PRegister dest,
                   PVirtualValue src0,
                   const std::set<AluModifiers>& m_flags):
    AluInstr(opcode, dest, SrcValues{src0}, m_flags, 1)
{
}
104
/* Convenience constructor for a single-slot instruction with two sources;
 * forwards to the primary constructor. */
AluInstr::AluInstr(EAluOp opcode,
                   PRegister dest,
                   PVirtualValue src0,
                   PVirtualValue src1,
                   const std::set<AluModifiers>& m_flags):
    AluInstr(opcode, dest, SrcValues{src0, src1}, m_flags, 1)
{
}
113
/* Convenience constructor for a single-slot instruction with three sources;
 * forwards to the primary constructor (which sets alu_op3 for three
 * sources). */
AluInstr::AluInstr(EAluOp opcode,
                   PRegister dest,
                   PVirtualValue src0,
                   PVirtualValue src1,
                   PVirtualValue src2,
                   const std::set<AluModifiers>& m_flags):
    AluInstr(opcode, dest, SrcValues{src0, src1, src2}, m_flags, 1)
{
}
123
AluInstr(ESDOp op,PVirtualValue src0,PVirtualValue src1,PVirtualValue address)124 AluInstr::AluInstr(ESDOp op,
125 PVirtualValue src0,
126 PVirtualValue src1,
127 PVirtualValue address):
128 m_lds_opcode(op)
129 {
130 set_alu_flag(alu_is_lds);
131
132 m_src.push_back(address);
133 if (src0) {
134 m_src.push_back(src0);
135 if (src1)
136 m_src.push_back(src1);
137 }
138 update_uses();
139 }
140
AluInstr(ESDOp op,const SrcValues & src,const std::set<AluModifiers> & flags)141 AluInstr::AluInstr(ESDOp op, const SrcValues& src, const std::set<AluModifiers>& flags):
142 m_lds_opcode(op),
143 m_src(src)
144 {
145 for (auto f : flags)
146 set_alu_flag(f);
147
148 set_alu_flag(alu_is_lds);
149 update_uses();
150 }
151
152 void
update_uses()153 AluInstr::update_uses()
154 {
155 for (auto& s : m_src) {
156 auto r = s->as_register();
157 if (r) {
158 r->add_use(this);
159 // move this to add_use
160 if (r->pin() == pin_array) {
161 auto array_elm = static_cast<LocalArrayValue *>(r);
162 auto addr = array_elm->addr();
163 if (addr && addr->as_register())
164 addr->as_register()->add_use(this);
165 }
166 }
167 auto u = s->as_uniform();
168 if (u && u->buf_addr() && u->buf_addr()->as_register())
169 u->buf_addr()->as_register()->add_use(this);
170 }
171
172 if (m_dest &&
173 (has_alu_flag(alu_write) ||
174 m_opcode == op1_mova_int ||
175 m_opcode == op1_set_cf_idx0 ||
176 m_opcode == op1_set_cf_idx1)) {
177 m_dest->add_parent(this);
178
179 if (m_dest->pin() == pin_array) {
180 // move this to add_parent
181 auto array_elm = static_cast<LocalArrayValue *>(m_dest);
182 auto addr = array_elm->addr();
183 if (addr && addr->as_register())
184 addr->as_register()->add_use(this);
185 }
186 }
187 }
188
/* Visitor entry point for read-only visitors: hands this instruction to the
 * visitor by const reference. */
void
AluInstr::accept(ConstInstrVisitor& visitor) const
{
   visitor.visit(*this);
}
194
/* Visitor entry point for mutating visitors: hands this instruction to the
 * visitor by pointer. */
void
AluInstr::accept(InstrVisitor& visitor)
{
   visitor.visit(this);
}
200
/* Printable names of the ALU control-flow types; do_print() emits the name
 * when the instruction's CF type is found in this table. cf_alu has no
 * entry and is therefore not printed. */
const std::map<ECFAluOpCode, std::string> AluInstr::cf_map = {
   {cf_alu_break,       "BREAK"      },
   {cf_alu_continue,    "CONT"       },
   {cf_alu_else_after,  "ELSE_AFTER" },
   {cf_alu_extended,    "EXTENDED"   },
   {cf_alu_pop_after,   "POP_AFTER"  },
   {cf_alu_pop2_after,  "POP2_AFTER" },
   {cf_alu_push_before, "PUSH_BEFORE"}
};
210
/* Printable names of the vector bank swizzle values; do_print() emits the
 * name when the instruction's bank swizzle is found in this table.
 * alu_vec_unknown has no entry and is therefore not printed. */
const std::map<AluBankSwizzle, std::string> AluInstr::bank_swizzle_map = {
   {alu_vec_012, "VEC_012"},
   {alu_vec_021, "VEC_021"},
   {alu_vec_102, "VEC_102"},
   {alu_vec_120, "VEC_120"},
   {alu_vec_201, "VEC_201"},
   {alu_vec_210, "VEC_210"}
};
219
/* ALU flag for "source N is relative", indexed by the source position
 * (0..2) within a slot. */
const AluModifiers AluInstr::src_rel_flags[3] = {
   alu_src0_rel, alu_src1_rel, alu_src2_rel};
222
/* Bundle of an index mode and a bit set of print flags describing how a
 * source value should be printed. The static constants are the individual
 * bits that can be combined in `flags`. */
struct ValuePrintFlags {
   ValuePrintFlags(int im, int f):
       index_mode(im),
       flags(f)
   {
   }

   int index_mode = 0;
   int flags = 0;

   static const int is_rel = 1 << 0;
   static const int has_abs = 1 << 1;
   static const int has_neg = 1 << 2;
   static const int literal_is_float = 1 << 3;
   static const int index_ar = 1 << 4;
   static const int index_loopidx = 1 << 5;
};
238
239 void
do_print(std::ostream & os) const240 AluInstr::do_print(std::ostream& os) const
241 {
242 const char swzchar[] = "xyzw01?_";
243
244 unsigned i = 0;
245
246 os << "ALU ";
247
248 if (has_alu_flag(alu_is_lds)) {
249 os << "LDS " << lds_ops.at(m_lds_opcode).name;
250 os << " __.x : ";
251 } else {
252
253 os << alu_ops.at(m_opcode).name;
254 if (has_alu_flag(alu_dst_clamp))
255 os << " CLAMP";
256
257 if (m_dest) {
258 if (has_alu_flag(alu_write) || m_dest->has_flag(Register::addr_or_idx)) {
259 os << " " << *m_dest;
260 } else {
261 os << " __"
262 << "." << swzchar[m_dest->chan()];
263 if (m_dest->pin() != pin_none)
264 os << "@" << m_dest->pin();
265 }
266 os << " : ";
267 } else {
268 os << " __." << swzchar[dest_chan()] << " : ";
269 }
270 }
271
272 const int n_source_per_slot =
273 has_alu_flag(alu_is_lds) ? m_src.size() : alu_ops.at(m_opcode).nsrc;
274
275
276 for (int s = 0; s < m_alu_slots; ++s) {
277
278 if (s > 0)
279 os << " +";
280
281 for (int k = 0; k < n_source_per_slot; ++k) {
282 int pflags = 0;
283 if (i)
284 os << ' ';
285 if (has_source_mod(i, mod_neg))
286 pflags |= ValuePrintFlags::has_neg;
287 if (has_alu_flag(src_rel_flags[k]))
288 pflags |= ValuePrintFlags::is_rel;
289 if (n_source_per_slot <= 2)
290 if (has_source_mod(i, mod_abs))
291 pflags |= ValuePrintFlags::has_abs;
292
293 if (pflags & ValuePrintFlags::has_neg)
294 os << '-';
295 if (pflags & ValuePrintFlags::has_abs)
296 os << '|';
297 os << *m_src[i];
298 if (pflags & ValuePrintFlags::has_abs)
299 os << '|';
300 ++i;
301 }
302 }
303
304 os << " {";
305 if (has_alu_flag(alu_write))
306 os << 'W';
307 if (has_alu_flag(alu_last_instr))
308 os << 'L';
309 if (has_alu_flag(alu_update_exec))
310 os << 'E';
311 if (has_alu_flag(alu_update_pred))
312 os << 'P';
313 os << "}";
314
315 auto bs_name = bank_swizzle_map.find(m_bank_swizzle);
316 if (bs_name != bank_swizzle_map.end())
317 os << ' ' << bs_name->second;
318
319 auto cf_name = cf_map.find(m_cf_type);
320 if (cf_name != cf_map.end())
321 os << ' ' << cf_name->second;
322 }
323
324 bool
can_propagate_src() const325 AluInstr::can_propagate_src() const
326 {
327 /* We can use the source in the next instruction */
328 if (!can_copy_propagate())
329 return false;
330
331 auto src_reg = m_src[0]->as_register();
332 if (!src_reg)
333 return true;
334
335 assert(m_dest);
336
337 if (!m_dest->has_flag(Register::ssa)) {
338 return false;
339 }
340
341 if (m_dest->pin() == pin_fully)
342 return m_dest->equal_to(*src_reg);
343
344 if (m_dest->pin() == pin_chan)
345 return src_reg->pin() == pin_none ||
346 src_reg->pin() == pin_free ||
347 (src_reg->pin() == pin_chan && src_reg->chan() == m_dest->chan());
348
349 return m_dest->pin() == pin_none || m_dest->pin() == pin_free;
350 }
351
352 class ReplaceIndirectArrayAddr : public RegisterVisitor {
353 public:
visit(Register & value)354 void visit(Register& value) override { (void)value; }
visit(LocalArray & value)355 void visit(LocalArray& value) override
356 {
357 (void)value;
358 unreachable("An array can't be used as address");
359 }
360 void visit(LocalArrayValue& value) override;
361 void visit(UniformValue& value) override;
visit(LiteralConstant & value)362 void visit(LiteralConstant& value) override { (void)value; }
visit(InlineConstant & value)363 void visit(InlineConstant& value) override { (void)value; }
364
365 PRegister new_addr;
366 };
367
visit(LocalArrayValue & value)368 void ReplaceIndirectArrayAddr::visit(LocalArrayValue& value)
369 {
370 if (new_addr->sel() == 0 && value.addr()
371 && value.addr()->as_register())
372 value.set_addr(new_addr);
373 }
374
visit(UniformValue & value)375 void ReplaceIndirectArrayAddr::visit(UniformValue& value)
376 {
377 if (value.buf_addr() && value.buf_addr()->as_register() &&
378 (new_addr->sel() == 1 || new_addr->sel() == 2)) {
379 value.set_buf_addr(new_addr);
380 }
381 }
382
update_indirect_addr(UNUSED PRegister old_reg,PRegister reg)383 void AluInstr::update_indirect_addr(UNUSED PRegister old_reg, PRegister reg)
384 {
385 ReplaceIndirectArrayAddr visitor;
386
387 visitor.new_addr = reg;
388 assert(reg->has_flag(Register::addr_or_idx));
389
390 if (m_dest)
391 m_dest->accept(visitor);
392
393 for (auto src : m_src)
394 src->accept(visitor);
395
396 reg->add_use(this);
397 }
398
399 bool
can_propagate_dest() const400 AluInstr::can_propagate_dest() const
401 {
402 if (!can_copy_propagate()) {
403 return false;
404 }
405
406 auto src_reg = m_src[0]->as_register();
407 if (!src_reg) {
408 return false;
409 }
410
411 assert(m_dest);
412
413 if (src_reg->pin() == pin_fully) {
414 return false;
415 }
416
417 if (!src_reg->has_flag(Register::ssa))
418 return false;
419
420 if (!m_dest->has_flag(Register::ssa))
421 return false;
422
423 if (src_reg->pin() == pin_chan)
424 return m_dest->pin() == pin_none || m_dest->pin() == pin_free ||
425 ((m_dest->pin() == pin_chan || m_dest->pin() == pin_group) &&
426 src_reg->chan() == m_dest->chan());
427
428 return (src_reg->pin() == pin_none || src_reg->pin() == pin_free);
429 }
430
431 bool
can_copy_propagate() const432 AluInstr::can_copy_propagate() const
433 {
434 if (m_opcode != op1_mov)
435 return false;
436
437 if (has_source_mod(0, mod_abs) || has_source_mod(0, mod_neg) ||
438 has_alu_flag(alu_dst_clamp))
439 return false;
440
441 return has_alu_flag(alu_write);
442 }
443
444 bool
replace_source(PRegister old_src,PVirtualValue new_src)445 AluInstr::replace_source(PRegister old_src, PVirtualValue new_src)
446 {
447 if (!can_replace_source(old_src, new_src))
448 return false;
449
450 return do_replace_source(old_src, new_src);
451 }
452
do_replace_source(PRegister old_src,PVirtualValue new_src)453 bool AluInstr::do_replace_source(PRegister old_src, PVirtualValue new_src)
454 {
455 bool process = false;
456
457 for (unsigned i = 0; i < m_src.size(); ++i) {
458 if (old_src->equal_to(*m_src[i])) {
459 m_src[i] = new_src;
460 process = true;
461 }
462 }
463 if (process) {
464 auto r = new_src->as_register();
465 if (r)
466 r->add_use(this);
467 old_src->del_use(this);
468 }
469
470 return process;
471 }
472
replace_src(int i,PVirtualValue new_src,uint32_t to_set,SourceMod to_clear)473 bool AluInstr::replace_src(int i, PVirtualValue new_src, uint32_t to_set,
474 SourceMod to_clear)
475 {
476 auto old_src = m_src[i]->as_register();
477 assert(old_src);
478
479 if (!can_replace_source(old_src, new_src))
480 return false;
481
482 assert(old_src);
483 old_src->del_use(this);
484
485 m_src[i] = new_src;
486
487 auto r = new_src->as_register();
488 if (r)
489 r->add_use(this);
490
491 m_source_modifiers |= to_set << (2 * i);
492 m_source_modifiers &= ~(to_clear << (2 * i));
493
494 return true;
495 }
496
497
can_replace_source(PRegister old_src,PVirtualValue new_src)498 bool AluInstr::can_replace_source(PRegister old_src, PVirtualValue new_src)
499 {
500 if (!check_readport_validation(old_src, new_src))
501 return false;
502
503 /* If the old or new source is an array element, we assume that there
504 * might have been an (untracked) indirect access, so don't replace
505 * this source */
506 if (old_src->pin() == pin_array && new_src->pin() == pin_array)
507 return false;
508
509 auto [addr, dummy, index] = indirect_addr();
510 auto addr_reg = addr ? addr->as_register() : nullptr;
511 auto index_reg = index ? index->as_register() : nullptr;
512
513 if (auto u = new_src->as_uniform()) {
514 if (u && u->buf_addr()) {
515
516 /* Don't mix indirect buffer and indirect registers, because the
517 * scheduler can't handle it yet. */
518 if (addr_reg)
519 return false;
520
521 /* Don't allow two different index registers, can't deal with that yet */
522 if (index_reg && !index_reg->equal_to(*u->buf_addr()))
523 return false;
524 }
525 }
526
527 if (auto new_addr = new_src->get_addr()) {
528 auto new_addr_reg = new_addr->as_register();
529 bool new_addr_lowered = new_addr_reg &&
530 new_addr_reg->has_flag(Register::addr_or_idx);
531
532 if (addr_reg) {
533 if (!addr_reg->equal_to(*new_addr) || new_addr_lowered ||
534 addr_reg->has_flag(Register::addr_or_idx))
535 return false;
536 }
537 if (m_dest->has_flag(Register::addr_or_idx)) {
538 if (new_src->pin() == pin_array) {
539 auto s = static_cast<const LocalArrayValue *>(new_src)->addr();
540 if (!s->as_inline_const() || !s->as_literal())
541 return false;
542 }
543 }
544 }
545 return true;
546 }
547
548 void
set_sources(SrcValues src)549 AluInstr::set_sources(SrcValues src)
550 {
551 for (auto& s : m_src) {
552 auto r = s->as_register();
553 if (r)
554 r->del_use(this);
555 }
556 m_src.swap(src);
557 for (auto& s : m_src) {
558 auto r = s->as_register();
559 if (r)
560 r->add_use(this);
561 }
562 }
563
allowed_src_chan_mask() const564 uint8_t AluInstr::allowed_src_chan_mask() const
565 {
566 if (m_alu_slots < 2)
567 return 0xf;
568
569 int chan_use_count[4] = {0};
570
571 for (auto s : m_src) {
572 auto r = s->as_register();
573 if (r)
574 ++chan_use_count[r->chan()];
575 }
576 /* Each channel can only be loaded in one of three cycles,
577 * so if a channel is already used three times, we can't
578 * add another source withthis channel.
579 * Since we want to move away from one channel to another, it
580 * is not important to know which is the old channel that will
581 * be freed by the channel switch.*/
582 int mask = 0;
583
584 /* Be conservative about channel use when using more than two
585 * slots. Currently a constellatioon of
586 *
587 * ALU d.x = f(r0.x, r1.y)
588 * ALU _.y = f(r2.y, r3.x)
589 * ALU _.z = f(r4.x, r5.y)
590 *
591 * will fail to be split. To get constellations like this to be scheduled
592 * properly will need some work on the bank swizzle check.
593 */
594 int maxuse = m_alu_slots > 2 ? 2 : 3;
595 for (int i = 0; i < 4; ++i) {
596 if (chan_use_count[i] < maxuse)
597 mask |= 1 << i;
598 }
599 return mask;
600 }
601
602 bool
replace_dest(PRegister new_dest,AluInstr * move_instr)603 AluInstr::replace_dest(PRegister new_dest, AluInstr *move_instr)
604 {
605 if (m_dest->equal_to(*new_dest))
606 return false;
607
608 if (m_dest->uses().size() > 1)
609 return false;
610
611 if (new_dest->pin() == pin_array)
612 return false;
613
614 /* Currently we bail out when an array write should be moved, because
615 * declaring an array write is currently not well defined. The
616 * Whole "backwards" copy propagation should dprobably be replaced by some
617 * forward peep holew optimization */
618 /*
619 if (new_dest->pin() == pin_array) {
620 auto dav = static_cast<const LocalArrayValue *>(new_dest)->addr();
621 for (auto s: m_src) {
622 if (s->pin() == pin_array) {
623 auto sav = static_cast<const LocalArrayValue *>(s)->addr();
624 if (dav && sav && dav->as_register() && !dav->equal_to(*sav))
625 return false;
626 }
627 }
628 }
629 */
630
631 if (m_dest->pin() == pin_chan && new_dest->chan() != m_dest->chan())
632 return false;
633
634 if (m_dest->pin() == pin_chan) {
635 if (new_dest->pin() == pin_group)
636 new_dest->set_pin(pin_chgr);
637 else if (new_dest->pin() != pin_chgr)
638 new_dest->set_pin(pin_chan);
639 }
640
641 m_dest = new_dest;
642 if (!move_instr->has_alu_flag(alu_last_instr))
643 reset_alu_flag(alu_last_instr);
644
645 if (has_alu_flag(alu_is_cayman_trans)) {
646 /* Copy propagation puts an instruction into the w channel, but we
647 * don't have the slots for a w channel */
648 if (m_dest->chan() == 3 && m_alu_slots < 4) {
649 m_alu_slots = 4;
650 assert(m_src.size() == 3);
651 m_src.push_back(m_src[0]);
652 }
653 }
654
655 return true;
656 }
657
658 void
pin_sources_to_chan()659 AluInstr::pin_sources_to_chan()
660 {
661 for (auto s : m_src) {
662 auto r = s->as_register();
663 if (r) {
664 if (r->pin() == pin_free)
665 r->set_pin(pin_chan);
666 else if (r->pin() == pin_group)
667 r->set_pin(pin_chgr);
668 }
669 }
670 }
671
672 bool
check_readport_validation(PRegister old_src,PVirtualValue new_src) const673 AluInstr::check_readport_validation(PRegister old_src, PVirtualValue new_src) const
674 {
675 if (m_src.size() < 3)
676 return true;
677
678 bool success = true;
679 AluReadportReservation rpr_sum;
680
681 unsigned nsrc = alu_ops.at(m_opcode).nsrc;
682 assert(nsrc * m_alu_slots == m_src.size());
683
684 for (int s = 0; s < m_alu_slots && success; ++s) {
685 PVirtualValue src[3];
686 auto ireg = m_src.begin() + s * nsrc;
687
688 for (unsigned i = 0; i < nsrc; ++i, ++ireg)
689 src[i] = old_src->equal_to(**ireg) ? new_src : *ireg;
690
691 AluBankSwizzle bs = alu_vec_012;
692 while (bs != alu_vec_unknown) {
693 AluReadportReservation rpr = rpr_sum;
694 if (rpr.schedule_vec_src(src, nsrc, bs)) {
695 rpr_sum = rpr;
696 break;
697 }
698 ++bs;
699 }
700
701 if (bs == alu_vec_unknown)
702 success = false;
703 }
704 return success;
705 }
706
707 void
add_extra_dependency(PVirtualValue value)708 AluInstr::add_extra_dependency(PVirtualValue value)
709 {
710 auto reg = value->as_register();
711 if (reg)
712 m_extra_dependencies.insert(reg);
713 }
714
715 bool
is_equal_to(const AluInstr & lhs) const716 AluInstr::is_equal_to(const AluInstr& lhs) const
717 {
718 if (lhs.m_opcode != m_opcode || lhs.m_bank_swizzle != m_bank_swizzle ||
719 lhs.m_cf_type != m_cf_type || lhs.m_alu_flags != m_alu_flags) {
720 return false;
721 }
722
723 if (m_dest) {
724 if (!lhs.m_dest) {
725 return false;
726 } else {
727 if (has_alu_flag(alu_write)) {
728 if (!m_dest->equal_to(*lhs.m_dest))
729 return false;
730 } else {
731 if (m_dest->chan() != lhs.m_dest->chan())
732 return false;
733 }
734 }
735 } else {
736 if (lhs.m_dest)
737 return false;
738 }
739
740 if (m_src.size() != lhs.m_src.size())
741 return false;
742
743 for (unsigned i = 0; i < m_src.size(); ++i) {
744 if (!m_src[i]->equal_to(*lhs.m_src[i]))
745 return false;
746 }
747
748 return true;
749 }
750
751 class ResolveIndirectArrayAddr : public ConstRegisterVisitor {
752 public:
visit(const Register & value)753 void visit(const Register& value) { (void)value; }
visit(const LocalArray & value)754 void visit(const LocalArray& value)
755 {
756 (void)value;
757 unreachable("An array can't be used as address");
758 }
759 void visit(const LocalArrayValue& value);
760 void visit(const UniformValue& value);
visit(const LiteralConstant & value)761 void visit(const LiteralConstant& value) { (void)value; }
visit(const InlineConstant & value)762 void visit(const InlineConstant& value) { (void)value; }
763
764 PRegister addr{nullptr};
765 PRegister index{nullptr};
766 bool addr_is_for_dest{false};
767 };
768
769 void
visit(const LocalArrayValue & value)770 ResolveIndirectArrayAddr::visit(const LocalArrayValue& value)
771 {
772 auto a = value.addr();
773 if (a) {
774 addr = a->as_register();
775 assert(!addr_is_for_dest);
776 }
777 }
778
779 void
visit(const UniformValue & value)780 ResolveIndirectArrayAddr::visit(const UniformValue& value)
781 {
782 auto a = value.buf_addr();
783 if (a) {
784 index = a->as_register();
785 }
786 }
787
788 std::tuple<PRegister, bool, PRegister>
indirect_addr() const789 AluInstr::indirect_addr() const
790 {
791 ResolveIndirectArrayAddr visitor;
792
793 if (m_dest) {
794 m_dest->accept(visitor);
795 if (visitor.addr)
796 visitor.addr_is_for_dest = true;
797 }
798
799 for (auto s : m_src) {
800 s->accept(visitor);
801 }
802 return {visitor.addr, visitor.addr_is_for_dest, visitor.index};
803 }
804
805 AluGroup *
split(ValueFactory & vf)806 AluInstr::split(ValueFactory& vf)
807 {
808 if (m_alu_slots == 1)
809 return nullptr;
810
811 sfn_log << SfnLog::instr << "Split " << *this << "\n";
812
813 auto group = new AluGroup();
814
815 m_dest->del_parent(this);
816
817 int start_slot = 0;
818 bool is_dot = m_opcode == op2_dot_ieee;
819 auto last_opcode = m_opcode;
820
821 if (is_dot) {
822 start_slot = m_dest->chan();
823 last_opcode = op2_mul_ieee;
824 }
825
826
827 for (int k = 0; k < m_alu_slots; ++k) {
828 int s = k + start_slot;
829
830 PRegister dst = m_dest->chan() == s ? m_dest : vf.dummy_dest(s);
831 if (dst->pin() != pin_chgr) {
832 auto pin = pin_chan;
833 if (dst->pin() == pin_group && m_dest->chan() == s)
834 pin = pin_chgr;
835 dst->set_pin(pin);
836 }
837
838 SrcValues src;
839 int nsrc = alu_ops.at(m_opcode).nsrc;
840 for (int i = 0; i < nsrc; ++i) {
841 auto old_src = m_src[k * nsrc + i];
842 // Make it easy for the scheduler and pin the register to the
843 // channel, otherwise scheduler would have to check whether a
844 // channel switch is possible
845 auto r = old_src->as_register();
846 if (r) {
847 if (r->pin() == pin_free || r->pin() == pin_none)
848 r->set_pin(pin_chan);
849 else if (r->pin() == pin_group)
850 r->set_pin(pin_chgr);
851 }
852 src.push_back(old_src);
853 }
854
855 auto opcode = k < m_alu_slots -1 ? m_opcode : last_opcode;
856
857
858 auto instr = new AluInstr(opcode, dst, src, {}, 1);
859 instr->set_blockid(block_id(), index());
860
861 if (s == 0 || !m_alu_flags.test(alu_64bit_op)) {
862 if (has_source_mod(nsrc * k + 0, mod_neg))
863 instr->set_source_mod(0, mod_neg);
864 if (has_source_mod(nsrc * k + 1, mod_neg))
865 instr->set_source_mod(1, mod_neg);
866 if (has_source_mod(nsrc * k + 2, mod_neg))
867 instr->set_source_mod(2, mod_neg);
868 if (has_source_mod(nsrc * k + 0, mod_abs))
869 instr->set_source_mod(0, mod_abs);
870 if (has_source_mod(nsrc * k + 1, mod_abs))
871 instr->set_source_mod(1, mod_abs);
872 }
873 if (has_alu_flag(alu_dst_clamp))
874 instr->set_alu_flag(alu_dst_clamp);
875
876 if (s == m_dest->chan())
877 instr->set_alu_flag(alu_write);
878
879 m_dest->add_parent(instr);
880 sfn_log << SfnLog::instr << " " << *instr << "\n";
881
882 if (!group->add_instruction(instr)) {
883 std::cerr << "Unable to schedule '" << *instr << "' into\n" << *group << "\n";
884
885 unreachable("Invalid group instruction");
886 }
887 }
888 group->set_blockid(block_id(), index());
889
890 for (auto s : m_src) {
891 auto r = s->as_register();
892 if (r) {
893 r->del_use(this);
894 }
895 }
896 group->set_origin(this);
897
898 return group;
899 }
900
901 /* Alu instructions that have SSA dest registers increase the regietsr
902 * pressure Alu instructions that read from SSA registers may decresase the
903 * register pressure hency evaluate a priorityx values based on register
904 * pressure change */
905 int
register_priority() const906 AluInstr::register_priority() const
907 {
908 int priority = 0;
909 if (!has_alu_flag(alu_no_schedule_bias)) {
910
911 if (m_dest) {
912 if (m_dest->has_flag(Register::ssa) && has_alu_flag(alu_write)) {
913 if (m_dest->pin() != pin_group && m_dest->pin() != pin_chgr &&
914 !m_dest->addr())
915 priority--;
916 } else {
917 // Arrays and registers are pre-allocated, hence scheduling
918 // assignments early is unlikely to increase register pressure
919 priority++;
920 }
921 }
922
923 for (const auto s : m_src) {
924 auto r = s->as_register();
925 if (r) {
926 if (r->has_flag(Register::ssa)) {
927 int pending = 0;
928 for (auto b : r->uses()) {
929 if (!b->is_scheduled())
930 ++pending;
931 }
932 if (pending == 1)
933 ++priority;
934 }
935 if (r->addr() && r->addr()->as_register())
936 priority += 2;
937 }
938 if (s->as_uniform())
939 ++priority;
940 }
941 }
942 return priority;
943 }
944
945 bool
propagate_death()946 AluInstr::propagate_death()
947 {
948 if (!m_dest)
949 return true;
950
951 if (m_dest->pin() == pin_group || m_dest->pin() == pin_chan) {
952 switch (m_opcode) {
953 case op2_interp_x:
954 case op2_interp_xy:
955 case op2_interp_z:
956 case op2_interp_zw:
957 reset_alu_flag(alu_write);
958 return false;
959 default:;
960 }
961 }
962
963 if (m_dest->pin() == pin_array)
964 return false;
965
966 /* We assume that nir does a good job in eliminating all ALU results that
967 * are not needed, and we don't let copy propagation doesn't make the
968 * instruction obsolete, so just keep all */
969 if (has_alu_flag(alu_is_cayman_trans))
970 return false;
971
972 for (auto& src : m_src) {
973 auto reg = src->as_register();
974 if (reg)
975 reg->del_use(this);
976 }
977 return true;
978 }
979
980 bool
has_lds_access() const981 AluInstr::has_lds_access() const
982 {
983 return has_alu_flag(alu_is_lds) || has_lds_queue_read();
984 }
985
986 bool
has_lds_queue_read() const987 AluInstr::has_lds_queue_read() const
988 {
989 for (auto& s : m_src) {
990 auto ic = s->as_inline_const();
991 if (!ic)
992 continue;
993
994 if (ic->sel() == ALU_SRC_LDS_OQ_A_POP || ic->sel() == ALU_SRC_LDS_OQ_B_POP)
995 return true;
996 }
997 return false;
998 }
999
/* Opcode descriptor used when parsing instructions from their textual
 * form: the opcode (ALU or LDS, sharing storage) and the number of sources
 * one slot of the operation takes. */
struct OpDescr {
   union {
      EAluOp alu_opcode;
      ESDOp lds_opcode;
   };
   int nsrc;
};
1007
/* Opcode name -> descriptor lookup tables consulted by
 * AluInstr::from_string() before it falls back to searching the opcode
 * tables, one for ALU and one for LDS opcodes. */
static std::map<std::string, OpDescr> s_alu_map_by_name;
static std::map<std::string, OpDescr> s_lds_map_by_name;
1010
1011 Instr::Pointer
from_string(istream & is,ValueFactory & value_factory,AluGroup * group,bool is_cayman)1012 AluInstr::from_string(istream& is, ValueFactory& value_factory, AluGroup *group, bool is_cayman)
1013 {
1014 vector<string> tokens;
1015
1016 while (is.good() && !is.eof()) {
1017 string t;
1018 is >> t;
1019 if (t.length() > 0) {
1020 tokens.push_back(t);
1021 }
1022 }
1023
1024 std::set<AluModifiers> flags;
1025 auto t = tokens.begin();
1026
1027 bool is_lds = false;
1028
1029 if (*t == "LDS") {
1030 is_lds = true;
1031 t++;
1032 }
1033
1034 string opstr = *t++;
1035 string deststr = *t++;
1036
1037 if (deststr == "CLAMP") {
1038 flags.insert(alu_dst_clamp);
1039 deststr = *t++;
1040 }
1041
1042 assert(*t == ":");
1043 OpDescr op_descr = {{op_invalid}, -1};
1044
1045 if (is_lds) {
1046 auto op = s_lds_map_by_name.find(opstr);
1047 if (op == s_lds_map_by_name.end()) {
1048 for (auto [opcode, opdescr] : lds_ops) {
1049 if (opstr == opdescr.name) {
1050 op_descr.lds_opcode = opcode;
1051 op_descr.nsrc = opdescr.nsrc;
1052 s_alu_map_by_name[opstr] = op_descr;
1053 break;
1054 }
1055 }
1056
1057 if (op_descr.nsrc == -1) {
1058 std::cerr << "'" << opstr << "'";
1059 unreachable("Unknown opcode");
1060 return nullptr;
1061 }
1062 } else {
1063 op_descr = op->second;
1064 }
1065 } else {
1066 auto op = s_alu_map_by_name.find(opstr);
1067 if (op == s_alu_map_by_name.end()) {
1068 for (auto [opcode, opdescr] : alu_ops) {
1069 if (opstr == opdescr.name) {
1070 op_descr = {{opcode}, opdescr.nsrc};
1071 s_alu_map_by_name[opstr] = op_descr;
1072 break;
1073 }
1074 }
1075
1076 if (op_descr.nsrc == -1) {
1077 std::cerr << "'" << opstr << "'";
1078 unreachable("Unknown opcode");
1079 return nullptr;
1080 }
1081 } else {
1082 op_descr = op->second;
1083 }
1084 if (is_cayman) {
1085 switch (op_descr.alu_opcode) {
1086 case op1_cos:
1087 case op1_exp_ieee:
1088 case op1_log_clamped:
1089 case op1_recip_ieee:
1090 case op1_recipsqrt_ieee1:
1091 case op1_sqrt_ieee:
1092 case op1_sin:
1093 case op2_mullo_int:
1094 case op2_mulhi_int:
1095 case op2_mulhi_uint:
1096 flags.insert(alu_is_cayman_trans);
1097 default:
1098 ;
1099 }
1100 }
1101 }
1102
1103 int slots = 0;
1104
1105 uint32_t src_mods = 0;
1106 SrcValues sources;
1107 do {
1108 ++t;
1109 for (int i = 0; i < op_descr.nsrc; ++i) {
1110 string srcstr = *t++;
1111
1112 if (srcstr[0] == '-') {
1113 src_mods |= AluInstr::mod_neg << (2 * sources.size());
1114 srcstr = srcstr.substr(1);
1115 }
1116
1117 if (srcstr[0] == '|') {
1118 assert(srcstr[srcstr.length() - 1] == '|');
1119 src_mods |= AluInstr::mod_abs << (2 * sources.size());
1120 srcstr = srcstr.substr(1, srcstr.length() - 2);
1121 }
1122
1123 auto src = value_factory.src_from_string(srcstr);
1124 if (!src) {
1125 std::cerr << "Unable to create src[" << i << "] from " << srcstr << "\n";
1126 assert(src);
1127 }
1128 sources.push_back(src);
1129 }
1130 ++slots;
1131 } while (t != tokens.end() && *t == "+");
1132
1133 AluBankSwizzle bank_swizzle = alu_vec_unknown;
1134 ECFAluOpCode cf = cf_alu;
1135
1136 while (t != tokens.end()) {
1137
1138 switch ((*t)[0]) {
1139 case '{': {
1140 auto iflag = t->begin() + 1;
1141 while (iflag != t->end()) {
1142 if (*iflag == '}')
1143 break;
1144
1145 switch (*iflag) {
1146 case 'L':
1147 flags.insert(alu_last_instr);
1148 break;
1149 case 'W':
1150 flags.insert(alu_write);
1151 break;
1152 case 'E':
1153 flags.insert(alu_update_exec);
1154 break;
1155 case 'P':
1156 flags.insert(alu_update_pred);
1157 break;
1158 }
1159 ++iflag;
1160 }
1161 } break;
1162
1163 case 'V': {
1164 string bs = *t;
1165 if (bs == "VEC_012")
1166 bank_swizzle = alu_vec_012;
1167 else if (bs == "VEC_021")
1168 bank_swizzle = alu_vec_021;
1169 else if (bs == "VEC_102")
1170 bank_swizzle = alu_vec_102;
1171 else if (bs == "VEC_120")
1172 bank_swizzle = alu_vec_120;
1173 else if (bs == "VEC_201")
1174 bank_swizzle = alu_vec_201;
1175 else if (bs == "VEC_210")
1176 bank_swizzle = alu_vec_210;
1177 else {
1178 std::cerr << "'" << bs << "': ";
1179 unreachable("Unknowe bankswizzle given");
1180 }
1181 } break;
1182
1183 default: {
1184 string cf_str = *t;
1185 if (cf_str == "PUSH_BEFORE")
1186 cf = cf_alu_push_before;
1187 else if (cf_str == "POP_AFTER")
1188 cf = cf_alu_pop_after;
1189 else if (cf_str == "POP2_AFTER")
1190 cf = cf_alu_pop2_after;
1191 else if (cf_str == "EXTENDED")
1192 cf = cf_alu_extended;
1193 else if (cf_str == "BREAK")
1194 cf = cf_alu_break;
1195 else if (cf_str == "CONT")
1196 cf = cf_alu_continue;
1197 else if (cf_str == "ELSE_AFTER")
1198 cf = cf_alu_else_after;
1199 else {
1200 std::cerr << " '" << cf_str << "' ";
1201 unreachable("Unknown tocken in ALU instruction");
1202 }
1203 }
1204 }
1205 ++t;
1206 }
1207
1208 PRegister dest = nullptr;
1209 // construct instruction
1210 if (deststr != "(null)")
1211 dest = value_factory.dest_from_string(deststr);
1212
1213 AluInstr *retval = nullptr;
1214 if (is_lds)
1215 retval = new AluInstr(op_descr.lds_opcode, sources, flags);
1216 else
1217 retval = new AluInstr(op_descr.alu_opcode, dest, sources, flags, slots);
1218
1219 retval->m_source_modifiers = src_mods;
1220 retval->set_bank_swizzle(bank_swizzle);
1221 retval->set_cf_type(cf);
1222 if (group) {
1223 group->add_instruction(retval);
1224 retval = nullptr;
1225 }
1226 return retval;
1227 }
1228
1229 bool
do_ready() const1230 AluInstr::do_ready() const
1231 {
1232 /* Alu instructions are shuffled by the scheduler, so
1233 * we have to make sure that required ops are already
1234 * scheduled before marking this one ready */
1235 for (auto i : required_instr()) {
1236 if (i->is_dead())
1237 continue;
1238
1239 bool is_older_instr = i->block_id() <= block_id() &&
1240 i->index() < index();
1241 bool is_lds = i->as_alu() && i->as_alu()->has_lds_access();
1242 if (!i->is_scheduled() && (is_older_instr || is_lds))
1243 return false;
1244 }
1245
1246 for (auto s : m_src) {
1247 auto r = s->as_register();
1248 if (r) {
1249 if (!r->ready(block_id(), index()))
1250 return false;
1251 }
1252 auto u = s->as_uniform();
1253 if (u && u->buf_addr() && u->buf_addr()->as_register()) {
1254 if (!u->buf_addr()->as_register()->ready(block_id(), index()))
1255 return false;
1256 }
1257 }
1258
1259 if (m_dest && !m_dest->has_flag(Register::ssa)) {
1260 if (m_dest->pin() == pin_array) {
1261 auto av = static_cast<const LocalArrayValue *>(m_dest);
1262 auto addr = av->addr();
1263 /* For true indiect dest access we have to make sure that all
1264 * instructions that write the value before are schedukled */
1265 if (addr && (!addr->ready(block_id(), index()) ||
1266 !m_dest->ready(block_id(), index() - 1)))
1267 return false;
1268 }
1269
1270 /* If a register is updates, we have to make sure that uses before that
1271 * update are scheduled, otherwise we may use the updated value when we
1272 * shouldn't */
1273 for (auto u : m_dest->uses()) {
1274 /* TODO: This is working around some sloppy use updates, dead instrzuctions
1275 * should remove themselves from uses. */
1276 if (u->is_dead())
1277 continue;
1278 if (!u->is_scheduled() &&
1279 u->block_id() <= block_id() &&
1280 u->index() < index()) {
1281 return false;
1282 }
1283 }
1284 }
1285
1286 for (auto& r : m_extra_dependencies) {
1287 if (!r->ready(block_id(), index()))
1288 return false;
1289 }
1290
1291 return true;
1292 }
1293
1294 void
visit(AluGroup * instr)1295 AluInstrVisitor::visit(AluGroup *instr)
1296 {
1297 for (auto& i : *instr) {
1298 if (i)
1299 i->accept(*this);
1300 }
1301 }
1302
1303 void
visit(Block * instr)1304 AluInstrVisitor::visit(Block *instr)
1305 {
1306 for (auto& i : *instr)
1307 i->accept(*this);
1308 }
1309
1310 void
visit(IfInstr * instr)1311 AluInstrVisitor::visit(IfInstr *instr)
1312 {
1313 instr->predicate()->accept(*this);
1314 }
1315
is_kill() const1316 bool AluInstr::is_kill() const
1317 {
1318 if (has_alu_flag(alu_is_lds))
1319 return false;
1320
1321 switch (m_opcode) {
1322 case op2_kille:
1323 case op2_kille_int:
1324 case op2_killne:
1325 case op2_killne_int:
1326 case op2_killge:
1327 case op2_killge_int:
1328 case op2_killge_uint:
1329 case op2_killgt:
1330 case op2_killgt_int:
1331 case op2_killgt_uint:
1332 return true;
1333 default:
1334 return false;
1335 }
1336 }
1337
/* Modifier selection passed to emit_alu_op1. */
enum AluMods {
   mod_none = 0,       /* default: no modifier requested */
   mod_src0_abs = 1,   /* passed for nir_op_fabs */
   mod_src0_neg = 2,   /* passed for nir_op_fneg */
   mod_dest_clamp = 3, /* passed for nir_op_fsat */
};
1344
/* Forward declarations of the file-local emit helpers. AluInstr::from_nir
 * selects one of them based on the NIR opcode, the chip class and whether
 * the operation involves 64 bit values. All helpers return bool. */

/* Emits for nir_op_b2f32 / nir_op_b2i32; 'mask' is the inline constant
 * passed by the caller (ALU_SRC_1 or ALU_SRC_1_INT). */
static bool
emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader);



/* One-source operation with an optional modifier (see AluMods). */
static bool
emit_alu_op1(const nir_alu_instr& alu,
             EAluOp opcode,
             Shader& shader,
             AluMods mod = mod_none);

/* Helpers that AluInstr::from_nir uses when a source or the destination
 * is 64 bit wide. */
static bool
emit_alu_op1_64bit(const nir_alu_instr& alu,
                   EAluOp opcode,
                   Shader& shader,
                   bool switch_chan);
static bool
emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_neg(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_op2_64bit(const nir_alu_instr& alu,
                   EAluOp opcode,
                   Shader& shader,
                   bool switch_order);
static bool
emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu,
                           EAluOp opcode,
                           Shader& shader,
                           bool switch_order);
static bool
emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader);
static bool
emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_abs64(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader);

/* Generic two- and three-source operations and vector comparisons. */
static bool
emit_alu_op2(const nir_alu_instr& alu,
             EAluOp opcode,
             Shader& shader,
             AluInstr::Op2Options opts = AluInstr::op2_opt_none);
static bool
emit_alu_op2_int(const nir_alu_instr& alu,
                 EAluOp opcode,
                 Shader& shader,
                 AluInstr::Op2Options opts = AluInstr::op2_opt_none);
/* 'src_shuffle' defaults to {0, 1, 2}; callers pass e.g. {0, 2, 1} for
 * the csel opcodes. */
static bool
emit_alu_op3(const nir_alu_instr& alu,
             EAluOp opcode,
             Shader& shader,
             const std::array<int, 3>& src_shuffle = {0, 1, 2});
static bool
emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_any_all_fcomp(
   const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);
static bool
emit_any_all_icomp(
   const nir_alu_instr& alu, EAluOp opcode, int nc, bool all, Shader& shader);

/* Pack/unpack helpers and nir_op_ineg (emit_alu_comb_with_zero). */
static bool
emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader);
static bool
emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader);
static bool
emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader);
static bool
emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader);
static bool
emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader);

static bool
emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader);
static bool
emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader);

/* Dot products and vector construction. */
static bool
emit_dot(const nir_alu_instr& alu, int nelm, Shader& shader);
static bool
emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader);
static bool
emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader);

/* Chip specific variants: AluInstr::from_nir calls the *_cayman helpers
 * for ISA_CC_CAYMAN and the *_eg helpers for the other chip classes. */
static bool
emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);

static bool
emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);
static bool
emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);

/* Used for nir_op_f2i32 / nir_op_f2u32 when the chip class is
 * ISA_CC_EVERGREEN. */
static bool
emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader);

/* Derivatives (nir_op_fddx* / nir_op_fddy*); takes a TexInstr opcode. */
static bool
emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader);

static bool
emit_alu_cube(const nir_alu_instr& alu, Shader& shader);

static bool
emit_fdph(const nir_alu_instr& alu, Shader& shader);
1463
1464 static bool
check_64_bit_op_src(nir_src * src,void * state)1465 check_64_bit_op_src(nir_src *src, void *state)
1466 {
1467 if (nir_src_bit_size(*src) == 64) {
1468 *(bool *)state = true;
1469 return false;
1470 }
1471 return true;
1472 }
1473
1474 static bool
check_64_bit_op_def(nir_def * def,void * state)1475 check_64_bit_op_def(nir_def *def, void *state)
1476 {
1477 if (def->bit_size == 64) {
1478 *(bool *)state = true;
1479 return false;
1480 }
1481 return true;
1482 }
1483
/* Translate one NIR ALU instruction by dispatching to the matching emit
 * helper.
 *
 * The dispatch runs in three stages and the first match wins:
 *  1. operations with a 64 bit source or destination,
 *  2. opcodes that need chip class specific handling,
 *  3. the generic opcode table.
 *
 * Returns the result of the selected helper. Returns false for 64 bit
 * operations that are not handled, and for unknown opcodes (after printing
 * the instruction to stderr and asserting). */
bool
AluInstr::from_nir(nir_alu_instr *alu, Shader& shader)
{
   /* Check the sources first and look at the definitions only if no
    * 64 bit source was found. */
   bool is_64bit_op = false;
   nir_foreach_src(&alu->instr, check_64_bit_op_src, &is_64bit_op);
   if (!is_64bit_op)
      nir_foreach_def(&alu->instr, check_64_bit_op_def, &is_64bit_op);

   if (is_64bit_op) {
      switch (alu->op) {
      /* These opcodes are not handled here; they break out of the switch
       * and are picked up by the generic table at the end. */
      case nir_op_pack_64_2x32:
      case nir_op_unpack_64_2x32:
      case nir_op_pack_64_2x32_split:
      case nir_op_pack_half_2x16_split:
      case nir_op_unpack_64_2x32_split_x:
      case nir_op_unpack_64_2x32_split_y:
         break;
      case nir_op_mov:
         return emit_alu_mov_64bit(*alu, shader);
      case nir_op_fneg:
         return emit_alu_neg(*alu, shader);
      case nir_op_fsat:
         return emit_alu_fsat64(*alu, shader);
      case nir_op_ffract:
         return emit_alu_op1_64bit(*alu, op1_fract_64, shader, true);
      case nir_op_feq32:
         return emit_alu_op2_64bit_one_dst(*alu, op2_sete_64, shader, false);
      case nir_op_fge32:
         return emit_alu_op2_64bit_one_dst(*alu, op2_setge_64, shader, false);
      /* "less than" is emitted as "greater than" with switched sources */
      case nir_op_flt32:
         return emit_alu_op2_64bit_one_dst(*alu, op2_setgt_64, shader, true);
      case nir_op_fneu32:
         return emit_alu_op2_64bit_one_dst(*alu, op2_setne_64, shader, false);
      case nir_op_ffma:
         return emit_alu_fma_64bit(*alu, op3_fma_64, shader);

      case nir_op_fadd:
         return emit_alu_op2_64bit(*alu, op2_add_64, shader, false);
      case nir_op_fmul:
         return emit_alu_op2_64bit(*alu, op2_mul_64, shader, false);
      case nir_op_fmax:
         return emit_alu_op2_64bit(*alu, op2_max_64, shader, false);
      case nir_op_fmin:
         return emit_alu_op2_64bit(*alu, op2_min_64, shader, false);
      case nir_op_b2f64:
         return emit_alu_b2f64(*alu, shader);
      case nir_op_f2f64:
         return emit_alu_f2f64(*alu, shader);
      case nir_op_i2f64:
         return emit_alu_i2f64(*alu, op1_int_to_flt, shader);
      case nir_op_u2f64:
         return emit_alu_i2f64(*alu, op1_uint_to_flt, shader);
      case nir_op_f2f32:
         return emit_alu_f2f32(*alu, shader);
      case nir_op_fabs:
         return emit_alu_abs64(*alu, shader);
      case nir_op_fsqrt:
         return emit_alu_op1_64bit_trans(*alu, op1_sqrt_64, shader);
      case nir_op_frcp:
         return emit_alu_op1_64bit_trans(*alu, op1_recip_64, shader);
      case nir_op_frsq:
         return emit_alu_op1_64bit_trans(*alu, op1_recipsqrt_64, shader);
      case nir_op_vec2:
         return emit_alu_vec2_64(*alu, shader);
      /* Any other 64 bit operation is rejected without a message. */
      default:
         return false;
         ;
      }
   }

   /* Chip class specific opcodes. Anything not matched in this section
    * falls through to the generic table below. */
   if (shader.chip_class() == ISA_CC_CAYMAN) {
      switch (alu->op) {
      case nir_op_fcos_amd:
         return emit_alu_trans_op1_cayman(*alu, op1_cos, shader);
      case nir_op_fexp2:
         return emit_alu_trans_op1_cayman(*alu, op1_exp_ieee, shader);
      case nir_op_flog2:
         return emit_alu_trans_op1_cayman(*alu, op1_log_clamped, shader);
      case nir_op_frcp:
         return emit_alu_trans_op1_cayman(*alu, op1_recip_ieee, shader);
      case nir_op_frsq:
         return emit_alu_trans_op1_cayman(*alu, op1_recipsqrt_ieee1, shader);
      case nir_op_fsqrt:
         return emit_alu_trans_op1_cayman(*alu, op1_sqrt_ieee, shader);
      case nir_op_fsin_amd:
         return emit_alu_trans_op1_cayman(*alu, op1_sin, shader);
      case nir_op_i2f32:
         return emit_alu_op1(*alu, op1_int_to_flt, shader);
      case nir_op_u2f32:
         return emit_alu_op1(*alu, op1_uint_to_flt, shader);
      case nir_op_imul:
         return emit_alu_trans_op2_cayman(*alu, op2_mullo_int, shader);
      case nir_op_imul_high:
         return emit_alu_trans_op2_cayman(*alu, op2_mulhi_int, shader);
      case nir_op_umul_high:
         return emit_alu_trans_op2_cayman(*alu, op2_mulhi_uint, shader);
      case nir_op_f2u32:
         return emit_alu_op1(*alu, op1_flt_to_uint, shader);
      case nir_op_f2i32:
         return emit_alu_op1(*alu, op1_flt_to_int, shader);
      case nir_op_ishl:
         return emit_alu_op2_int(*alu, op2_lshl_int, shader);
      case nir_op_ishr:
         return emit_alu_op2_int(*alu, op2_ashr_int, shader);
      case nir_op_ushr:
         return emit_alu_op2_int(*alu, op2_lshr_int, shader);
      default:;
      }
   } else {
      /* On Evergreen the float to integer conversions use a dedicated
       * helper; this has to be checked before the switch further down
       * that handles the same opcodes for the remaining chip classes. */
      if (shader.chip_class() == ISA_CC_EVERGREEN) {
         switch (alu->op) {
         case nir_op_f2i32:
            return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_int, shader);
         case nir_op_f2u32:
            return emit_alu_f2i32_or_u32_eg(*alu, op1_flt_to_uint, shader);
         default:;
         }
      }

      /* Shifts: chip classes >= ISA_CC_R700 use emit_alu_op2_int, older
       * ones use emit_alu_trans_op2_eg. */
      if (shader.chip_class() >= ISA_CC_R700) {
         switch (alu->op) {
         case nir_op_ishl:
            return emit_alu_op2_int(*alu, op2_lshl_int, shader);
         case nir_op_ishr:
            return emit_alu_op2_int(*alu, op2_ashr_int, shader);
         case nir_op_ushr:
            return emit_alu_op2_int(*alu, op2_lshr_int, shader);
         default:;
         }
      } else {
         switch (alu->op) {
         case nir_op_ishl:
            return emit_alu_trans_op2_eg(*alu, op2_lshl_int, shader);
         case nir_op_ishr:
            return emit_alu_trans_op2_eg(*alu, op2_ashr_int, shader);
         case nir_op_ushr:
            return emit_alu_trans_op2_eg(*alu, op2_lshr_int, shader);
         default:;
         }
      }

      /* Opcodes that all non-Cayman chip classes emit with the *_eg
       * helpers. */
      switch (alu->op) {
      case nir_op_f2i32:
         return emit_alu_trans_op1_eg(*alu, op1_flt_to_int, shader);
      case nir_op_f2u32:
         return emit_alu_trans_op1_eg(*alu, op1_flt_to_uint, shader);
      case nir_op_fcos_amd:
         return emit_alu_trans_op1_eg(*alu, op1_cos, shader);
      case nir_op_fexp2:
         return emit_alu_trans_op1_eg(*alu, op1_exp_ieee, shader);
      case nir_op_flog2:
         return emit_alu_trans_op1_eg(*alu, op1_log_clamped, shader);
      case nir_op_frcp:
         return emit_alu_trans_op1_eg(*alu, op1_recip_ieee, shader);
      case nir_op_frsq:
         return emit_alu_trans_op1_eg(*alu, op1_recipsqrt_ieee1, shader);
      case nir_op_fsin_amd:
         return emit_alu_trans_op1_eg(*alu, op1_sin, shader);
      case nir_op_fsqrt:
         return emit_alu_trans_op1_eg(*alu, op1_sqrt_ieee, shader);
      case nir_op_i2f32:
         return emit_alu_trans_op1_eg(*alu, op1_int_to_flt, shader);
      case nir_op_u2f32:
         return emit_alu_trans_op1_eg(*alu, op1_uint_to_flt, shader);
      case nir_op_imul:
         return emit_alu_trans_op2_eg(*alu, op2_mullo_int, shader);
      case nir_op_imul_high:
         return emit_alu_trans_op2_eg(*alu, op2_mulhi_int, shader);
      case nir_op_umul_high:
         return emit_alu_trans_op2_eg(*alu, op2_mulhi_uint, shader);
      default:;
      }
   }

   /* Generic table: opcodes that are emitted independent of chip class. */
   switch (alu->op) {
   case nir_op_b2b1:
      return emit_alu_op1(*alu, op1_mov, shader);
   case nir_op_b2b32:
      return emit_alu_op1(*alu, op1_mov, shader);
   case nir_op_b2f32:
      return emit_alu_b2x(*alu, ALU_SRC_1, shader);
   case nir_op_b2i32:
      return emit_alu_b2x(*alu, ALU_SRC_1_INT, shader);

   case nir_op_bfm:
      return emit_alu_op2_int(*alu, op2_bfm_int, shader, op2_opt_none);
   case nir_op_bit_count:
      return emit_alu_op1(*alu, op1_bcnt_int, shader);

   case nir_op_bitfield_reverse:
      return emit_alu_op1(*alu, op1_bfrev_int, shader);
   case nir_op_bitfield_select:
      return emit_alu_op3(*alu, op3_bfi_int, shader);

   case nir_op_b32all_fequal2:
      return emit_any_all_fcomp2(*alu, op2_sete_dx10, shader);
   case nir_op_b32all_fequal3:
      return emit_any_all_fcomp(*alu, op2_sete, 3, true, shader);
   case nir_op_b32all_fequal4:
      return emit_any_all_fcomp(*alu, op2_sete, 4, true, shader);
   case nir_op_b32all_iequal2:
      return emit_any_all_icomp(*alu, op2_sete_int, 2, true, shader);
   case nir_op_b32all_iequal3:
      return emit_any_all_icomp(*alu, op2_sete_int, 3, true, shader);
   case nir_op_b32all_iequal4:
      return emit_any_all_icomp(*alu, op2_sete_int, 4, true, shader);
   case nir_op_b32any_fnequal2:
      return emit_any_all_fcomp2(*alu, op2_setne_dx10, shader);
   case nir_op_b32any_fnequal3:
      return emit_any_all_fcomp(*alu, op2_setne, 3, false, shader);
   case nir_op_b32any_fnequal4:
      return emit_any_all_fcomp(*alu, op2_setne, 4, false, shader);
   case nir_op_b32any_inequal2:
      return emit_any_all_icomp(*alu, op2_setne_int, 2, false, shader);
   case nir_op_b32any_inequal3:
      return emit_any_all_icomp(*alu, op2_setne_int, 3, false, shader);
   case nir_op_b32any_inequal4:
      return emit_any_all_icomp(*alu, op2_setne_int, 4, false, shader);
   /* csel passes the sources in the order {0, 2, 1} */
   case nir_op_b32csel:
      return emit_alu_op3(*alu, op3_cnde_int, shader, {0, 2, 1});

   /* fabs, fneg and fsat are emitted as a mov with a modifier */
   case nir_op_fabs:
      return emit_alu_op1(*alu, op1_mov, shader, mod_src0_abs);
   case nir_op_fadd:
      return emit_alu_op2(*alu, op2_add, shader);
   case nir_op_fceil:
      return emit_alu_op1(*alu, op1_ceil, shader);
   case nir_op_fcsel:
      return emit_alu_op3(*alu, op3_cnde, shader, {0, 2, 1});
   case nir_op_fcsel_ge:
      return emit_alu_op3(*alu, op3_cndge, shader, {0, 1, 2});
   case nir_op_fcsel_gt:
      return emit_alu_op3(*alu, op3_cndgt, shader, {0, 1, 2});

   case nir_op_fdph:
      return emit_fdph(*alu, shader);
   /* fdot2/fdot3 use emit_dot from Evergreen on and emit_dot4 before */
   case nir_op_fdot2:
      if (shader.chip_class() >= ISA_CC_EVERGREEN)
         return emit_dot(*alu, 2, shader);
      else
         return emit_dot4(*alu, 2, shader);
   case nir_op_fdot3:
      if (shader.chip_class() >= ISA_CC_EVERGREEN)
         return emit_dot(*alu, 3, shader);
      else
         return emit_dot4(*alu, 3, shader);
   case nir_op_fdot4:
      return emit_dot4(*alu, 4, shader);

   case nir_op_feq32:
   case nir_op_feq:
      return emit_alu_op2(*alu, op2_sete_dx10, shader);
   case nir_op_ffloor:
      return emit_alu_op1(*alu, op1_floor, shader);
   case nir_op_ffract:
      return emit_alu_op1(*alu, op1_fract, shader);
   case nir_op_fge32:
      return emit_alu_op2(*alu, op2_setge_dx10, shader);
   case nir_op_fge:
      return emit_alu_op2(*alu, op2_setge_dx10, shader);
   case nir_op_find_lsb:
      return emit_alu_op1(*alu, op1_ffbl_int, shader);

   /* "less than" comparisons are emitted as "greater than" with the
    * op2_opt_reverse option */
   case nir_op_flt32:
      return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
   case nir_op_flt:
      return emit_alu_op2(*alu, op2_setgt_dx10, shader, op2_opt_reverse);
   case nir_op_fmax:
      return emit_alu_op2(*alu, op2_max_dx10, shader);
   case nir_op_fmin:
      return emit_alu_op2(*alu, op2_min_dx10, shader);

   /* fmul uses op2_mul_ieee unless the shader has the legacy math rules
    * flag set; in that case it is emitted like fmulz */
   case nir_op_fmul:
      if (!shader.has_flag(Shader::sh_legacy_math_rules))
         return emit_alu_op2(*alu, op2_mul_ieee, shader);
      FALLTHROUGH;
   case nir_op_fmulz:
      return emit_alu_op2(*alu, op2_mul, shader);

   case nir_op_fneg:
      return emit_alu_op1(*alu, op1_mov, shader, mod_src0_neg);
   case nir_op_fneu32:
      return emit_alu_op2(*alu, op2_setne_dx10, shader);
   case nir_op_fneu:
      return emit_alu_op2(*alu, op2_setne_dx10, shader);

   case nir_op_fround_even:
      return emit_alu_op1(*alu, op1_rndne, shader);
   case nir_op_fsat:
      return emit_alu_op1(*alu, op1_mov, shader, mod_dest_clamp);
   /* fsub is emitted as an add with the op2_opt_neg_src1 option */
   case nir_op_fsub:
      return emit_alu_op2(*alu, op2_add, shader, op2_opt_neg_src1);
   case nir_op_ftrunc:
      return emit_alu_op1(*alu, op1_trunc, shader);
   case nir_op_iadd:
      return emit_alu_op2_int(*alu, op2_add_int, shader);
   case nir_op_iand:
      return emit_alu_op2_int(*alu, op2_and_int, shader);
   case nir_op_ibfe:
      return emit_alu_op3(*alu, op3_bfe_int, shader);
   case nir_op_i32csel_ge:
      return emit_alu_op3(*alu, op3_cndge_int, shader, {0, 1, 2});
   case nir_op_i32csel_gt:
      return emit_alu_op3(*alu, op3_cndgt_int, shader, {0, 1, 2});
   case nir_op_ieq32:
      return emit_alu_op2_int(*alu, op2_sete_int, shader);
   case nir_op_ieq:
      return emit_alu_op2_int(*alu, op2_sete_int, shader);
   case nir_op_ifind_msb_rev:
      return emit_alu_op1(*alu, op1_ffbh_int, shader);
   case nir_op_ige32:
      return emit_alu_op2_int(*alu, op2_setge_int, shader);
   case nir_op_ige:
      return emit_alu_op2_int(*alu, op2_setge_int, shader);
   case nir_op_ilt32:
      return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
   case nir_op_ilt:
      return emit_alu_op2_int(*alu, op2_setgt_int, shader, op2_opt_reverse);
   case nir_op_imax:
      return emit_alu_op2_int(*alu, op2_max_int, shader);
   case nir_op_imin:
      return emit_alu_op2_int(*alu, op2_min_int, shader);
   case nir_op_ine32:
      return emit_alu_op2_int(*alu, op2_setne_int, shader);
   case nir_op_ine:
      return emit_alu_op2_int(*alu, op2_setne_int, shader);
   case nir_op_ineg:
      return emit_alu_comb_with_zero(*alu, op2_sub_int, shader);
   case nir_op_inot:
      return emit_alu_op1(*alu, op1_not_int, shader);
   case nir_op_ior:
      return emit_alu_op2_int(*alu, op2_or_int, shader);
   case nir_op_isub:
      return emit_alu_op2_int(*alu, op2_sub_int, shader);
   case nir_op_ixor:
      return emit_alu_op2_int(*alu, op2_xor_int, shader);
   /* The pack/unpack opcodes that the 64 bit switch above skipped are
    * handled here. */
   case nir_op_pack_64_2x32:
      return emit_pack_64_2x32(*alu, shader);
   case nir_op_unpack_64_2x32:
      return emit_unpack_64_2x32(*alu, shader);
   case nir_op_pack_64_2x32_split:
      return emit_pack_64_2x32_split(*alu, shader);
   case nir_op_pack_half_2x16_split:
      return emit_pack_32_2x16_split(*alu, shader);
   case nir_op_slt:
      return emit_alu_op2(*alu, op2_setgt, shader, op2_opt_reverse);
   case nir_op_sge:
      return emit_alu_op2(*alu, op2_setge, shader);
   case nir_op_seq:
      return emit_alu_op2(*alu, op2_sete, shader);
   case nir_op_sne:
      return emit_alu_op2(*alu, op2_setne, shader);
   case nir_op_ubfe:
      return emit_alu_op3(*alu, op3_bfe_uint, shader);
   case nir_op_ufind_msb_rev:
      return emit_alu_op1(*alu, op1_ffbh_uint, shader);
   case nir_op_uge32:
      return emit_alu_op2_int(*alu, op2_setge_uint, shader);
   case nir_op_uge:
      return emit_alu_op2_int(*alu, op2_setge_uint, shader);
   case nir_op_ult32:
      return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
   case nir_op_ult:
      return emit_alu_op2_int(*alu, op2_setgt_uint, shader, op2_opt_reverse);
   case nir_op_umad24:
      return emit_alu_op3(*alu, op3_muladd_uint24, shader, {0, 1, 2});
   case nir_op_umax:
      return emit_alu_op2_int(*alu, op2_max_uint, shader);
   case nir_op_umin:
      return emit_alu_op2_int(*alu, op2_min_uint, shader);
   case nir_op_umul24:
      return emit_alu_op2(*alu, op2_mul_uint24, shader);
   case nir_op_unpack_64_2x32_split_x:
      return emit_unpack_64_2x32_split(*alu, 0, shader);
   case nir_op_unpack_64_2x32_split_y:
      return emit_unpack_64_2x32_split(*alu, 1, shader);
   case nir_op_unpack_half_2x16_split_x:
      return emit_unpack_32_2x16_split_x(*alu, shader);
   case nir_op_unpack_half_2x16_split_y:
      return emit_unpack_32_2x16_split_y(*alu, shader);

   /* ffma uses op3_muladd_ieee unless the shader has the legacy math
    * rules flag set; in that case it is emitted like ffmaz */
   case nir_op_ffma:
      if (!shader.has_flag(Shader::sh_legacy_math_rules))
         return emit_alu_op3(*alu, op3_muladd_ieee, shader);
      FALLTHROUGH;
   case nir_op_ffmaz:
      return emit_alu_op3(*alu, op3_muladd, shader);

   case nir_op_mov:
      return emit_alu_op1(*alu, op1_mov, shader);
   /* NOTE(review): every chip class branch above already returns for
    * nir_op_f2i32, so this case looks unreachable - confirm. */
   case nir_op_f2i32:
      return emit_alu_op1(*alu, op1_flt_to_int, shader);
   case nir_op_vec2:
      return emit_create_vec(*alu, 2, shader);
   case nir_op_vec3:
      return emit_create_vec(*alu, 3, shader);
   case nir_op_vec4:
      return emit_create_vec(*alu, 4, shader);

   /* Derivatives; the unqualified and the coarse variants are emitted
    * with fine = false */
   case nir_op_fddx:
   case nir_op_fddx_coarse:
      return emit_tex_fdd(*alu, TexInstr::get_gradient_h, false, shader);
   case nir_op_fddx_fine:
      return emit_tex_fdd(*alu, TexInstr::get_gradient_h, true, shader);
   case nir_op_fddy:
   case nir_op_fddy_coarse:
      return emit_tex_fdd(*alu, TexInstr::get_gradient_v, false, shader);
   case nir_op_fddy_fine:
      return emit_tex_fdd(*alu, TexInstr::get_gradient_v, true, shader);
   case nir_op_cube_amd:
      return emit_alu_cube(*alu, shader);
   /* Unknown opcode: print the instruction, assert, and report failure
    * to the caller. */
   default:
      fprintf(stderr, "Unknown instruction '");
      nir_print_instr(&alu->instr, stderr);
      fprintf(stderr, "'\n");
      assert(0);
      return false;
   }
}
1903
1904 static Pin
pin_for_components(const nir_alu_instr & alu)1905 pin_for_components(const nir_alu_instr& alu)
1906 {
1907 return alu.def.num_components == 1 ? pin_free : pin_none;
1908 }
1909
1910 static bool
emit_alu_op1_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_chan)1911 emit_alu_op1_64bit(const nir_alu_instr& alu,
1912 EAluOp opcode,
1913 Shader& shader,
1914 bool switch_chan)
1915 {
1916 auto& value_factory = shader.value_factory();
1917
1918 auto group = new AluGroup();
1919
1920 AluInstr *ir = nullptr;
1921
1922 int swz[2] = {0, 1};
1923 if (switch_chan) {
1924 swz[0] = 1;
1925 swz[1] = 0;
1926 }
1927
1928 for (unsigned i = 0; i < alu.def.num_components; ++i) {
1929 ir = new AluInstr(opcode,
1930 value_factory.dest(alu.def, 2 * i, pin_chan),
1931 value_factory.src64(alu.src[0], i, swz[0]),
1932 {alu_write});
1933 group->add_instruction(ir);
1934
1935 ir = new AluInstr(opcode,
1936 value_factory.dest(alu.def, 2 * i + 1, pin_chan),
1937 value_factory.src64(alu.src[0], i, swz[1]),
1938 {alu_write});
1939 group->add_instruction(ir);
1940 }
1941 if (ir)
1942 ir->set_alu_flag(alu_last_instr);
1943 shader.emit_instruction(group);
1944 return true;
1945 }
1946
1947 static bool
emit_alu_mov_64bit(const nir_alu_instr & alu,Shader & shader)1948 emit_alu_mov_64bit(const nir_alu_instr& alu, Shader& shader)
1949 {
1950 auto& value_factory = shader.value_factory();
1951
1952 AluInstr *ir = nullptr;
1953
1954 for (unsigned i = 0; i < alu.def.num_components; ++i) {
1955 for (unsigned c = 0; c < 2; ++c) {
1956 ir = new AluInstr(op1_mov,
1957 value_factory.dest(alu.def, 2 * i + c, pin_free),
1958 value_factory.src64(alu.src[0], i, c),
1959 {alu_write});
1960 shader.emit_instruction(ir);
1961 }
1962 }
1963 if (ir)
1964 ir->set_alu_flag(alu_last_instr);
1965 return true;
1966 }
1967
1968 static bool
emit_alu_neg(const nir_alu_instr & alu,Shader & shader)1969 emit_alu_neg(const nir_alu_instr& alu, Shader& shader)
1970 {
1971 auto& value_factory = shader.value_factory();
1972
1973 AluInstr *ir = nullptr;
1974
1975 for (unsigned i = 0; i < alu.def.num_components; ++i) {
1976 for (unsigned c = 0; c < 2; ++c) {
1977 ir = new AluInstr(op1_mov,
1978 value_factory.dest(alu.def, 2 * i + c, pin_chan),
1979 value_factory.src64(alu.src[0], i, c),
1980 {alu_write});
1981 shader.emit_instruction(ir);
1982 }
1983 ir->set_source_mod(0, AluInstr::mod_neg);
1984 }
1985 if (ir)
1986 ir->set_alu_flag(alu_last_instr);
1987
1988 return true;
1989 }
1990
1991 static bool
emit_alu_abs64(const nir_alu_instr & alu,Shader & shader)1992 emit_alu_abs64(const nir_alu_instr& alu, Shader& shader)
1993 {
1994 auto& value_factory = shader.value_factory();
1995
1996 assert(alu.def.num_components == 1);
1997
1998 shader.emit_instruction(new AluInstr(op1_mov,
1999 value_factory.dest(alu.def, 0, pin_chan),
2000 value_factory.src64(alu.src[0], 0, 0),
2001 AluInstr::write));
2002
2003 auto ir = new AluInstr(op1_mov,
2004 value_factory.dest(alu.def, 1, pin_chan),
2005 value_factory.src64(alu.src[0], 0, 1),
2006 AluInstr::last_write);
2007 ir->set_source_mod(0, AluInstr::mod_abs);
2008 shader.emit_instruction(ir);
2009 return true;
2010 }
2011
2012 static bool
try_propagat_fsat64(const nir_alu_instr & alu,Shader & shader)2013 try_propagat_fsat64(const nir_alu_instr& alu, Shader& shader)
2014 {
2015 auto& value_factory = shader.value_factory();
2016 auto src0 = value_factory.src64(alu.src[0], 0, 0);
2017 auto reg0 = src0->as_register();
2018 if (!reg0)
2019 return false;
2020
2021 if (!reg0->has_flag(Register::ssa))
2022 return false;
2023
2024 if (reg0->parents().size() != 1)
2025 return false;
2026
2027 if (!reg0->uses().empty())
2028 return false;
2029
2030 auto parent = (*reg0->parents().begin())->as_alu();
2031 if (!parent)
2032 return false;
2033
2034 auto opinfo = alu_ops.at(parent->opcode());
2035 if (!opinfo.can_clamp)
2036 return false;
2037
2038 parent->set_alu_flag(alu_dst_clamp);
2039 return true;
2040 }
2041
2042
2043 static bool
emit_alu_fsat64(const nir_alu_instr & alu,Shader & shader)2044 emit_alu_fsat64(const nir_alu_instr& alu, Shader& shader)
2045 {
2046 auto& value_factory = shader.value_factory();
2047
2048 assert(alu.def.num_components == 1);
2049
2050 if (try_propagat_fsat64(alu, shader)) {
2051 auto ir = new AluInstr(op1_mov,
2052 value_factory.dest(alu.def, 0, pin_chan),
2053 value_factory.src64(alu.src[0], 0, 0),
2054 AluInstr::write);
2055 shader.emit_instruction(ir);
2056
2057 shader.emit_instruction(new AluInstr(op1_mov,
2058 value_factory.dest(alu.def, 1, pin_chan),
2059 value_factory.src64(alu.src[0], 0, 1),
2060 AluInstr::last_write));
2061 } else {
2062
2063 /* dest clamp doesn't work on plain 64 bit move, so add a zero
2064 * to apply the modifier */
2065
2066 auto group = new AluGroup();
2067 auto ir = new AluInstr(op2_add_64,
2068 value_factory.dest(alu.def, 0, pin_chan),
2069 value_factory.src64(alu.src[0], 0, 1),
2070 value_factory.literal(0),
2071 AluInstr::write);
2072 ir->set_alu_flag(alu_dst_clamp);
2073 group->add_instruction(ir);
2074
2075 group->add_instruction(new AluInstr(op2_add_64,
2076 value_factory.dest(alu.def, 1, pin_chan),
2077 value_factory.src64(alu.src[0], 0, 0),
2078 value_factory.literal(0),
2079 AluInstr::last_write));
2080 shader.emit_instruction(group);
2081
2082 }
2083 return true;
2084 }
2085
2086
2087 static bool
emit_alu_op2_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_src)2088 emit_alu_op2_64bit(const nir_alu_instr& alu,
2089 EAluOp opcode,
2090 Shader& shader,
2091 bool switch_src)
2092 {
2093 auto& value_factory = shader.value_factory();
2094 auto group = new AluGroup();
2095 AluInstr *ir = nullptr;
2096 int order[2] = {0, 1};
2097 if (switch_src) {
2098 order[0] = 1;
2099 order[1] = 0;
2100 }
2101
2102 int num_emit0 = opcode == op2_mul_64 ? 3 : 1;
2103
2104 assert(num_emit0 == 1 || alu.def.num_components == 1);
2105
2106 for (unsigned k = 0; k < alu.def.num_components; ++k) {
2107 int i = 0;
2108 for (; i < num_emit0; ++i) {
2109 auto dest = i < 2 ? value_factory.dest(alu.def, i, pin_chan)
2110 : value_factory.dummy_dest(i);
2111
2112 ir = new AluInstr(opcode,
2113 dest,
2114 value_factory.src64(alu.src[order[0]], k, 1),
2115 value_factory.src64(alu.src[order[1]], k, 1),
2116 i < 2 ? AluInstr::write : AluInstr::empty);
2117 group->add_instruction(ir);
2118 }
2119
2120 auto dest =
2121 i == 1 ? value_factory.dest(alu.def, i, pin_chan) : value_factory.dummy_dest(i);
2122
2123 ir = new AluInstr(opcode,
2124 dest,
2125 value_factory.src64(alu.src[order[0]], k, 0),
2126 value_factory.src64(alu.src[order[1]], k, 0),
2127 i == 1 ? AluInstr::write : AluInstr::empty);
2128 group->add_instruction(ir);
2129 }
2130 if (ir)
2131 ir->set_alu_flag(alu_last_instr);
2132
2133 shader.emit_instruction(group);
2134 return true;
2135 }
2136
2137 static bool
emit_alu_op2_64bit_one_dst(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,bool switch_order)2138 emit_alu_op2_64bit_one_dst(const nir_alu_instr& alu,
2139 EAluOp opcode,
2140 Shader& shader,
2141 bool switch_order)
2142 {
2143 auto& value_factory = shader.value_factory();
2144 AluInstr *ir = nullptr;
2145 int order[2] = {0, 1};
2146 if (switch_order) {
2147 order[0] = 1;
2148 order[1] = 0;
2149 }
2150
2151 AluInstr::SrcValues src(4);
2152
2153 for (unsigned k = 0; k < alu.def.num_components; ++k) {
2154 auto dest = value_factory.dest(alu.def, 2 * k, pin_chan);
2155 src[0] = value_factory.src64(alu.src[order[0]], k, 1);
2156 src[1] = value_factory.src64(alu.src[order[1]], k, 1);
2157 src[2] = value_factory.src64(alu.src[order[0]], k, 0);
2158 src[3] = value_factory.src64(alu.src[order[1]], k, 0);
2159
2160 ir = new AluInstr(opcode, dest, src, AluInstr::write, 2);
2161 ir->set_alu_flag(alu_64bit_op);
2162
2163 shader.emit_instruction(ir);
2164 }
2165 if (ir)
2166 ir->set_alu_flag(alu_last_instr);
2167
2168 return true;
2169 }
2170
2171 static bool
emit_alu_op1_64bit_trans(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2172 emit_alu_op1_64bit_trans(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2173 {
2174 auto& value_factory = shader.value_factory();
2175 auto group = new AluGroup();
2176 AluInstr *ir = nullptr;
2177 for (unsigned i = 0; i < 3; ++i) {
2178 ir = new AluInstr(opcode,
2179 i < 2 ? value_factory.dest(alu.def, i, pin_chan)
2180 : value_factory.dummy_dest(i),
2181 value_factory.src64(alu.src[0], 0, 1),
2182 value_factory.src64(alu.src[0], 0, 0),
2183 i < 2 ? AluInstr::write : AluInstr::empty);
2184
2185 if (opcode == op1_sqrt_64)
2186 ir->set_source_mod(0, AluInstr::mod_abs);
2187 group->add_instruction(ir);
2188 }
2189 if (ir)
2190 ir->set_alu_flag(alu_last_instr);
2191 shader.emit_instruction(group);
2192 return true;
2193 }
2194
2195 static bool
emit_alu_fma_64bit(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2196 emit_alu_fma_64bit(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2197 {
2198 auto& value_factory = shader.value_factory();
2199 auto group = new AluGroup();
2200 AluInstr *ir = nullptr;
2201 for (unsigned i = 0; i < 4; ++i) {
2202
2203 int chan = i < 3 ? 1 : 0;
2204 auto dest =
2205 i < 2 ? value_factory.dest(alu.def, i, pin_chan) : value_factory.dummy_dest(i);
2206
2207 ir = new AluInstr(opcode,
2208 dest,
2209 value_factory.src64(alu.src[0], 0, chan),
2210 value_factory.src64(alu.src[1], 0, chan),
2211 value_factory.src64(alu.src[2], 0, chan),
2212 i < 2 ? AluInstr::write : AluInstr::empty);
2213 group->add_instruction(ir);
2214 }
2215 if (ir)
2216 ir->set_alu_flag(alu_last_instr);
2217 shader.emit_instruction(group);
2218 return true;
2219 }
2220
2221 static bool
emit_alu_b2f64(const nir_alu_instr & alu,Shader & shader)2222 emit_alu_b2f64(const nir_alu_instr& alu, Shader& shader)
2223 {
2224 auto& value_factory = shader.value_factory();
2225 auto group = new AluGroup();
2226 AluInstr *ir = nullptr;
2227
2228 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2229 ir = new AluInstr(op2_and_int,
2230 value_factory.dest(alu.def, 2 * i, pin_group),
2231 value_factory.src(alu.src[0], i),
2232 value_factory.zero(),
2233 {alu_write});
2234 group->add_instruction(ir);
2235
2236 ir = new AluInstr(op2_and_int,
2237 value_factory.dest(alu.def, 2 * i + 1, pin_group),
2238 value_factory.src(alu.src[0], i),
2239 value_factory.literal(0x3ff00000),
2240 {alu_write});
2241 group->add_instruction(ir);
2242 }
2243 if (ir)
2244 ir->set_alu_flag(alu_last_instr);
2245 shader.emit_instruction(group);
2246 return true;
2247 }
2248
2249 static bool
emit_alu_i2f64(const nir_alu_instr & alu,EAluOp op,Shader & shader)2250 emit_alu_i2f64(const nir_alu_instr& alu, EAluOp op, Shader& shader)
2251 {
2252 /* int 64 to f64 should have been lowered, so we only handle i32 to f64 */
2253 auto& value_factory = shader.value_factory();
2254 auto group = new AluGroup();
2255 AluInstr *ir = nullptr;
2256
2257 assert(alu.def.num_components == 1);
2258
2259 auto tmpx = value_factory.temp_register();
2260 shader.emit_instruction(new AluInstr(op2_and_int,
2261 tmpx,
2262 value_factory.src(alu.src[0], 0),
2263 value_factory.literal(0xffffff00),
2264 AluInstr::write));
2265 auto tmpy = value_factory.temp_register();
2266 shader.emit_instruction(new AluInstr(op2_and_int,
2267 tmpy,
2268 value_factory.src(alu.src[0], 0),
2269 value_factory.literal(0xff),
2270 AluInstr::last_write));
2271
2272 auto tmpx2 = value_factory.temp_register();
2273 auto tmpy2 = value_factory.temp_register();
2274 shader.emit_instruction(new AluInstr(op, tmpx2, tmpx, AluInstr::last_write));
2275 shader.emit_instruction(new AluInstr(op, tmpy2, tmpy, AluInstr::last_write));
2276
2277 auto tmpx3 = value_factory.temp_register(0);
2278 auto tmpy3 = value_factory.temp_register(1);
2279 auto tmpz3 = value_factory.temp_register(2);
2280 auto tmpw3 = value_factory.temp_register(3);
2281
2282 ir = new AluInstr(op1_flt32_to_flt64, tmpx3, tmpx2, AluInstr::write);
2283 group->add_instruction(ir);
2284 ir = new AluInstr(op1_flt32_to_flt64, tmpy3, value_factory.zero(), AluInstr::write);
2285 group->add_instruction(ir);
2286 ir = new AluInstr(op1_flt32_to_flt64, tmpz3, tmpy2, AluInstr::write);
2287 group->add_instruction(ir);
2288 ir =
2289 new AluInstr(op1_flt32_to_flt64, tmpw3, value_factory.zero(), AluInstr::last_write);
2290 group->add_instruction(ir);
2291 shader.emit_instruction(group);
2292
2293 group = new AluGroup();
2294
2295 ir = new AluInstr(op2_add_64,
2296 value_factory.dest(alu.def, 0, pin_chan),
2297 tmpy3,
2298 tmpw3,
2299 AluInstr::write);
2300 group->add_instruction(ir);
2301 ir = new AluInstr(op2_add_64,
2302 value_factory.dest(alu.def, 1, pin_chan),
2303 tmpx3,
2304 tmpz3,
2305 AluInstr::write);
2306 group->add_instruction(ir);
2307 shader.emit_instruction(group);
2308
2309 return true;
2310 }
2311
2312 static bool
emit_alu_f2f64(const nir_alu_instr & alu,Shader & shader)2313 emit_alu_f2f64(const nir_alu_instr& alu, Shader& shader)
2314 {
2315 auto& value_factory = shader.value_factory();
2316 auto group = new AluGroup();
2317 AluInstr *ir = nullptr;
2318
2319 assert(alu.def.num_components == 1);
2320
2321 ir = new AluInstr(op1_flt32_to_flt64,
2322 value_factory.dest(alu.def, 0, pin_chan),
2323 value_factory.src(alu.src[0], 0),
2324 AluInstr::write);
2325 group->add_instruction(ir);
2326 ir = new AluInstr(op1_flt32_to_flt64,
2327 value_factory.dest(alu.def, 1, pin_chan),
2328 value_factory.zero(),
2329 AluInstr::last_write);
2330 group->add_instruction(ir);
2331 shader.emit_instruction(group);
2332 return true;
2333 }
2334
2335 static bool
emit_alu_f2f32(const nir_alu_instr & alu,Shader & shader)2336 emit_alu_f2f32(const nir_alu_instr& alu, Shader& shader)
2337 {
2338 auto& value_factory = shader.value_factory();
2339 auto group = new AluGroup();
2340 AluInstr *ir = nullptr;
2341
2342 ir = new AluInstr(op1v_flt64_to_flt32,
2343 value_factory.dest(alu.def, 0, pin_chan),
2344 value_factory.src64(alu.src[0], 0, 1),
2345 {alu_write});
2346 group->add_instruction(ir);
2347 ir = new AluInstr(op1v_flt64_to_flt32,
2348 value_factory.dummy_dest(1),
2349 value_factory.src64(alu.src[0], 0, 0),
2350 AluInstr::last);
2351 group->add_instruction(ir);
2352 shader.emit_instruction(group);
2353 return true;
2354 }
2355
2356 static bool
emit_alu_b2x(const nir_alu_instr & alu,AluInlineConstants mask,Shader & shader)2357 emit_alu_b2x(const nir_alu_instr& alu, AluInlineConstants mask, Shader& shader)
2358 {
2359 auto& value_factory = shader.value_factory();
2360 AluInstr *ir = nullptr;
2361 auto pin = pin_for_components(alu);
2362
2363 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2364 auto src = value_factory.src(alu.src[0], i);
2365 ir = new AluInstr(op2_and_int,
2366 value_factory.dest(alu.def, i, pin),
2367 src,
2368 value_factory.inline_const(mask, 0),
2369 {alu_write});
2370 shader.emit_instruction(ir);
2371 }
2372 if (ir)
2373 ir->set_alu_flag(alu_last_instr);
2374 return true;
2375 }
2376
2377 static bool
emit_alu_op1(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluMods mod)2378 emit_alu_op1(const nir_alu_instr& alu,
2379 EAluOp opcode,
2380 Shader& shader,
2381 AluMods mod)
2382 {
2383 auto& value_factory = shader.value_factory();
2384
2385 AluInstr *ir = nullptr;
2386 auto pin = pin_for_components(alu);
2387
2388 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2389 ir = new AluInstr(opcode,
2390 value_factory.dest(alu.def, i, pin),
2391 value_factory.src(alu.src[0], i),
2392 {alu_write});
2393 switch (mod) {
2394 case mod_src0_abs:
2395 ir->set_source_mod(0, AluInstr::mod_abs); break;
2396 case mod_src0_neg:
2397 ir->set_source_mod(0, AluInstr::mod_neg); break;
2398 case mod_dest_clamp:
2399 ir->set_alu_flag(alu_dst_clamp);
2400 default:;
2401 }
2402 shader.emit_instruction(ir);
2403 }
2404 if (ir)
2405 ir->set_alu_flag(alu_last_instr);
2406 return true;
2407 }
2408
2409 static bool
emit_alu_op2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,AluInstr::Op2Options opts)2410 emit_alu_op2(const nir_alu_instr& alu,
2411 EAluOp opcode,
2412 Shader& shader,
2413 AluInstr::Op2Options opts)
2414 {
2415 auto& value_factory = shader.value_factory();
2416 const nir_alu_src *src0 = &alu.src[0];
2417 const nir_alu_src *src1 = &alu.src[1];
2418
2419 int idx0 = 0;
2420 int idx1 = 1;
2421 if (opts & AluInstr::op2_opt_reverse) {
2422 std::swap(src0, src1);
2423 std::swap(idx0, idx1);
2424 }
2425
2426 bool src1_negate = (opts & AluInstr::op2_opt_neg_src1);
2427
2428 auto pin = pin_for_components(alu);
2429 AluInstr *ir = nullptr;
2430 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2431 ir = new AluInstr(opcode,
2432 value_factory.dest(alu.def, i, pin),
2433 value_factory.src(*src0, i),
2434 value_factory.src(*src1, i),
2435 {alu_write});
2436 if (src1_negate)
2437 ir->set_source_mod(1, AluInstr::mod_neg);
2438 shader.emit_instruction(ir);
2439 }
2440 if (ir)
2441 ir->set_alu_flag(alu_last_instr);
2442 return true;
2443 }
2444
/* Integer variant of the two-source emitter; it forwards all arguments
 * unchanged to emit_alu_op2 and returns its result.
 */
static bool
emit_alu_op2_int(const nir_alu_instr& alu,
                 EAluOp opcode,
                 Shader& shader,
                 AluInstr::Op2Options opts)
{
   return emit_alu_op2(alu, opcode, shader, opts);
}
2453
2454 static bool
emit_alu_op3(const nir_alu_instr & alu,EAluOp opcode,Shader & shader,const std::array<int,3> & src_shuffle)2455 emit_alu_op3(const nir_alu_instr& alu,
2456 EAluOp opcode,
2457 Shader& shader,
2458 const std::array<int, 3>& src_shuffle)
2459 {
2460 auto& value_factory = shader.value_factory();
2461 const nir_alu_src *src[3];
2462 src[0] = &alu.src[src_shuffle[0]];
2463 src[1] = &alu.src[src_shuffle[1]];
2464 src[2] = &alu.src[src_shuffle[2]];
2465
2466 auto pin = pin_for_components(alu);
2467 AluInstr *ir = nullptr;
2468 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2469 ir = new AluInstr(opcode,
2470 value_factory.dest(alu.def, i, pin),
2471 value_factory.src(*src[0], i),
2472 value_factory.src(*src[1], i),
2473 value_factory.src(*src[2], i),
2474 {alu_write});
2475 ir->set_alu_flag(alu_write);
2476 shader.emit_instruction(ir);
2477 }
2478 if (ir)
2479 ir->set_alu_flag(alu_last_instr);
2480 return true;
2481 }
2482
2483 static bool
emit_any_all_fcomp2(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2484 emit_any_all_fcomp2(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2485 {
2486 AluInstr *ir = nullptr;
2487 auto& value_factory = shader.value_factory();
2488
2489 PRegister tmp[2];
2490 tmp[0] = value_factory.temp_register();
2491 tmp[1] = value_factory.temp_register();
2492
2493 for (unsigned i = 0; i < 2; ++i) {
2494 ir = new AluInstr(opcode,
2495 tmp[i],
2496 value_factory.src(alu.src[0], i),
2497 value_factory.src(alu.src[1], i),
2498 {alu_write});
2499 shader.emit_instruction(ir);
2500 }
2501 ir->set_alu_flag(alu_last_instr);
2502
2503 opcode = (opcode == op2_setne_dx10) ? op2_or_int : op2_and_int;
2504 ir = new AluInstr(opcode,
2505 value_factory.dest(alu.def, 0, pin_free),
2506 tmp[0],
2507 tmp[1],
2508 AluInstr::last_write);
2509 shader.emit_instruction(ir);
2510 return true;
2511 }
2512
2513 static bool
emit_any_all_fcomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2514 emit_any_all_fcomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2515 {
2516 /* This should probabyl be lowered in nir */
2517 auto& value_factory = shader.value_factory();
2518
2519 AluInstr *ir = nullptr;
2520 RegisterVec4 v = value_factory.temp_vec4(pin_group);
2521 AluInstr::SrcValues s;
2522
2523 for (int i = 0; i < nc; ++i) {
2524 s.push_back(v[i]);
2525 }
2526
2527 for (int i = nc; i < 4; ++i)
2528 s.push_back(value_factory.inline_const(all ? ALU_SRC_1 : ALU_SRC_0, 0));
2529
2530 for (int i = 0; i < nc; ++i) {
2531 ir = new AluInstr(op,
2532 v[i],
2533 value_factory.src(alu.src[0], i),
2534 value_factory.src(alu.src[1], i),
2535 {alu_write});
2536 shader.emit_instruction(ir);
2537 }
2538 if (ir)
2539 ir->set_alu_flag(alu_last_instr);
2540
2541 auto max_val = value_factory.temp_register();
2542
2543 ir = new AluInstr(op1_max4, max_val, s, AluInstr::last_write, 4);
2544
2545 if (all) {
2546 ir->set_source_mod(0, AluInstr::mod_neg);
2547 ir->set_source_mod(1, AluInstr::mod_neg);
2548 ir->set_source_mod(2, AluInstr::mod_neg);
2549 ir->set_source_mod(3, AluInstr::mod_neg);
2550 }
2551
2552 shader.emit_instruction(ir);
2553
2554 if (all)
2555 op = (op == op2_sete) ? op2_sete_dx10 : op2_setne_dx10;
2556 else
2557 op = (op == op2_sete) ? op2_setne_dx10 : op2_sete_dx10;
2558
2559 ir = new AluInstr(op,
2560 value_factory.dest(alu.def, 0, pin_free),
2561 max_val,
2562 value_factory.inline_const(ALU_SRC_1, 0),
2563 AluInstr::last_write);
2564 if (all)
2565 ir->set_source_mod(1, AluInstr::mod_neg);
2566 shader.emit_instruction(ir);
2567
2568 return true;
2569 }
2570
2571 static bool
emit_any_all_icomp(const nir_alu_instr & alu,EAluOp op,int nc,bool all,Shader & shader)2572 emit_any_all_icomp(const nir_alu_instr& alu, EAluOp op, int nc, bool all, Shader& shader)
2573 {
2574 /* This should probabyl be lowered in nir */
2575 auto& value_factory = shader.value_factory();
2576
2577 AluInstr *ir = nullptr;
2578 PRegister v[6];
2579
2580 auto dest = value_factory.dest(alu.def, 0, pin_free);
2581
2582 for (int i = 0; i < nc + nc / 2; ++i)
2583 v[i] = value_factory.temp_register();
2584
2585 EAluOp combine = all ? op2_and_int : op2_or_int;
2586
2587 for (int i = 0; i < nc; ++i) {
2588 ir = new AluInstr(op,
2589 v[i],
2590 value_factory.src(alu.src[0], i),
2591 value_factory.src(alu.src[1], i),
2592 AluInstr::write);
2593 shader.emit_instruction(ir);
2594 }
2595 if (ir)
2596 ir->set_alu_flag(alu_last_instr);
2597
2598 if (nc == 2) {
2599 ir = new AluInstr(combine, dest, v[0], v[1], AluInstr::last_write);
2600 shader.emit_instruction(ir);
2601 return true;
2602 }
2603
2604 if (nc == 3) {
2605 ir = new AluInstr(combine, v[3], v[0], v[1], AluInstr::last_write);
2606 shader.emit_instruction(ir);
2607 ir = new AluInstr(combine, dest, v[3], v[2], AluInstr::last_write);
2608 shader.emit_instruction(ir);
2609 return true;
2610 }
2611
2612 if (nc == 4) {
2613 ir = new AluInstr(combine, v[4], v[0], v[1], AluInstr::write);
2614 shader.emit_instruction(ir);
2615 ir = new AluInstr(combine, v[5], v[2], v[3], AluInstr::last_write);
2616 shader.emit_instruction(ir);
2617 ir = new AluInstr(combine, dest, v[4], v[5], AluInstr::last_write);
2618 shader.emit_instruction(ir);
2619 return true;
2620 }
2621
2622 return false;
2623 }
2624
2625 static bool
emit_dot(const nir_alu_instr & alu,int n,Shader & shader)2626 emit_dot(const nir_alu_instr& alu, int n, Shader& shader)
2627 {
2628 auto& value_factory = shader.value_factory();
2629 const nir_alu_src& src0 = alu.src[0];
2630 const nir_alu_src& src1 = alu.src[1];
2631
2632 auto dest = value_factory.dest(alu.def, 0, pin_chan);
2633
2634 AluInstr::SrcValues srcs(2 * n);
2635
2636 for (int i = 0; i < n; ++i) {
2637 srcs[2 * i] = value_factory.src(src0, i);
2638 srcs[2 * i + 1] = value_factory.src(src1, i);
2639 }
2640
2641 AluInstr *ir = new AluInstr(op2_dot_ieee, dest, srcs, AluInstr::last_write, n);
2642
2643 shader.emit_instruction(ir);
2644 shader.set_flag(Shader::sh_disble_sb);
2645
2646 return true;
2647 }
2648
2649 static bool
emit_dot4(const nir_alu_instr & alu,int nelm,Shader & shader)2650 emit_dot4(const nir_alu_instr& alu, int nelm, Shader& shader)
2651 {
2652 auto& value_factory = shader.value_factory();
2653 const nir_alu_src& src0 = alu.src[0];
2654 const nir_alu_src& src1 = alu.src[1];
2655
2656 auto dest = value_factory.dest(alu.def, 0, pin_free);
2657
2658 AluInstr::SrcValues srcs(8);
2659
2660 for (int i = 0; i < nelm; ++i) {
2661 srcs[2 * i] = value_factory.src(src0, i);
2662 srcs[2 * i + 1] = value_factory.src(src1, i);
2663 }
2664
2665 for (int i = nelm; i < 4; ++i) {
2666 srcs[2 * i] = value_factory.zero();
2667 srcs[2 * i + 1] = value_factory.zero();
2668 }
2669
2670 AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2671
2672 shader.emit_instruction(ir);
2673 return true;
2674 }
2675
2676 static bool
emit_fdph(const nir_alu_instr & alu,Shader & shader)2677 emit_fdph(const nir_alu_instr& alu, Shader& shader)
2678 {
2679 auto& value_factory = shader.value_factory();
2680 const nir_alu_src& src0 = alu.src[0];
2681 const nir_alu_src& src1 = alu.src[1];
2682
2683 auto dest = value_factory.dest(alu.def, 0, pin_free);
2684
2685 AluInstr::SrcValues srcs(8);
2686
2687 for (int i = 0; i < 3; ++i) {
2688 srcs[2 * i] = value_factory.src(src0, i);
2689 srcs[2 * i + 1] = value_factory.src(src1, i);
2690 }
2691
2692 srcs[6] = value_factory.one();
2693 srcs[7] = value_factory.src(src1, 3);
2694
2695 AluInstr *ir = new AluInstr(op2_dot4_ieee, dest, srcs, AluInstr::last_write, 4);
2696 shader.emit_instruction(ir);
2697 return true;
2698 }
2699
2700 static bool
emit_create_vec(const nir_alu_instr & instr,unsigned nc,Shader & shader)2701 emit_create_vec(const nir_alu_instr& instr, unsigned nc, Shader& shader)
2702 {
2703 auto& value_factory = shader.value_factory();
2704 AluInstr *ir = nullptr;
2705
2706 for (unsigned i = 0; i < nc; ++i) {
2707 auto src = value_factory.src(instr.src[i].src, instr.src[i].swizzle[0]);
2708 auto dst = value_factory.dest(instr.def, i, pin_none);
2709 shader.emit_instruction(new AluInstr(op1_mov, dst, src, {alu_write}));
2710 }
2711
2712 if (ir)
2713 ir->set_alu_flag(alu_last_instr);
2714 return true;
2715 }
2716
2717 static bool
emit_alu_comb_with_zero(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2718 emit_alu_comb_with_zero(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2719 {
2720 auto& value_factory = shader.value_factory();
2721 AluInstr *ir = nullptr;
2722 auto pin = pin_for_components(alu);
2723 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2724 ir = new AluInstr(opcode,
2725 value_factory.dest(alu.def, i, pin),
2726 value_factory.zero(),
2727 value_factory.src(alu.src[0], i),
2728 AluInstr::write);
2729 shader.emit_instruction(ir);
2730 }
2731 if (ir)
2732 ir->set_alu_flag(alu_last_instr);
2733
2734 return true;
2735 }
2736
2737 static bool
emit_pack_64_2x32_split(const nir_alu_instr & alu,Shader & shader)2738 emit_pack_64_2x32_split(const nir_alu_instr& alu, Shader& shader)
2739 {
2740 auto& value_factory = shader.value_factory();
2741 AluInstr *ir = nullptr;
2742 for (unsigned i = 0; i < 2; ++i) {
2743 ir = new AluInstr(op1_mov,
2744 value_factory.dest(alu.def, i, pin_none),
2745 value_factory.src(alu.src[i], 0),
2746 AluInstr::write);
2747 shader.emit_instruction(ir);
2748 }
2749 ir->set_alu_flag(alu_last_instr);
2750 return true;
2751 }
2752
2753 static bool
emit_pack_64_2x32(const nir_alu_instr & alu,Shader & shader)2754 emit_pack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2755 {
2756 auto& value_factory = shader.value_factory();
2757 AluInstr *ir = nullptr;
2758 for (unsigned i = 0; i < 2; ++i) {
2759 ir = new AluInstr(op1_mov,
2760 value_factory.dest(alu.def, i, pin_none),
2761 value_factory.src(alu.src[0], i),
2762 AluInstr::write);
2763 shader.emit_instruction(ir);
2764 }
2765 ir->set_alu_flag(alu_last_instr);
2766 return true;
2767 }
2768
2769 static bool
emit_unpack_64_2x32(const nir_alu_instr & alu,Shader & shader)2770 emit_unpack_64_2x32(const nir_alu_instr& alu, Shader& shader)
2771 {
2772 auto& value_factory = shader.value_factory();
2773 AluInstr *ir = nullptr;
2774 for (unsigned i = 0; i < 2; ++i) {
2775 ir = new AluInstr(op1_mov,
2776 value_factory.dest(alu.def, i, pin_none),
2777 value_factory.src64(alu.src[0], 0, i),
2778 AluInstr::write);
2779 shader.emit_instruction(ir);
2780 }
2781 ir->set_alu_flag(alu_last_instr);
2782 return true;
2783 }
2784
2785 bool
emit_alu_vec2_64(const nir_alu_instr & alu,Shader & shader)2786 emit_alu_vec2_64(const nir_alu_instr& alu, Shader& shader)
2787 {
2788 auto& value_factory = shader.value_factory();
2789 AluInstr *ir = nullptr;
2790 for (unsigned i = 0; i < 2; ++i) {
2791 ir = new AluInstr(op1_mov,
2792 value_factory.dest(alu.def, i, pin_chan),
2793 value_factory.src64(alu.src[0], 0, i),
2794 AluInstr::write);
2795 shader.emit_instruction(ir);
2796 }
2797 for (unsigned i = 0; i < 2; ++i) {
2798 ir = new AluInstr(op1_mov,
2799 value_factory.dest(alu.def, i + 2, pin_chan),
2800 value_factory.src64(alu.src[1], 1, i),
2801 AluInstr::write);
2802 shader.emit_instruction(ir);
2803 }
2804 ir->set_alu_flag(alu_last_instr);
2805 return true;
2806 }
2807
2808 static bool
emit_pack_32_2x16_split(const nir_alu_instr & alu,Shader & shader)2809 emit_pack_32_2x16_split(const nir_alu_instr& alu, Shader& shader)
2810 {
2811 auto& value_factory = shader.value_factory();
2812
2813 auto x = value_factory.temp_register();
2814 auto y = value_factory.temp_register();
2815 auto yy = value_factory.temp_register();
2816
2817 shader.emit_instruction(new AluInstr(
2818 op1_flt32_to_flt16, x, value_factory.src(alu.src[0], 0), AluInstr::last_write));
2819
2820 shader.emit_instruction(new AluInstr(
2821 op1_flt32_to_flt16, y, value_factory.src(alu.src[1], 0), AluInstr::last_write));
2822
2823 shader.emit_instruction(
2824 new AluInstr(op2_lshl_int, yy, y, value_factory.literal(16), AluInstr::last_write));
2825
2826 shader.emit_instruction(new AluInstr(op2_or_int,
2827 value_factory.dest(alu.def, 0, pin_free),
2828 x,
2829 yy,
2830 AluInstr::last_write));
2831 return true;
2832 }
2833
2834 static bool
emit_unpack_64_2x32_split(const nir_alu_instr & alu,int comp,Shader & shader)2835 emit_unpack_64_2x32_split(const nir_alu_instr& alu, int comp, Shader& shader)
2836 {
2837 auto& value_factory = shader.value_factory();
2838 shader.emit_instruction(new AluInstr(op1_mov,
2839 value_factory.dest(alu.def, 0, pin_free),
2840 value_factory.src64(alu.src[0], 0, comp),
2841 AluInstr::last_write));
2842 return true;
2843 }
2844
2845 static bool
emit_unpack_32_2x16_split_x(const nir_alu_instr & alu,Shader & shader)2846 emit_unpack_32_2x16_split_x(const nir_alu_instr& alu, Shader& shader)
2847 {
2848 auto& value_factory = shader.value_factory();
2849 shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2850 value_factory.dest(alu.def, 0, pin_free),
2851 value_factory.src(alu.src[0], 0),
2852 AluInstr::last_write));
2853 return true;
2854 }
2855 static bool
emit_unpack_32_2x16_split_y(const nir_alu_instr & alu,Shader & shader)2856 emit_unpack_32_2x16_split_y(const nir_alu_instr& alu, Shader& shader)
2857 {
2858 auto& value_factory = shader.value_factory();
2859 auto tmp = value_factory.temp_register();
2860 shader.emit_instruction(new AluInstr(op2_lshr_int,
2861 tmp,
2862 value_factory.src(alu.src[0], 0),
2863 value_factory.literal(16),
2864 AluInstr::last_write));
2865
2866 shader.emit_instruction(new AluInstr(op1_flt16_to_flt32,
2867 value_factory.dest(alu.def, 0, pin_free),
2868 tmp,
2869 AluInstr::last_write));
2870 return true;
2871 }
2872
2873 static bool
emit_alu_trans_op1_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2874 emit_alu_trans_op1_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2875 {
2876 auto& value_factory = shader.value_factory();
2877 const nir_alu_src& src0 = alu.src[0];
2878
2879 AluInstr *ir = nullptr;
2880 auto pin = pin_for_components(alu);
2881
2882 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2883 ir = new AluInstr(opcode,
2884 value_factory.dest(alu.def, i, pin),
2885 value_factory.src(src0, i),
2886 AluInstr::last_write);
2887 ir->set_alu_flag(alu_is_trans);
2888 shader.emit_instruction(ir);
2889 }
2890
2891 return true;
2892 }
2893
2894 static bool
emit_alu_f2i32_or_u32_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2895 emit_alu_f2i32_or_u32_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2896 {
2897 auto& value_factory = shader.value_factory();
2898 AluInstr *ir = nullptr;
2899
2900 PRegister reg[4];
2901
2902 int num_comp = alu.def.num_components;
2903
2904 for (int i = 0; i < num_comp; ++i) {
2905 reg[i] = value_factory.temp_register();
2906 ir = new AluInstr(op1_trunc,
2907 reg[i],
2908 value_factory.src(alu.src[0], i),
2909 AluInstr::last_write);
2910 shader.emit_instruction(ir);
2911 }
2912
2913 auto pin = pin_for_components(alu);
2914 for (int i = 0; i < num_comp; ++i) {
2915 ir = new AluInstr(opcode,
2916 value_factory.dest(alu.def, i, pin),
2917 reg[i],
2918 AluInstr::write);
2919 if (opcode == op1_flt_to_uint) {
2920 ir->set_alu_flag(alu_is_trans);
2921 ir->set_alu_flag(alu_last_instr);
2922 }
2923 shader.emit_instruction(ir);
2924 }
2925 ir->set_alu_flag(alu_last_instr);
2926 return true;
2927 }
2928
2929 static bool
emit_alu_trans_op1_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2930 emit_alu_trans_op1_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2931 {
2932 auto& value_factory = shader.value_factory();
2933 const nir_alu_src& src0 = alu.src[0];
2934
2935 auto pin = pin_for_components(alu);
2936
2937 const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
2938
2939 for (unsigned j = 0; j < alu.def.num_components; ++j) {
2940 unsigned ncomp = j == 3 ? 4 : 3;
2941
2942 AluInstr::SrcValues srcs(ncomp);
2943 PRegister dest = value_factory.dest(alu.def, j, pin, (1 << ncomp) - 1);
2944
2945 for (unsigned i = 0; i < ncomp; ++i)
2946 srcs[i] = value_factory.src(src0, j);
2947
2948 auto ir = new AluInstr(opcode, dest, srcs, flags, ncomp);
2949 shader.emit_instruction(ir);
2950 }
2951 return true;
2952 }
2953
2954 static bool
emit_alu_trans_op2_eg(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2955 emit_alu_trans_op2_eg(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2956 {
2957 auto& value_factory = shader.value_factory();
2958
2959 const nir_alu_src& src0 = alu.src[0];
2960 const nir_alu_src& src1 = alu.src[1];
2961
2962 AluInstr *ir = nullptr;
2963
2964 auto pin = pin_for_components(alu);
2965 for (unsigned i = 0; i < alu.def.num_components; ++i) {
2966 ir = new AluInstr(opcode,
2967 value_factory.dest(alu.def, i, pin),
2968 value_factory.src(src0, i),
2969 value_factory.src(src1, i),
2970 AluInstr::last_write);
2971 ir->set_alu_flag(alu_is_trans);
2972 shader.emit_instruction(ir);
2973 }
2974 return true;
2975 }
2976
2977 static bool
emit_alu_trans_op2_cayman(const nir_alu_instr & alu,EAluOp opcode,Shader & shader)2978 emit_alu_trans_op2_cayman(const nir_alu_instr& alu, EAluOp opcode, Shader& shader)
2979 {
2980 auto& value_factory = shader.value_factory();
2981
2982 const nir_alu_src& src0 = alu.src[0];
2983 const nir_alu_src& src1 = alu.src[1];
2984
2985 unsigned last_slot = 4;
2986
2987 const std::set<AluModifiers> flags({alu_write, alu_last_instr, alu_is_cayman_trans});
2988
2989 for (unsigned k = 0; k < alu.def.num_components; ++k) {
2990 AluInstr::SrcValues srcs(2 * last_slot);
2991 PRegister dest = value_factory.dest(alu.def, k, pin_free);
2992
2993 for (unsigned i = 0; i < last_slot; ++i) {
2994 srcs[2 * i] = value_factory.src(src0, k);
2995 srcs[2 * i + 1] = value_factory.src(src1, k);
2996 }
2997
2998 auto ir = new AluInstr(opcode, dest, srcs, flags, last_slot);
2999 ir->set_alu_flag(alu_is_cayman_trans);
3000 shader.emit_instruction(ir);
3001 }
3002 return true;
3003 }
3004
3005 static bool
emit_tex_fdd(const nir_alu_instr & alu,TexInstr::Opcode opcode,bool fine,Shader & shader)3006 emit_tex_fdd(const nir_alu_instr& alu, TexInstr::Opcode opcode, bool fine, Shader& shader)
3007 {
3008 auto& value_factory = shader.value_factory();
3009
3010 int ncomp = alu.def.num_components;
3011 RegisterVec4::Swizzle src_swz = {7, 7, 7, 7};
3012 RegisterVec4::Swizzle tmp_swz = {7, 7, 7, 7};
3013 for (auto i = 0; i < ncomp; ++i) {
3014 src_swz[i] = alu.src[0].swizzle[i];
3015 tmp_swz[i] = i;
3016 }
3017
3018 auto src = value_factory.src_vec4(alu.src[0].src, pin_none, src_swz);
3019
3020 auto tmp = value_factory.temp_vec4(pin_group, tmp_swz);
3021 AluInstr *mv = nullptr;
3022 for (int i = 0; i < ncomp; ++i) {
3023 mv = new AluInstr(op1_mov, tmp[i], src[i], AluInstr::write);
3024 shader.emit_instruction(mv);
3025 }
3026 if (mv)
3027 mv->set_alu_flag(alu_last_instr);
3028
3029 auto dst = value_factory.dest_vec4(alu.def, pin_group);
3030 RegisterVec4::Swizzle dst_swz = {7, 7, 7, 7};
3031 for (auto i = 0; i < ncomp; ++i) {
3032 dst_swz[i] = i;
3033 }
3034
3035 auto tex = new TexInstr(opcode, dst, dst_swz, tmp, R600_MAX_CONST_BUFFERS, nullptr);
3036
3037 if (fine)
3038 tex->set_tex_flag(TexInstr::grad_fine);
3039
3040 shader.emit_instruction(tex);
3041
3042 return true;
3043 }
3044
3045 static bool
emit_alu_cube(const nir_alu_instr & alu,Shader & shader)3046 emit_alu_cube(const nir_alu_instr& alu, Shader& shader)
3047 {
3048 auto& value_factory = shader.value_factory();
3049 AluInstr *ir = nullptr;
3050
3051 const uint16_t src0_chan[4] = {2, 2, 0, 1};
3052 const uint16_t src1_chan[4] = {1, 0, 2, 2};
3053
3054 auto group = new AluGroup();
3055
3056 for (int i = 0; i < 4; ++i) {
3057
3058 ir = new AluInstr(op2_cube,
3059 value_factory.dest(alu.def, i, pin_chan),
3060 value_factory.src(alu.src[0], src0_chan[i]),
3061 value_factory.src(alu.src[0], src1_chan[i]),
3062 AluInstr::write);
3063 group->add_instruction(ir);
3064 }
3065 ir->set_alu_flag(alu_last_instr);
3066 shader.emit_instruction(group);
3067 return true;
3068 }
3069
3070 const std::set<AluModifiers> AluInstr::empty;
3071 const std::set<AluModifiers> AluInstr::write({alu_write});
3072 const std::set<AluModifiers> AluInstr::last({alu_last_instr});
3073 const std::set<AluModifiers> AluInstr::last_write({alu_write, alu_last_instr});
3074
3075 } // namespace r600
3076