1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2021 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_instr_alugroup.h"
28 #include "sfn_instr_controlflow.h"
29 #include "sfn_instr_export.h"
30 #include "sfn_instr_fetch.h"
31 #include "sfn_instr_lds.h"
32 #include "sfn_instr_mem.h"
33 #include "sfn_instr_tex.h"
34
35 #include <iostream>
36 #include <limits>
37 #include <numeric>
38 #include <sstream>
39
40 namespace r600 {
41
42 using std::string;
43 using std::vector;
44
Instr()45 Instr::Instr():
46 m_use_count(0),
47 m_block_id(std::numeric_limits<int>::max()),
48 m_index(std::numeric_limits<int>::max())
49 {
50 }
51
~Instr()52 Instr::~Instr() {}
53
54 void
print(std::ostream & os) const55 Instr::print(std::ostream& os) const
56 {
57 do_print(os);
58 }
59
60 bool
ready() const61 Instr::ready() const
62 {
63 if (is_scheduled())
64 return true;
65 for (auto& i : m_required_instr)
66 if (!i->ready())
67 return false;
68 return do_ready();
69 }
70
/* Parse the integer that follows `prefix` at the start of `str`.
 *
 * Returns false and leaves `value` untouched when `str` does not start with
 * `prefix`. Otherwise the remainder of the string is fed through a stream
 * extraction into `value` and true is returned; the outcome of the
 * extraction itself is not checked. */
bool
int_from_string_with_prefix_optional(const std::string& str,
                                     const std::string& prefix,
                                     int& value)
{
   const auto prefix_len = prefix.length();

   if (str.compare(0, prefix_len, prefix) != 0)
      return false;

   std::istringstream number(str.substr(prefix_len));
   number >> value;
   return true;
}
84
85 int
int_from_string_with_prefix(const std::string & str,const std::string & prefix)86 int_from_string_with_prefix(const std::string& str, const std::string& prefix)
87 {
88 int retval = 0;
89 if (!int_from_string_with_prefix_optional(str, prefix, retval)) {
90 std::cerr << "Expect '" << prefix << "' as start of '" << str << "'\n";
91 assert(0);
92 }
93 return retval;
94 }
95
96 int
sel_and_szw_from_string(const std::string & str,RegisterVec4::Swizzle & swz,bool & is_ssa)97 sel_and_szw_from_string(const std::string& str, RegisterVec4::Swizzle& swz, bool& is_ssa)
98 {
99 assert(str[0] == 'R' || str[0] == '_' || str[0] == 'S');
100 int sel = 0;
101
102 auto istr = str.begin() + 1;
103
104 if (str[0] == '_') {
105 while (istr != str.end() && *istr == '_')
106 ++istr;
107 sel = std::numeric_limits<int>::max();
108 } else {
109 while (istr != str.end() && isdigit(*istr)) {
110 sel *= 10;
111 sel += *istr - '0';
112 ++istr;
113 }
114 }
115
116 assert(*istr == '.');
117 istr++;
118
119 int i = 0;
120 while (istr != str.end()) {
121 switch (*istr) {
122 case 'x':
123 swz[i] = 0;
124 break;
125 case 'y':
126 swz[i] = 1;
127 break;
128 case 'z':
129 swz[i] = 2;
130 break;
131 case 'w':
132 swz[i] = 3;
133 break;
134 case '0':
135 swz[i] = 4;
136 break;
137 case '1':
138 swz[i] = 5;
139 break;
140 case '_':
141 swz[i] = 7;
142 break;
143 default:
144 unreachable("Unknown swizzle character");
145 }
146 ++istr;
147 ++i;
148 }
149
150 is_ssa = str[0] == 'S';
151
152 return sel;
153 }
154
155 bool
is_last() const156 Instr::is_last() const
157 {
158 return true;
159 }
160
161 bool
set_dead()162 Instr::set_dead()
163 {
164 if (m_instr_flags.test(always_keep))
165 return false;
166 bool is_dead = propagate_death();
167 m_instr_flags.set(dead);
168 return is_dead;
169 }
170
171 bool
propagate_death()172 Instr::propagate_death()
173 {
174 return true;
175 }
176
177 bool
replace_source(PRegister old_src,PVirtualValue new_src)178 Instr::replace_source(PRegister old_src, PVirtualValue new_src)
179 {
180 (void)old_src;
181 (void)new_src;
182 return false;
183 }
184
185 void
add_required_instr(Instr * instr)186 Instr::add_required_instr(Instr *instr)
187 {
188 assert(instr);
189 m_required_instr.push_back(instr);
190 instr->m_dependend_instr.push_back(this);
191 }
192
193 void
replace_required_instr(Instr * old_instr,Instr * new_instr)194 Instr::replace_required_instr(Instr *old_instr, Instr *new_instr)
195 {
196
197 for (auto i = m_required_instr.begin(); i != m_required_instr.end(); ++i) {
198 if (*i == old_instr)
199 *i = new_instr;
200 }
201 }
202
203 bool
replace_dest(PRegister new_dest,r600::AluInstr * move_instr)204 Instr::replace_dest(PRegister new_dest, r600::AluInstr *move_instr)
205 {
206 (void)new_dest;
207 (void)move_instr;
208 return false;
209 }
210
211 void
set_blockid(int id,int index)212 Instr::set_blockid(int id, int index)
213 {
214 m_block_id = id;
215 m_index = index;
216 forward_set_blockid(id, index);
217 }
218
219 void
forward_set_blockid(int id,int index)220 Instr::forward_set_blockid(int id, int index)
221 {
222 (void)id;
223 (void)index;
224 }
225
InstrWithVectorResult(const RegisterVec4 & dest,const RegisterVec4::Swizzle & dest_swizzle,int resource_base,PRegister resource_offset)226 InstrWithVectorResult::InstrWithVectorResult(const RegisterVec4& dest,
227 const RegisterVec4::Swizzle& dest_swizzle,
228 int resource_base,
229 PRegister resource_offset):
230 Resource(this, resource_base, resource_offset),
231 m_dest(dest),
232 m_dest_swizzle(dest_swizzle)
233 {
234 for (int i = 0; i < 4; ++i) {
235 if (m_dest_swizzle[i] < 6)
236 m_dest[i]->add_parent(this);
237 }
238 }
239
240 void
print_dest(std::ostream & os) const241 InstrWithVectorResult::print_dest(std::ostream& os) const
242 {
243 os << (m_dest[0]->has_flag(Register::ssa) ? 'S' : 'R') << m_dest.sel();
244 os << ".";
245 for (int i = 0; i < 4; ++i)
246 os << VirtualValue::chanchar[m_dest_swizzle[i]];
247 }
248
249 bool
comp_dest(const RegisterVec4 & dest,const RegisterVec4::Swizzle & dest_swizzle) const250 InstrWithVectorResult::comp_dest(const RegisterVec4& dest,
251 const RegisterVec4::Swizzle& dest_swizzle) const
252 {
253 for (int i = 0; i < 4; ++i) {
254 if (!m_dest[i]->equal_to(*dest[i])) {
255 return false;
256 }
257 if (m_dest_swizzle[i] != dest_swizzle[i])
258 return false;
259 }
260 return true;
261 }
262
263 void
do_print(std::ostream & os) const264 Block::do_print(std::ostream& os) const
265 {
266 for (int j = 0; j < 2 * m_nesting_depth; ++j)
267 os << ' ';
268 os << "BLOCK START\n";
269 for (auto& i : m_instructions) {
270 for (int j = 0; j < 2 * (m_nesting_depth + i->nesting_corr()) + 2; ++j)
271 os << ' ';
272 os << *i << "\n";
273 }
274 for (int j = 0; j < 2 * m_nesting_depth; ++j)
275 os << ' ';
276 os << "BLOCK END\n";
277 }
278
279 bool
is_equal_to(const Block & lhs) const280 Block::is_equal_to(const Block& lhs) const
281 {
282 if (m_id != lhs.m_id || m_nesting_depth != lhs.m_nesting_depth)
283 return false;
284
285 if (m_instructions.size() != lhs.m_instructions.size())
286 return false;
287
288 return std::inner_product(
289 m_instructions.begin(),
290 m_instructions.end(),
291 lhs.m_instructions.begin(),
292 true,
293 [](bool l, bool r) { return l && r; },
294 [](PInst l, PInst r) { return l->equal_to(*r); });
295 }
296
297 inline bool
operator !=(const Block & lhs,const Block & rhs)298 operator!=(const Block& lhs, const Block& rhs)
299 {
300 return !lhs.is_equal_to(rhs);
301 }
302
303 void
erase(iterator node)304 Block::erase(iterator node)
305 {
306 m_instructions.erase(node);
307 }
308
309 void
set_type(Type t,r600_chip_class chip_class)310 Block::set_type(Type t, r600_chip_class chip_class)
311 {
312 m_block_type = t;
313 switch (t) {
314 case vtx:
315 /* In theory on >= EG VTX support 16 slots, but with vertex fetch
316 * instructions the register pressure increases fast - i.e. in the worst
317 * case four register more get used, so stick to 8 slots for now.
318 * TODO: think about some trickery in the schedler to make use of up
319 * to 16 slots if the register pressure doesn't get too high.
320 */
321 m_remaining_slots = 8;
322 break;
323 case gds:
324 case tex:
325 m_remaining_slots = chip_class >= ISA_CC_EVERGREEN ? 16 : 8;
326 break;
327 case alu:
328 /* 128 but a follow up block might need to emit and ADDR + INDEX load */
329 m_remaining_slots = 118;
330 break;
331 default:
332 m_remaining_slots = 0xffff;
333 }
334 }
335
Block(int nesting_depth,int id)336 Block::Block(int nesting_depth, int id):
337 m_nesting_depth(nesting_depth),
338 m_id(id),
339 m_next_index(0)
340 {
341 assert(!has_instr_flag(force_cf));
342 }
343
344 void
accept(ConstInstrVisitor & visitor) const345 Block::accept(ConstInstrVisitor& visitor) const
346 {
347 visitor.visit(*this);
348 }
349
350 void
accept(InstrVisitor & visitor)351 Block::accept(InstrVisitor& visitor)
352 {
353 visitor.visit(this);
354 }
355
356 void
push_back(PInst instr)357 Block::push_back(PInst instr)
358 {
359 instr->set_blockid(m_id, m_next_index++);
360 if (m_remaining_slots != 0xffff) {
361 uint32_t new_slots = instr->slots();
362 m_remaining_slots -= new_slots;
363 }
364 if (m_lds_group_start)
365 m_lds_group_requirement += instr->slots();
366
367 m_instructions.push_back(instr);
368 }
369
370 Block::iterator
insert(const iterator pos,Instr * instr)371 Block::insert(const iterator pos, Instr *instr)
372 {
373 return m_instructions.insert(pos, instr);
374 }
375
376 bool
try_reserve_kcache(const AluGroup & group)377 Block::try_reserve_kcache(const AluGroup& group)
378 {
379 auto kcache = m_kcache;
380
381 auto kcache_constants = group.get_kconsts();
382 for (auto& kc : kcache_constants) {
383 auto u = kc->as_uniform();
384 assert(u);
385 if (!try_reserve_kcache(*u, kcache)) {
386 m_kcache_alloc_failed = true;
387 return false;
388 }
389 }
390
391 m_kcache = kcache;
392 m_kcache_alloc_failed = false;
393 return true;
394 }
395
396 bool
try_reserve_kcache(const AluInstr & instr)397 Block::try_reserve_kcache(const AluInstr& instr)
398 {
399 auto kcache = m_kcache;
400
401 for (auto& src : instr.sources()) {
402 auto u = src->as_uniform();
403 if (u) {
404 if (!try_reserve_kcache(*u, kcache)) {
405 m_kcache_alloc_failed = true;
406 return false;
407 }
408 }
409 }
410 m_kcache = kcache;
411 m_kcache_alloc_failed = false;
412 return true;
413 }
414
415 void
set_chipclass(r600_chip_class chip_class)416 Block::set_chipclass(r600_chip_class chip_class)
417 {
418 if (chip_class < ISA_CC_EVERGREEN)
419 s_max_kcache_banks = 2;
420 else
421 s_max_kcache_banks = 4;
422 }
423
424 unsigned Block::s_max_kcache_banks = 4;
425
/* Try to make the kcache line needed by the uniform `u` available in
 * `kcache`, modifying `kcache` in place.
 *
 * Only the first s_max_kcache_banks entries of `kcache` are considered. An
 * entry whose `mode` is zero is free. Returns true when the required line is
 * already covered by an entry, could be added by widening an entry to
 * lock_2, or could be placed in a new entry; returns false otherwise.
 *
 * Note that `kcache` may already have been modified when false is returned
 * (see the d == -1 path); the callers in this file pass a scratch copy. */
bool
Block::try_reserve_kcache(const UniformValue& u, std::array<KCacheLine, 4>& kcache) const
{
   const int kcache_banks = s_max_kcache_banks; // TODO: handle pre-evergreen

   int bank = u.kcache_bank();
   /* Selector relative to 512, then divided by 16 to obtain the line. */
   int sel = (u.sel() - 512);
   int line = sel >> 4;
   EBufferIndexMode index_mode = bim_none;

   /* A uniform with a buffer address register is indexed: bim_zero when the
    * register is idx0, bim_one for anything else. */
   if (auto addr = u.buf_addr())
      index_mode = addr->sel() == AddressRegister::idx0 ? bim_zero : bim_one;

   /* NOTE(review): `found` is never set to true in this function, so the
    * `!found` part of the loop condition always holds. */
   bool found = false;

   for (int i = 0; i < kcache_banks && !found; ++i) {
      if (kcache[i].mode) {
         /* Entry in use. Entries for lower banks are skipped - presumably the
          * entries are kept sorted by bank and address. */
         if (kcache[i].bank < bank)
            continue;


         /* Same bank, but the entry is locked with a different (non-none)
          * index mode than the one requested: give up. */
         if (kcache[i].bank == bank &&
             kcache[i].index_mode != bim_none &&
             kcache[i].index_mode != index_mode) {
            return false;
         }
         /* The requested line belongs in front of entry i (higher bank, or
          * same bank with an address more than one line above): insert a new
          * lock_1 entry here. */
         if ((kcache[i].bank == bank && kcache[i].addr > line + 1) ||
             kcache[i].bank > bank) {
            /* The last entry is in use, so there is no room to shift. */
            if (kcache[kcache_banks - 1].mode)
               return false;

            /* Shift entries i .. kcache_banks-2 up by one position. */
            memmove(&kcache[i + 1],
                    &kcache[i],
                    (kcache_banks - i - 1) * sizeof(KCacheLine));
            kcache[i].mode = KCacheLine::lock_1;
            kcache[i].bank = bank;
            kcache[i].addr = line;
            kcache[i].index_mode = index_mode;
            return true;
         }

         /* Distance of the requested line from the start of entry i. */
         int d = line - kcache[i].addr;

         if (d == -1) {
            /* Requested line directly precedes the entry: move the entry's
             * start down by one line. */
            kcache[i].addr--;
            if (kcache[i].mode == KCacheLine::lock_2) {
               /* we are prepending the line to the current set,
                * discarding the existing second line,
                * so we'll have to insert line+2 after it */
               line += 2;
               continue;
            } else if (kcache[i].mode == KCacheLine::lock_1) {
               kcache[i].mode = KCacheLine::lock_2;
               return true;
            } else {
               /* V_SQ_CF_KCACHE_LOCK_LOOP_INDEX is not supported */
               return false;
            }
         } else if (d == 1) {
            /* Requested line directly follows the entry's start: widen the
             * entry to lock_2. */
            kcache[i].mode = KCacheLine::lock_2;
            return true;
         } else if (d == 0) {
            /* Requested line is the entry's start line: nothing to do. */
            return true;
         }
         /* Any other distance: try the next entry. */
      } else { /* free kcache set - use it */
         kcache[i].mode = KCacheLine::lock_1;
         kcache[i].bank = bank;
         kcache[i].addr = line;
         kcache[i].index_mode = index_mode;
         return true;
      }
   }
   /* All considered entries were examined without finding a place. */
   return false;
}
500
501 void
lds_group_start(AluInstr * alu)502 Block::lds_group_start(AluInstr *alu)
503 {
504 assert(!m_lds_group_start);
505 m_lds_group_start = alu;
506 m_lds_group_requirement = 0;
507 }
508
509 void
lds_group_end()510 Block::lds_group_end()
511 {
512 assert(m_lds_group_start);
513 m_lds_group_start->set_required_slots(m_lds_group_requirement);
514 m_lds_group_start = 0;
515 }
516
/* Copy constructor: copies the Resource base, the destination vector and
 * the destination swizzle. Unlike the value constructor it makes no
 * add_parent() calls on the destination registers. */
InstrWithVectorResult::InstrWithVectorResult(const InstrWithVectorResult& orig):
    Resource(orig),
    m_dest(orig.m_dest),
    m_dest_swizzle(orig.m_dest_swizzle)
{
   /* members are fully set up by the initializer list */
}
523
update_indirect_addr(UNUSED PRegister old_reg,PRegister addr)524 void InstrWithVectorResult::update_indirect_addr(UNUSED PRegister old_reg, PRegister addr)
525 {
526 set_resource_offset(addr);
527 }
528
529 class InstrComparer : public ConstInstrVisitor {
530 public:
531 InstrComparer() = default;
532 bool result{false};
533
534 #define DECLARE_MEMBER(TYPE) \
535 InstrComparer(const TYPE *instr) { this_##TYPE = instr; } \
536 \
537 void visit(const TYPE& instr) \
538 { \
539 result = false; \
540 if (!this_##TYPE) \
541 return; \
542 result = this_##TYPE->is_equal_to(instr); \
543 } \
544 \
545 const TYPE *this_##TYPE{nullptr};
546
547 DECLARE_MEMBER(AluInstr);
548 DECLARE_MEMBER(AluGroup);
549 DECLARE_MEMBER(TexInstr);
550 DECLARE_MEMBER(ExportInstr);
551 DECLARE_MEMBER(FetchInstr);
552 DECLARE_MEMBER(Block);
553 DECLARE_MEMBER(ControlFlowInstr);
554 DECLARE_MEMBER(IfInstr);
555 DECLARE_MEMBER(ScratchIOInstr);
556 DECLARE_MEMBER(StreamOutInstr);
557 DECLARE_MEMBER(MemRingOutInstr);
558 DECLARE_MEMBER(EmitVertexInstr);
559 DECLARE_MEMBER(GDSInstr);
560 DECLARE_MEMBER(WriteTFInstr);
561 DECLARE_MEMBER(LDSAtomicInstr);
562 DECLARE_MEMBER(LDSReadInstr);
563 DECLARE_MEMBER(RatInstr);
564 };
565
566 class InstrCompareForward : public ConstInstrVisitor {
567 public:
visit(const AluInstr & instr)568 void visit(const AluInstr& instr) override { m_comparer = InstrComparer(&instr); }
569
visit(const AluGroup & instr)570 void visit(const AluGroup& instr) override { m_comparer = InstrComparer(&instr); }
571
visit(const TexInstr & instr)572 void visit(const TexInstr& instr) override { m_comparer = InstrComparer(&instr); }
573
visit(const ExportInstr & instr)574 void visit(const ExportInstr& instr) override { m_comparer = InstrComparer(&instr); }
575
visit(const FetchInstr & instr)576 void visit(const FetchInstr& instr) override { m_comparer = InstrComparer(&instr); }
577
visit(const Block & instr)578 void visit(const Block& instr) override { m_comparer = InstrComparer(&instr); }
579
visit(const ControlFlowInstr & instr)580 void visit(const ControlFlowInstr& instr) override
581 {
582 m_comparer = InstrComparer(&instr);
583 }
584
visit(const IfInstr & instr)585 void visit(const IfInstr& instr) override { m_comparer = InstrComparer(&instr); }
586
visit(const ScratchIOInstr & instr)587 void visit(const ScratchIOInstr& instr) override
588 {
589 m_comparer = InstrComparer(&instr);
590 }
591
visit(const StreamOutInstr & instr)592 void visit(const StreamOutInstr& instr) override
593 {
594 m_comparer = InstrComparer(&instr);
595 }
596
visit(const MemRingOutInstr & instr)597 void visit(const MemRingOutInstr& instr) override
598 {
599 m_comparer = InstrComparer(&instr);
600 }
601
visit(const EmitVertexInstr & instr)602 void visit(const EmitVertexInstr& instr) override
603 {
604 m_comparer = InstrComparer(&instr);
605 }
606
visit(const GDSInstr & instr)607 void visit(const GDSInstr& instr) override { m_comparer = InstrComparer(&instr); }
608
visit(const WriteTFInstr & instr)609 void visit(const WriteTFInstr& instr) override { m_comparer = InstrComparer(&instr); }
610
visit(const LDSAtomicInstr & instr)611 void visit(const LDSAtomicInstr& instr) override
612 {
613 m_comparer = InstrComparer(&instr);
614 }
615
visit(const LDSReadInstr & instr)616 void visit(const LDSReadInstr& instr) override { m_comparer = InstrComparer(&instr); }
617
visit(const RatInstr & instr)618 void visit(const RatInstr& instr) override { m_comparer = InstrComparer(&instr); }
619
620 InstrComparer m_comparer;
621 };
622
623 bool
equal_to(const Instr & lhs) const624 Instr::equal_to(const Instr& lhs) const
625 {
626 InstrCompareForward cmp;
627 accept(cmp);
628 lhs.accept(cmp.m_comparer);
629
630 return cmp.m_comparer.result;
631 }
632
633 } // namespace r600
634