1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_instr_alugroup.h"
28
29 #include "sfn_debug.h"
30 #include "sfn_instr_export.h"
31 #include "sfn_instr_mem.h"
32 #include "sfn_instr_tex.h"
33
34 #include <algorithm>
35
36 namespace r600 {
37
AluGroup()38 AluGroup::AluGroup() { std::fill(m_slots.begin(), m_slots.end(), nullptr); }
39
40 bool
add_instruction(AluInstr * instr)41 AluGroup::add_instruction(AluInstr *instr)
42 {
43 /* we can only schedule one op that accesses LDS or
44 the LDS read queue */
45 if (m_has_lds_op && instr->has_lds_access())
46 return false;
47
48 if (instr->has_alu_flag(alu_is_trans)) {
49 ASSERTED auto opinfo = alu_ops.find(instr->opcode());
50 assert(opinfo->second.can_channel(AluOp::t, s_chip_class));
51 if (add_trans_instructions(instr)) {
52 m_has_kill_op |= instr->is_kill();
53 return true;
54 }
55 }
56
57 if (add_vec_instructions(instr) && !instr->has_alu_flag(alu_is_trans)) {
58 instr->set_parent_group(this);
59 m_has_kill_op |= instr->is_kill();
60 return true;
61 }
62
63 auto opinfo = alu_ops.find(instr->opcode());
64 assert(opinfo != alu_ops.end());
65
66 if (s_max_slots > 4 && opinfo->second.can_channel(AluOp::t, s_chip_class) &&
67 add_trans_instructions(instr)) {
68 instr->set_parent_group(this);
69 m_has_kill_op |= instr->is_kill();
70 return true;
71 }
72
73 return false;
74 }
75
76 bool
add_trans_instructions(AluInstr * instr)77 AluGroup::add_trans_instructions(AluInstr *instr)
78 {
79 if (m_slots[4] || s_max_slots < 5)
80 return false;
81
82 /* LDS instructions have to be scheduled in X */
83 if (instr->has_alu_flag(alu_is_lds))
84 return false;
85
86 auto opinfo = alu_ops.find(instr->opcode());
87 assert(opinfo != alu_ops.end());
88
89 if (!opinfo->second.can_channel(AluOp::t, s_chip_class))
90 return false;
91
92 /* if we schedule a non-trans instr into the trans slot, we have to make
93 * sure that the corresponding vector slot is already occupied, otherwise
94 * the hardware will schedule it as vector op and the bank-swizzle as
95 * checked here (and in r600_asm.c) will not catch conflicts.
96 */
97 if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
98 if (instr->dest() && instr->dest()->pin() == pin_free) {
99 int used_slot = 3;
100 auto dest = instr->dest();
101 int free_mask = 0xf;
102
103 for (auto p : dest->parents()) {
104 auto alu = p->as_alu();
105 if (alu)
106 free_mask &= alu->allowed_dest_chan_mask();
107 }
108
109 for (auto u : dest->uses()) {
110 free_mask &= u->allowed_src_chan_mask();
111 if (!free_mask)
112 return false;
113 }
114
115 while (used_slot >= 0 &&
116 (!m_slots[used_slot] || !(free_mask & (1 << used_slot))))
117 --used_slot;
118
119 // if we schedule a non-trans instr into the trans slot,
120 // there should always be some slot that is already used
121 if (used_slot < 0)
122 return false;
123
124 instr->dest()->set_chan(used_slot);
125 }
126 }
127
128 if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
129 return false;
130
131 for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown; ++i) {
132 AluReadportReservation readports_evaluator = m_readports_evaluator;
133 if (readports_evaluator.schedule_trans_instruction(*instr, i) &&
134 update_indirect_access(instr)) {
135 m_readports_evaluator = readports_evaluator;
136 m_slots[4] = instr;
137 instr->pin_sources_to_chan();
138 sfn_log << SfnLog::schedule << "T: " << *instr << "\n";
139
140 /* We added a vector op in the trans channel, so we have to
141 * make sure the corresponding vector channel is used */
142 assert(instr->has_alu_flag(alu_is_trans) || m_slots[instr->dest_chan()]);
143 m_has_kill_op |= instr->is_kill();
144 return true;
145 }
146 }
147 return false;
148 }
149
150 int
free_slots() const151 AluGroup::free_slots() const
152 {
153 int free_mask = 0;
154 for (int i = 0; i < s_max_slots; ++i) {
155 if (!m_slots[i])
156 free_mask |= 1 << i;
157 }
158 return free_mask;
159 }
160
161 bool
add_vec_instructions(AluInstr * instr)162 AluGroup::add_vec_instructions(AluInstr *instr)
163 {
164 int param_src = -1;
165 for (auto& s : instr->sources()) {
166 auto is = s->as_inline_const();
167 if (is)
168 param_src = is->sel() - ALU_SRC_PARAM_BASE;
169 }
170
171 if (param_src >= 0) {
172 if (m_param_used < 0)
173 m_param_used = param_src;
174 else if (m_param_used != param_src)
175 return false;
176 }
177
178 if (m_has_lds_op && instr->has_lds_access())
179 return false;
180
181 int preferred_chan = instr->dest_chan();
182 if (!m_slots[preferred_chan]) {
183 if (instr->bank_swizzle() != alu_vec_unknown) {
184 if (try_readport(instr, instr->bank_swizzle())) {
185 m_has_kill_op |= instr->is_kill();
186 return true;
187 }
188 } else {
189 for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
190 if (try_readport(instr, i)) {
191 m_has_kill_op |= instr->is_kill();
192 return true;
193 }
194 }
195 }
196 } else {
197
198 auto dest = instr->dest();
199 if (dest && (dest->pin() == pin_free || dest->pin() == pin_group)) {
200
201 int free_mask = 0xf;
202 for (auto p : dest->parents()) {
203 auto alu = p->as_alu();
204 if (alu)
205 free_mask &= alu->allowed_dest_chan_mask();
206 }
207
208 for (auto u : dest->uses()) {
209 free_mask &= u->allowed_src_chan_mask();
210 if (!free_mask)
211 return false;
212 }
213
214 int free_chan = 0;
215 while (free_chan < 4 && (m_slots[free_chan] || !(free_mask & (1 << free_chan))))
216 free_chan++;
217
218 if (free_chan < 4) {
219 sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
220 dest->set_chan(free_chan);
221 if (instr->bank_swizzle() != alu_vec_unknown) {
222 if (try_readport(instr, instr->bank_swizzle())) {
223 m_has_kill_op |= instr->is_kill();
224 return true;
225 }
226 } else {
227 for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
228 if (try_readport(instr, i)) {
229 m_has_kill_op |= instr->is_kill();
230 return true;
231 }
232 }
233 }
234 }
235 }
236 }
237 return false;
238 }
239
update_readport_reserver()240 void AluGroup::update_readport_reserver()
241 {
242 AluReadportReservation readports_evaluator;
243 for (int i = 0; i < 4; ++i) {
244 if (!m_slots[i])
245 continue;
246
247 AluReadportReservation re = readports_evaluator;
248 AluBankSwizzle bs = alu_vec_012;
249 while (bs != alu_vec_unknown) {
250 if (re.schedule_vec_instruction(*m_slots[i], bs)) {
251 readports_evaluator = re;
252 break;
253 }
254 ++bs;
255 }
256 if (bs == alu_vec_unknown)
257 unreachable("Bank swizzle should have been checked before");
258 }
259
260 if (s_max_slots == 5 && m_slots[4]) {
261 AluReadportReservation re = readports_evaluator;
262 AluBankSwizzle bs = sq_alu_scl_201;
263 while (bs != sq_alu_scl_unknown) {
264 if (re.schedule_vec_instruction(*m_slots[4], bs)) {
265 readports_evaluator = re;
266 break;
267 }
268 ++bs;
269 }
270 if (bs == sq_alu_scl_unknown)
271 unreachable("Bank swizzle should have been checked before");
272 }
273 }
274
275 bool
try_readport(AluInstr * instr,AluBankSwizzle cycle)276 AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
277 {
278 int preferred_chan = instr->dest_chan();
279 AluReadportReservation readports_evaluator = m_readports_evaluator;
280 if (readports_evaluator.schedule_vec_instruction(*instr, cycle) &&
281 update_indirect_access(instr)) {
282 m_readports_evaluator = readports_evaluator;
283 m_slots[preferred_chan] = instr;
284 m_has_lds_op |= instr->has_lds_access();
285 sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
286 auto dest = instr->dest();
287 if (dest) {
288 if (dest->pin() == pin_free)
289 dest->set_pin(pin_chan);
290 else if (dest->pin() == pin_group)
291 dest->set_pin(pin_chgr);
292 }
293 instr->pin_sources_to_chan();
294 return true;
295 }
296 return false;
297 }
298
replace_source(PRegister old_src,PVirtualValue new_src)299 bool AluGroup::replace_source(PRegister old_src, PVirtualValue new_src)
300 {
301 AluReadportReservation rpr_sum;
302
303 // At this point we should not have anything in slot 4
304 assert(s_max_slots == 4 || !m_slots[4]);
305
306 for (int slot = 0; slot < 4; ++slot) {
307 if (!m_slots[slot])
308 continue;
309
310 assert(m_slots[slot]->alu_slots() == 1);
311
312 if (!m_slots[slot]->can_replace_source(old_src, new_src))
313 return false;
314
315 auto& srcs = m_slots[slot]->sources();
316
317 PVirtualValue test_src[3];
318 std::transform(srcs.begin(), srcs.end(), test_src,
319 [old_src, new_src](PVirtualValue s) {
320 return old_src->equal_to(*s) ? new_src : s;
321 });
322
323 AluBankSwizzle bs = alu_vec_012;
324 while (bs != alu_vec_unknown) {
325 AluReadportReservation rpr = rpr_sum;
326 if (rpr.schedule_vec_src(test_src,srcs.size(), bs)) {
327 rpr_sum = rpr;
328 break;
329 }
330 ++bs;
331 }
332
333 if (bs == alu_vec_unknown)
334 return false;
335 }
336
337 bool success = false;
338
339 for (int slot = 0; slot < 4; ++slot) {
340 if (!m_slots[slot])
341 continue;
342 success |= m_slots[slot]->do_replace_source(old_src, new_src);
343 for (auto& s : m_slots[slot]->sources()) {
344 if (s->pin() == pin_free)
345 s->set_pin(pin_chan);
346 else if (s->pin() == pin_group)
347 s->set_pin(pin_chgr);
348 }
349 }
350
351 m_readports_evaluator = rpr_sum;
352 return success;
353 }
354
355 bool
update_indirect_access(AluInstr * instr)356 AluGroup::update_indirect_access(AluInstr *instr)
357 {
358 auto [indirect_addr, for_dest, index_reg] = instr->indirect_addr();
359
360 if (indirect_addr) {
361 assert(!index_reg);
362 if (!m_addr_used) {
363 m_addr_used = indirect_addr;
364 m_addr_for_src = !for_dest;
365 m_addr_is_index = false;
366 } else if (!indirect_addr->equal_to(*m_addr_used) || m_addr_is_index) {
367 return false;
368 }
369 } else if (index_reg) {
370 if (!m_addr_used) {
371 m_addr_used = index_reg;
372 m_addr_is_index = true;
373 } else if (!index_reg->equal_to(*m_addr_used) || !m_addr_is_index) {
374 return false;
375 }
376 }
377 return true;
378 }
379
index_mode_load()380 bool AluGroup::index_mode_load()
381 {
382 if (!m_slots[0] || !m_slots[0]->dest())
383 return false;
384
385 Register *dst = m_slots[0]->dest();
386 return dst->has_flag(Register::addr_or_idx) && dst->sel() > 0;
387 }
388
389 void
accept(ConstInstrVisitor & visitor) const390 AluGroup::accept(ConstInstrVisitor& visitor) const
391 {
392 visitor.visit(*this);
393 }
394
395 void
accept(InstrVisitor & visitor)396 AluGroup::accept(InstrVisitor& visitor)
397 {
398 visitor.visit(this);
399 }
400
401 void
set_scheduled()402 AluGroup::set_scheduled()
403 {
404 for (int i = 0; i < s_max_slots; ++i) {
405 if (m_slots[i])
406 m_slots[i]->set_scheduled();
407 }
408 if (m_origin)
409 m_origin->set_scheduled();
410 }
411
412 void
fix_last_flag()413 AluGroup::fix_last_flag()
414 {
415 bool last_seen = false;
416 for (int i = s_max_slots - 1; i >= 0; --i) {
417 if (m_slots[i]) {
418 if (!last_seen) {
419 m_slots[i]->set_alu_flag(alu_last_instr);
420 last_seen = true;
421 } else {
422 m_slots[i]->reset_alu_flag(alu_last_instr);
423 }
424 }
425 }
426 }
427
428 bool
is_equal_to(const AluGroup & other) const429 AluGroup::is_equal_to(const AluGroup& other) const
430 {
431 for (int i = 0; i < s_max_slots; ++i) {
432 if (!other.m_slots[i]) {
433 if (!m_slots[i])
434 continue;
435 else
436 return false;
437 }
438
439 if (m_slots[i]) {
440 if (!other.m_slots[i])
441 return false;
442 else if (!m_slots[i]->is_equal_to(*other.m_slots[i]))
443 return false;
444 }
445 }
446 return true;
447 }
448
449 bool
has_lds_group_end() const450 AluGroup::has_lds_group_end() const
451 {
452 for (int i = 0; i < s_max_slots; ++i) {
453 if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
454 return true;
455 }
456 return false;
457 }
458
459 bool
do_ready() const460 AluGroup::do_ready() const
461 {
462 for (int i = 0; i < s_max_slots; ++i) {
463 if (m_slots[i] && !m_slots[i]->ready())
464 return false;
465 }
466 return true;
467 }
468
469 void
forward_set_blockid(int id,int index)470 AluGroup::forward_set_blockid(int id, int index)
471 {
472 for (int i = 0; i < s_max_slots; ++i) {
473 if (m_slots[i]) {
474 m_slots[i]->set_blockid(id, index);
475 }
476 }
477 }
478
479 uint32_t
slots() const480 AluGroup::slots() const
481 {
482 uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
483 for (int i = 0; i < s_max_slots; ++i) {
484 if (m_slots[i])
485 ++result;
486 }
487 if (m_addr_used) {
488 ++result;
489 if (m_addr_is_index && s_max_slots == 5)
490 ++result;
491 }
492
493 return result;
494 }
495
496 void
do_print(std::ostream & os) const497 AluGroup::do_print(std::ostream& os) const
498 {
499 const char slotname[] = "xyzwt";
500
501 os << "ALU_GROUP_BEGIN\n";
502 for (int i = 0; i < s_max_slots; ++i) {
503 if (m_slots[i]) {
504 for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
505 os << ' ';
506 os << slotname[i] << ": ";
507 m_slots[i]->print(os);
508 os << "\n";
509 }
510 }
511 for (int i = 0; i < 2 * m_nesting_depth + 2; ++i)
512 os << ' ';
513 os << "ALU_GROUP_END";
514 }
515
516 AluInstr::SrcValues
get_kconsts() const517 AluGroup::get_kconsts() const
518 {
519 AluInstr::SrcValues result;
520
521 for (int i = 0; i < s_max_slots; ++i) {
522 if (m_slots[i]) {
523 for (auto s : m_slots[i]->sources())
524 if (s->as_uniform())
525 result.push_back(s);
526 }
527 }
528 return result;
529 }
530
531 void
set_chipclass(r600_chip_class chip_class)532 AluGroup::set_chipclass(r600_chip_class chip_class)
533 {
534 s_chip_class = chip_class;
535 s_max_slots = chip_class == ISA_CC_CAYMAN ? 4 : 5;
536 }
537
538 int AluGroup::s_max_slots = 5;
539 r600_chip_class AluGroup::s_chip_class = ISA_CC_EVERGREEN;
540 } // namespace r600
541