• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instr_alugroup.h"
28 
29 #include "sfn_debug.h"
30 #include "sfn_instr_export.h"
31 #include "sfn_instr_mem.h"
32 #include "sfn_instr_tex.h"
33 
34 #include <algorithm>
35 
36 namespace r600 {
37 
AluGroup()38 AluGroup::AluGroup() { std::fill(m_slots.begin(), m_slots.end(), nullptr); }
39 
40 bool
add_instruction(AluInstr * instr)41 AluGroup::add_instruction(AluInstr *instr)
42 {
43    /* we can only schedule one op that accesses LDS or
44      the LDS read queue */
45    if (m_has_lds_op && instr->has_lds_access())
46       return false;
47 
48    if (instr->has_alu_flag(alu_is_trans)) {
49       ASSERTED auto opinfo = alu_ops.find(instr->opcode());
50       assert(opinfo->second.can_channel(AluOp::t, s_chip_class));
51       if (add_trans_instructions(instr)) {
52          m_has_kill_op |= instr->is_kill();
53          return true;
54       }
55    }
56 
57    if (add_vec_instructions(instr) && !instr->has_alu_flag(alu_is_trans)) {
58       instr->set_parent_group(this);
59       m_has_kill_op |= instr->is_kill();
60       return true;
61    }
62 
63    auto opinfo = alu_ops.find(instr->opcode());
64    assert(opinfo != alu_ops.end());
65 
66    if (s_max_slots > 4 && opinfo->second.can_channel(AluOp::t, s_chip_class) &&
67        add_trans_instructions(instr)) {
68       instr->set_parent_group(this);
69       m_has_kill_op |= instr->is_kill();
70       return true;
71    }
72 
73    return false;
74 }
75 
/* Try to place an instruction into the trans slot (slot index 4).
 *
 * The attempt is rejected when the trans slot is occupied or not available,
 * when the instruction is an LDS instruction, or when the opcode can not be
 * issued in the trans channel. For an instruction that is not flagged
 * alu_is_trans the vector slot matching its destination channel must already
 * be occupied; if the destination register is not pinned, the channel may be
 * changed to achieve this.
 *
 * \param instr the instruction to be placed
 * \returns true if the instruction was stored in the trans slot
 */
bool
AluGroup::add_trans_instructions(AluInstr *instr)
{
   /* Trans slot already taken, or the group has fewer than five slots */
   if (m_slots[4] || s_max_slots < 5)
      return false;

   /* LDS instructions have to be scheduled in X */
   if (instr->has_alu_flag(alu_is_lds))
      return false;

   auto opinfo = alu_ops.find(instr->opcode());
   assert(opinfo != alu_ops.end());

   if (!opinfo->second.can_channel(AluOp::t, s_chip_class))
      return false;

   /* if we schedule a non-trans instr into the trans slot, we have to make
    * sure that the corresponding vector slot is already occupied, otherwise
    * the hardware will schedule it as vector op and the bank-swizzle as
    * checked here (and in r600_asm.c) will not catch conflicts.
    */
   if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
      if (instr->dest() && instr->dest()->pin() == pin_free) {
         int used_slot = 3;
         auto dest = instr->dest();
         int free_mask = 0xf;

         /* Restrict the candidate channels to those that all ALU
          * instructions writing the register allow as destination ... */
         for (auto p : dest->parents()) {
            auto alu = p->as_alu();
            if (alu)
               free_mask &= alu->allowed_dest_chan_mask();
         }

         /* ... and that all users of the register allow as source */
         for (auto u : dest->uses()) {
            free_mask &= u->allowed_src_chan_mask();
            if (!free_mask)
               return false;
         }

         /* Search from w down to x for a vector slot that is occupied and
          * whose channel is still allowed by the mask */
         while (used_slot >= 0 &&
                (!m_slots[used_slot] || !(free_mask & (1 << used_slot))))
            --used_slot;

         // if we schedule a non-trans instr into the trans slot,
         // there should always be some slot that is already used
         if (used_slot < 0)
            return false;

         /* The channel is changed before the read port check below, so it
          * stays changed even if the instruction is rejected later on */
         instr->dest()->set_chan(used_slot);
      }
   }

   /* Re-check: the destination channel could not be moved to an occupied slot */
   if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
      return false;

   for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown; ++i) {
      /* Work on a copy so that a failed attempt leaves the reservation of
       * the group untouched */
      AluReadportReservation readports_evaluator = m_readports_evaluator;
      if (readports_evaluator.schedule_trans_instruction(*instr, i) &&
          update_indirect_access(instr)) {
         m_readports_evaluator = readports_evaluator;
         m_slots[4] = instr;
         instr->pin_sources_to_chan();
         sfn_log << SfnLog::schedule << "T: " << *instr << "\n";

         /* We added a vector op in the trans channel, so we have to
          * make sure the corresponding vector channel is used */
         assert(instr->has_alu_flag(alu_is_trans) || m_slots[instr->dest_chan()]);
         m_has_kill_op |= instr->is_kill();
         return true;
      }
   }
   return false;
}
149 
150 int
free_slots() const151 AluGroup::free_slots() const
152 {
153    int free_mask = 0;
154    for (int i = 0; i < s_max_slots; ++i) {
155       if (!m_slots[i])
156          free_mask |= 1 << i;
157    }
158    return free_mask;
159 }
160 
161 bool
add_vec_instructions(AluInstr * instr)162 AluGroup::add_vec_instructions(AluInstr *instr)
163 {
164    int param_src = -1;
165    for (auto& s : instr->sources()) {
166       auto is = s->as_inline_const();
167       if (is)
168          param_src = is->sel() - ALU_SRC_PARAM_BASE;
169    }
170 
171    if (param_src >= 0) {
172       if (m_param_used < 0)
173          m_param_used = param_src;
174       else if (m_param_used != param_src)
175          return false;
176    }
177 
178    if (m_has_lds_op && instr->has_lds_access())
179       return false;
180 
181    int preferred_chan = instr->dest_chan();
182    if (!m_slots[preferred_chan]) {
183       if (instr->bank_swizzle() != alu_vec_unknown) {
184          if (try_readport(instr, instr->bank_swizzle())) {
185             m_has_kill_op |= instr->is_kill();
186             return true;
187          }
188       } else {
189          for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
190             if (try_readport(instr, i)) {
191                m_has_kill_op |= instr->is_kill();
192                return true;
193             }
194          }
195       }
196    } else {
197 
198       auto dest = instr->dest();
199       if (dest && (dest->pin() == pin_free || dest->pin() == pin_group)) {
200 
201          int free_mask = 0xf;
202          for (auto p : dest->parents()) {
203             auto alu = p->as_alu();
204             if (alu)
205                free_mask &= alu->allowed_dest_chan_mask();
206          }
207 
208          for (auto u : dest->uses()) {
209             free_mask &= u->allowed_src_chan_mask();
210             if (!free_mask)
211                return false;
212          }
213 
214          int free_chan = 0;
215          while (free_chan < 4 && (m_slots[free_chan] || !(free_mask & (1 << free_chan))))
216             free_chan++;
217 
218          if (free_chan < 4) {
219             sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
220             dest->set_chan(free_chan);
221             if (instr->bank_swizzle() != alu_vec_unknown) {
222                if (try_readport(instr, instr->bank_swizzle())) {
223                   m_has_kill_op |= instr->is_kill();
224                   return true;
225                }
226             } else {
227                for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
228                   if (try_readport(instr, i)) {
229                      m_has_kill_op |= instr->is_kill();
230                      return true;
231                   }
232                }
233             }
234          }
235       }
236    }
237    return false;
238 }
239 
update_readport_reserver()240 void AluGroup::update_readport_reserver()
241 {
242    AluReadportReservation readports_evaluator;
243    for (int i = 0; i < 4;  ++i) {
244       if (!m_slots[i])
245          continue;
246 
247       AluReadportReservation re = readports_evaluator;
248       AluBankSwizzle bs = alu_vec_012;
249       while (bs != alu_vec_unknown) {
250          if (re.schedule_vec_instruction(*m_slots[i], bs)) {
251             readports_evaluator = re;
252             break;
253          }
254          ++bs;
255       }
256       if (bs == alu_vec_unknown)
257          unreachable("Bank swizzle should have been checked before");
258    }
259 
260    if (s_max_slots == 5 && m_slots[4]) {
261       AluReadportReservation re = readports_evaluator;
262       AluBankSwizzle bs = sq_alu_scl_201;
263       while (bs != sq_alu_scl_unknown) {
264          if (re.schedule_vec_instruction(*m_slots[4], bs)) {
265             readports_evaluator = re;
266             break;
267          }
268          ++bs;
269       }
270       if (bs == sq_alu_scl_unknown)
271          unreachable("Bank swizzle should have been checked before");
272    }
273 }
274 
275 bool
try_readport(AluInstr * instr,AluBankSwizzle cycle)276 AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
277 {
278    int preferred_chan = instr->dest_chan();
279    AluReadportReservation readports_evaluator = m_readports_evaluator;
280    if (readports_evaluator.schedule_vec_instruction(*instr, cycle) &&
281        update_indirect_access(instr)) {
282       m_readports_evaluator = readports_evaluator;
283       m_slots[preferred_chan] = instr;
284       m_has_lds_op |= instr->has_lds_access();
285       sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
286       auto dest = instr->dest();
287       if (dest) {
288          if (dest->pin() == pin_free)
289             dest->set_pin(pin_chan);
290          else if (dest->pin() == pin_group)
291             dest->set_pin(pin_chgr);
292       }
293       instr->pin_sources_to_chan();
294       return true;
295    }
296    return false;
297 }
298 
/* Replace a source register by another value in all instructions of the
 * group's vector slots.
 *
 * The replacement is done in two passes: the first pass only checks that
 * every occupied slot accepts the replacement and that a bank swizzle can
 * be found for the substituted sources, the second pass applies it. Nothing
 * is changed if the first pass fails.
 *
 * \param old_src the register to be replaced
 * \param new_src the value that takes its place
 * \returns true if at least one instruction reported a replacement
 */
bool AluGroup::replace_source(PRegister old_src, PVirtualValue new_src)
{
   /* Reservation built up from scratch for the substituted sources */
   AluReadportReservation rpr_sum;

   // At this point we should not have anything in slot 4
   assert(s_max_slots == 4 || !m_slots[4]);

   for (int slot = 0; slot < 4; ++slot) {
      if (!m_slots[slot])
         continue;

      assert(m_slots[slot]->alu_slots() == 1);

      if (!m_slots[slot]->can_replace_source(old_src, new_src))
         return false;

      auto& srcs = m_slots[slot]->sources();

      /* Copy of the sources with old_src substituted by new_src.
       * NOTE(review): the buffer holds three entries; this presumably
       * relies on single-slot instructions having at most three sources -
       * confirm. */
      PVirtualValue test_src[3];
      std::transform(srcs.begin(), srcs.end(), test_src,
                     [old_src, new_src](PVirtualValue s) {
         return old_src->equal_to(*s) ? new_src : s;
      });

      /* Take the first bank swizzle that works on top of the reservations
       * collected for the previous slots */
      AluBankSwizzle bs = alu_vec_012;
      while (bs != alu_vec_unknown) {
         AluReadportReservation rpr = rpr_sum;
         if (rpr.schedule_vec_src(test_src,srcs.size(), bs)) {
            rpr_sum = rpr;
            break;
         }
         ++bs;
      }

      if (bs == alu_vec_unknown)
         return false;
   }

   bool success = false;

   /* All checks passed: apply the replacement and tie the sources to
    * their channels */
   for (int slot = 0; slot < 4; ++slot) {
      if (!m_slots[slot])
         continue;
      success |= m_slots[slot]->do_replace_source(old_src, new_src);
      for (auto& s : m_slots[slot]->sources()) {
         if (s->pin() == pin_free)
            s->set_pin(pin_chan);
         else if (s->pin() == pin_group)
               s->set_pin(pin_chgr);
      }
   }

   m_readports_evaluator = rpr_sum;
   return success;
}
354 
355 bool
update_indirect_access(AluInstr * instr)356 AluGroup::update_indirect_access(AluInstr *instr)
357 {
358    auto [indirect_addr, for_dest, index_reg] = instr->indirect_addr();
359 
360    if (indirect_addr) {
361       assert(!index_reg);
362       if (!m_addr_used) {
363          m_addr_used = indirect_addr;
364          m_addr_for_src = !for_dest;
365          m_addr_is_index = false;
366       } else if (!indirect_addr->equal_to(*m_addr_used) || m_addr_is_index) {
367          return false;
368       }
369    } else if (index_reg) {
370        if (!m_addr_used) {
371            m_addr_used = index_reg;
372            m_addr_is_index = true;
373        } else if (!index_reg->equal_to(*m_addr_used) || !m_addr_is_index) {
374            return false;
375        }
376    }
377    return true;
378 }
379 
index_mode_load()380 bool AluGroup::index_mode_load()
381 {
382    if (!m_slots[0] || !m_slots[0]->dest())
383       return false;
384 
385    Register *dst = m_slots[0]->dest();
386    return dst->has_flag(Register::addr_or_idx) && dst->sel() > 0;
387 }
388 
389 void
accept(ConstInstrVisitor & visitor) const390 AluGroup::accept(ConstInstrVisitor& visitor) const
391 {
392    visitor.visit(*this);
393 }
394 
395 void
accept(InstrVisitor & visitor)396 AluGroup::accept(InstrVisitor& visitor)
397 {
398    visitor.visit(this);
399 }
400 
401 void
set_scheduled()402 AluGroup::set_scheduled()
403 {
404    for (int i = 0; i < s_max_slots; ++i) {
405       if (m_slots[i])
406          m_slots[i]->set_scheduled();
407    }
408    if (m_origin)
409       m_origin->set_scheduled();
410 }
411 
412 void
fix_last_flag()413 AluGroup::fix_last_flag()
414 {
415    bool last_seen = false;
416    for (int i = s_max_slots - 1; i >= 0; --i) {
417       if (m_slots[i]) {
418          if (!last_seen) {
419             m_slots[i]->set_alu_flag(alu_last_instr);
420             last_seen = true;
421          } else {
422             m_slots[i]->reset_alu_flag(alu_last_instr);
423          }
424       }
425    }
426 }
427 
428 bool
is_equal_to(const AluGroup & other) const429 AluGroup::is_equal_to(const AluGroup& other) const
430 {
431    for (int i = 0; i < s_max_slots; ++i) {
432       if (!other.m_slots[i]) {
433          if (!m_slots[i])
434             continue;
435          else
436             return false;
437       }
438 
439       if (m_slots[i]) {
440          if (!other.m_slots[i])
441             return false;
442          else if (!m_slots[i]->is_equal_to(*other.m_slots[i]))
443             return false;
444       }
445    }
446    return true;
447 }
448 
449 bool
has_lds_group_end() const450 AluGroup::has_lds_group_end() const
451 {
452    for (int i = 0; i < s_max_slots; ++i) {
453       if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
454          return true;
455    }
456    return false;
457 }
458 
459 bool
do_ready() const460 AluGroup::do_ready() const
461 {
462    for (int i = 0; i < s_max_slots; ++i) {
463       if (m_slots[i] && !m_slots[i]->ready())
464          return false;
465    }
466    return true;
467 }
468 
469 void
forward_set_blockid(int id,int index)470 AluGroup::forward_set_blockid(int id, int index)
471 {
472    for (int i = 0; i < s_max_slots; ++i) {
473       if (m_slots[i]) {
474          m_slots[i]->set_blockid(id, index);
475       }
476    }
477 }
478 
479 uint32_t
slots() const480 AluGroup::slots() const
481 {
482    uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
483    for (int i = 0; i < s_max_slots; ++i) {
484       if (m_slots[i])
485          ++result;
486    }
487    if (m_addr_used) {
488       ++result;
489       if (m_addr_is_index && s_max_slots == 5)
490          ++result;
491    }
492 
493    return result;
494 }
495 
496 void
do_print(std::ostream & os) const497 AluGroup::do_print(std::ostream& os) const
498 {
499    const char slotname[] = "xyzwt";
500 
501    os << "ALU_GROUP_BEGIN\n";
502    for (int i = 0; i < s_max_slots; ++i) {
503       if (m_slots[i]) {
504          for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
505             os << ' ';
506          os << slotname[i] << ": ";
507          m_slots[i]->print(os);
508          os << "\n";
509       }
510    }
511    for (int i = 0; i < 2 * m_nesting_depth + 2; ++i)
512       os << ' ';
513    os << "ALU_GROUP_END";
514 }
515 
516 AluInstr::SrcValues
get_kconsts() const517 AluGroup::get_kconsts() const
518 {
519    AluInstr::SrcValues result;
520 
521    for (int i = 0; i < s_max_slots; ++i) {
522       if (m_slots[i]) {
523          for (auto s : m_slots[i]->sources())
524             if (s->as_uniform())
525                result.push_back(s);
526       }
527    }
528    return result;
529 }
530 
531 void
set_chipclass(r600_chip_class chip_class)532 AluGroup::set_chipclass(r600_chip_class chip_class)
533 {
534    s_chip_class = chip_class;
535    s_max_slots = chip_class == ISA_CC_CAYMAN ? 4 : 5;
536 }
537 
/* Class-wide defaults that are in effect until set_chipclass() is called:
 * five slots per group and the Evergreen chip class. */
int AluGroup::s_max_slots = 5;
r600_chip_class AluGroup::s_chip_class = ISA_CC_EVERGREEN;
540 } // namespace r600
541