• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_instr_alugroup.h"
28 #include "sfn_debug.h"
29 #include <algorithm>
30 
31 namespace r600 {
32 
AluGroup()33 AluGroup::AluGroup()
34 {
35    std::fill(m_slots.begin(), m_slots.end(), nullptr);
36 }
37 
is_kill(EAluOp op)38 static bool is_kill(EAluOp op)
39 {
40    switch (op) {
41    case op2_kille:
42    case op2_kille_int:
43    case op2_killne:
44    case op2_killne_int:
45    case op2_killge:
46    case op2_killge_int:
47    case op2_killge_uint:
48    case op2_killgt:
49    case op2_killgt_int:
50    case op2_killgt_uint:
51       return true;
52    default:
53       return false;
54    }
55 }
56 
add_instruction(AluInstr * instr)57 bool AluGroup::add_instruction(AluInstr *instr)
58 {
59    /* we can only schedule one op that accesses LDS or
60      the LDS read queue */
61    if (m_has_lds_op && instr->has_lds_access())
62       return false;
63 
64    if (instr->has_alu_flag(alu_is_trans)) {
65       auto opinfo = alu_ops.find(instr->opcode());
66       assert(opinfo->second.can_channel(AluOp::t, s_chip_class));
67       if (add_trans_instructions(instr)) {
68          if (is_kill(instr->opcode()))
69             m_has_kill_op = true;
70          return true;
71       }
72    }
73 
74    if (add_vec_instructions(instr) && !instr->has_alu_flag(alu_is_trans)) {
75       instr->set_parent_group(this);
76       if (!instr->has_alu_flag(alu_is_lds) && is_kill(instr->opcode()))
77          m_has_kill_op = true;
78       return true;
79    }
80 
81    auto opinfo = alu_ops.find(instr->opcode());
82    assert(opinfo != alu_ops.end());
83 
84    if (s_max_slots > 4 &&
85        opinfo->second.can_channel(AluOp::t, s_chip_class) &&
86        add_trans_instructions(instr)) {
87       instr->set_parent_group(this);
88       if (is_kill(instr->opcode()))
89          m_has_kill_op = true;
90       return true;
91    }
92 
93    return false;
94 }
95 
add_trans_instructions(AluInstr * instr)96 bool AluGroup::add_trans_instructions(AluInstr *instr)
97 {
98    if (m_slots[4] || s_max_slots < 5)
99       return false;
100 
101    if (!update_indirect_access(instr))
102       return false;
103 
104    /* LDS instructions have to be scheduled in X */
105    if (instr->has_alu_flag(alu_is_lds))
106       return false;
107 
108    auto opinfo = alu_ops.find(instr->opcode());
109    assert(opinfo != alu_ops.end());
110 
111    if (!opinfo->second.can_channel(AluOp::t, s_chip_class))
112       return false;
113 
114    /* if we schedule a non-trans instr into the trans slot, we have to make
115     * sure that the corresponding vector slot is already occupied, otherwise
116     * the hardware will schedule it as vector op and the bank-swizzle as
117     * checked here (and in r600_asm.c) will not catch conflicts.
118     */
119    if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
120       if (instr->dest() && instr->dest()->pin() == pin_free) {
121          int used_slot = 3;
122          while (!m_slots[used_slot] && used_slot >= 0)
123             --used_slot;
124 
125          // if we schedule a non-trans instr into the trans slot,
126          // there should always be some slot that is already used
127          assert(used_slot >= 0);
128          instr->dest()->set_chan(used_slot);
129       }
130    }
131 
132    for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown ; ++i) {
133       AluReadportReservation readports_evaluator = m_readports_evaluator;
134       if (readports_evaluator.schedule_trans_instruction(*instr, i)) {
135          m_readports_evaluator = readports_evaluator;
136          m_slots[4] = instr;
137          instr->pin_sources_to_chan();
138          sfn_log << SfnLog::schedule << "T: " << *instr << "\n";
139 
140          /* We added a vector op in the trans channel, so we have to
141           * make sure the corresponding vector channel is used */
142          if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
143             m_slots[instr->dest_chan()] =
144                   new AluInstr(op0_nop, instr->dest_chan());
145          return true;
146       }
147    }
148    return false;
149 }
150 
free_slots() const151 int AluGroup::free_slots() const
152 {
153    int free_mask = 0;
154    for(int i = 0; i < s_max_slots; ++i) {
155       if (!m_slots[i])
156          free_mask |= 1 << i;
157    }
158    return free_mask;
159 }
160 
161 class AluAllowSlotSwitch : public AluInstrVisitor {
162 public:
163    using AluInstrVisitor::visit;
164 
visit(AluInstr * alu)165    void visit(AluInstr *alu) {
166       yes = (alu->alu_slots() == 1 || alu->has_alu_flag(alu_is_cayman_trans));
167    }
168 
169    bool yes{false};
170 
171 };
172 
add_vec_instructions(AluInstr * instr)173 bool AluGroup::add_vec_instructions(AluInstr *instr)
174 {
175    if (!update_indirect_access(instr))
176       return false;
177 
178    int param_src = -1;
179    for (auto& s : instr->sources()) {
180       auto is = s->as_inline_const();
181       if (is)
182          param_src = is->sel() - ALU_SRC_PARAM_BASE;
183    }
184 
185    if (param_src >= 0) {
186       if (m_param_used < 0)
187          m_param_used = param_src;
188       else if (m_param_used != param_src)
189          return false;
190    }
191 
192    if (m_has_lds_op && instr->has_lds_access())
193       return false;
194 
195    int preferred_chan = instr->dest_chan();
196    if (!m_slots[preferred_chan]) {
197       if (instr->bank_swizzle() != alu_vec_unknown) {
198          if (try_readport(instr, instr->bank_swizzle()))
199              return true;
200       } else {
201          for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
202             if (try_readport(instr, i))
203                return true;
204          }
205       }
206    } else {
207 
208       auto dest = instr->dest();
209       if (dest && dest->pin() == pin_free) {
210 
211          for (auto u : dest->uses()) {
212             AluAllowSlotSwitch swich_allowed;
213             u->accept(swich_allowed);
214             if (!swich_allowed.yes)
215                return false;
216          }
217 
218          int free_chan = 0;
219          while (m_slots[free_chan] && free_chan < 4)
220             free_chan++;
221 
222          if (!m_slots[free_chan] && free_chan < 4) {
223             sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
224             dest->set_chan(free_chan);
225             if (instr->bank_swizzle() != alu_vec_unknown) {
226                if (try_readport(instr, instr->bank_swizzle()))
227                   return true;
228             } else {
229                for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
230                   if (try_readport(instr, i))
231                      return true;
232                }
233             }
234          }
235       }
236    }
237    return false;
238 }
239 
try_readport(AluInstr * instr,AluBankSwizzle cycle)240 bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
241 {
242    int preferred_chan = instr->dest_chan();
243    AluReadportReservation readports_evaluator = m_readports_evaluator;
244    if (readports_evaluator.schedule_vec_instruction(*instr, cycle)) {
245       m_readports_evaluator = readports_evaluator;
246       m_slots[preferred_chan] = instr;
247       m_has_lds_op |= instr->has_lds_access();
248       sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
249       auto dest = instr->dest();
250       if (dest && dest->pin() == pin_free)
251          dest->set_pin(pin_chan);
252       instr->pin_sources_to_chan();
253       return true;
254    }
255    return false;
256 }
257 
update_indirect_access(AluInstr * instr)258 bool AluGroup::update_indirect_access(AluInstr *instr)
259 {
260    auto [indirect_addr, for_src, is_index ] = instr->indirect_addr();
261 
262    if (indirect_addr) {
263       if (!m_addr_used) {
264          m_addr_used = indirect_addr;
265          m_addr_for_src = for_src;
266          m_addr_is_index = is_index;
267       } else if (!indirect_addr->equal_to(*m_addr_used)) {
268          return false;
269       }
270    }
271 
272    return true;
273 }
274 
accept(ConstInstrVisitor & visitor) const275 void AluGroup::accept(ConstInstrVisitor& visitor) const
276 {
277    visitor.visit(*this);
278 }
279 
accept(InstrVisitor & visitor)280 void AluGroup::accept(InstrVisitor& visitor)
281 {
282    visitor.visit(this);
283 }
284 
set_scheduled()285 void AluGroup::set_scheduled()
286 {
287    for (int i = 0; i < s_max_slots; ++i) {
288       if (m_slots[i])
289          m_slots[i]->set_scheduled();
290    }
291 }
292 
fix_last_flag()293 void AluGroup::fix_last_flag()
294 {
295    bool last_seen = false;
296    for (int i = s_max_slots - 1; i >= 0; --i) {
297       if (m_slots[i]) {
298          if (!last_seen) {
299             m_slots[i]->set_alu_flag(alu_last_instr);
300             last_seen = true;
301          } else {
302             m_slots[i]->reset_alu_flag(alu_last_instr);
303          }
304       }
305    }
306 }
307 
is_equal_to(const AluGroup & other) const308 bool AluGroup::is_equal_to(const AluGroup& other) const
309 {
310    for (int i = 0; i < s_max_slots; ++i) {
311       if (!other.m_slots[i]) {
312          if (!m_slots[i])
313             continue;
314          else
315             return false;
316       }
317 
318       if (m_slots[i]) {
319          if (!other.m_slots[i])
320             return false;
321          else if (!m_slots[i]->is_equal_to(*other.m_slots[i]))
322             return false;
323       }
324    }
325    return true;
326 }
327 
has_lds_group_end() const328 bool AluGroup::has_lds_group_end() const
329 {
330    for (int i = 0; i < s_max_slots; ++i) {
331       if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
332          return true;
333    }
334    return false;
335 }
336 
do_ready() const337 bool AluGroup::do_ready() const
338 {
339    for (int i = 0; i < s_max_slots; ++i) {
340       if (m_slots[i] && !m_slots[i]->ready())
341          return false;
342    }
343    return true;
344 }
345 
forward_set_blockid(int id,int index)346 void AluGroup::forward_set_blockid(int id, int index)
347 {
348    for (int i = 0; i < s_max_slots; ++i) {
349       if (m_slots[i]) {
350          m_slots[i]->set_blockid(id, index);
351       }
352    }
353 }
354 
slots() const355 uint32_t AluGroup::slots() const
356 {
357    uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
358    for (int i = 0; i < s_max_slots; ++i) {
359       if (m_slots[i])
360          ++result;
361    }
362    if (m_addr_used) {
363       ++result;
364       if (m_addr_is_index)
365          ++result;
366    }
367 
368    return result;
369 }
370 
do_print(std::ostream & os) const371 void AluGroup::do_print(std::ostream& os) const
372 {
373    const char slotname[] = "xyzwt";
374 
375    os << "ALU_GROUP_BEGIN\n";
376    for (int i = 0; i < s_max_slots; ++i) {
377       if (m_slots[i]) {
378          for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
379             os << ' ';
380          os << slotname[i] << ": ";
381          m_slots[i]->print(os);
382          os << "\n";
383       }
384    }
385    for (int i = 0; i < 2 * m_nesting_depth + 2; ++i)
386       os << ' ';
387    os << "ALU_GROUP_END";
388 }
389 
get_kconsts() const390 AluInstr::SrcValues AluGroup::get_kconsts() const
391 {
392    AluInstr::SrcValues result;
393 
394    for (int i = 0; i < s_max_slots; ++i) {
395       if (m_slots[i]) {
396          for (auto s : m_slots[i]->sources())
397             if (s->as_uniform())
398                result.push_back(s);
399       }
400    }
401    return result;
402 }
403 
set_chipclass(r600_chip_class chip_class)404 void AluGroup::set_chipclass(r600_chip_class chip_class)
405 {
406    s_chip_class = chip_class;
407    s_max_slots  = chip_class == ISA_CC_CAYMAN ? 4 : 5;
408 }
409 
410 int AluGroup::s_max_slots = 5;
411 r600_chip_class AluGroup::s_chip_class = ISA_CC_EVERGREEN;
412 }
413