1 /* -*- mesa-c++ -*-
2 *
3 * Copyright (c) 2022 Collabora LTD
4 *
5 * Author: Gert Wollny <gert.wollny@collabora.com>
6 *
7 * Permission is hereby granted, free of charge, to any person obtaining a
8 * copy of this software and associated documentation files (the "Software"),
9 * to deal in the Software without restriction, including without limitation
10 * on the rights to use, copy, modify, merge, publish, distribute, sub
11 * license, and/or sell copies of the Software, and to permit persons to whom
12 * the Software is furnished to do so, subject to the following conditions:
13 *
14 * The above copyright notice and this permission notice (including the next
15 * paragraph) shall be included in all copies or substantial portions of the
16 * Software.
17 *
18 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24 * USE OR OTHER DEALINGS IN THE SOFTWARE.
25 */
26
27 #include "sfn_instr_alugroup.h"
28 #include "sfn_debug.h"
29 #include <algorithm>
30
31 namespace r600 {
32
AluGroup()33 AluGroup::AluGroup()
34 {
35 std::fill(m_slots.begin(), m_slots.end(), nullptr);
36 }
37
is_kill(EAluOp op)38 static bool is_kill(EAluOp op)
39 {
40 switch (op) {
41 case op2_kille:
42 case op2_kille_int:
43 case op2_killne:
44 case op2_killne_int:
45 case op2_killge:
46 case op2_killge_int:
47 case op2_killge_uint:
48 case op2_killgt:
49 case op2_killgt_int:
50 case op2_killgt_uint:
51 return true;
52 default:
53 return false;
54 }
55 }
56
add_instruction(AluInstr * instr)57 bool AluGroup::add_instruction(AluInstr *instr)
58 {
59 /* we can only schedule one op that accesses LDS or
60 the LDS read queue */
61 if (m_has_lds_op && instr->has_lds_access())
62 return false;
63
64 if (instr->has_alu_flag(alu_is_trans)) {
65 auto opinfo = alu_ops.find(instr->opcode());
66 assert(opinfo->second.can_channel(AluOp::t, s_chip_class));
67 if (add_trans_instructions(instr)) {
68 if (is_kill(instr->opcode()))
69 m_has_kill_op = true;
70 return true;
71 }
72 }
73
74 if (add_vec_instructions(instr) && !instr->has_alu_flag(alu_is_trans)) {
75 instr->set_parent_group(this);
76 if (!instr->has_alu_flag(alu_is_lds) && is_kill(instr->opcode()))
77 m_has_kill_op = true;
78 return true;
79 }
80
81 auto opinfo = alu_ops.find(instr->opcode());
82 assert(opinfo != alu_ops.end());
83
84 if (s_max_slots > 4 &&
85 opinfo->second.can_channel(AluOp::t, s_chip_class) &&
86 add_trans_instructions(instr)) {
87 instr->set_parent_group(this);
88 if (is_kill(instr->opcode()))
89 m_has_kill_op = true;
90 return true;
91 }
92
93 return false;
94 }
95
add_trans_instructions(AluInstr * instr)96 bool AluGroup::add_trans_instructions(AluInstr *instr)
97 {
98 if (m_slots[4] || s_max_slots < 5)
99 return false;
100
101 if (!update_indirect_access(instr))
102 return false;
103
104 /* LDS instructions have to be scheduled in X */
105 if (instr->has_alu_flag(alu_is_lds))
106 return false;
107
108 auto opinfo = alu_ops.find(instr->opcode());
109 assert(opinfo != alu_ops.end());
110
111 if (!opinfo->second.can_channel(AluOp::t, s_chip_class))
112 return false;
113
114 /* if we schedule a non-trans instr into the trans slot, we have to make
115 * sure that the corresponding vector slot is already occupied, otherwise
116 * the hardware will schedule it as vector op and the bank-swizzle as
117 * checked here (and in r600_asm.c) will not catch conflicts.
118 */
119 if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()]) {
120 if (instr->dest() && instr->dest()->pin() == pin_free) {
121 int used_slot = 3;
122 while (!m_slots[used_slot] && used_slot >= 0)
123 --used_slot;
124
125 // if we schedule a non-trans instr into the trans slot,
126 // there should always be some slot that is already used
127 assert(used_slot >= 0);
128 instr->dest()->set_chan(used_slot);
129 }
130 }
131
132 for (AluBankSwizzle i = sq_alu_scl_201; i != sq_alu_scl_unknown ; ++i) {
133 AluReadportReservation readports_evaluator = m_readports_evaluator;
134 if (readports_evaluator.schedule_trans_instruction(*instr, i)) {
135 m_readports_evaluator = readports_evaluator;
136 m_slots[4] = instr;
137 instr->pin_sources_to_chan();
138 sfn_log << SfnLog::schedule << "T: " << *instr << "\n";
139
140 /* We added a vector op in the trans channel, so we have to
141 * make sure the corresponding vector channel is used */
142 if (!instr->has_alu_flag(alu_is_trans) && !m_slots[instr->dest_chan()])
143 m_slots[instr->dest_chan()] =
144 new AluInstr(op0_nop, instr->dest_chan());
145 return true;
146 }
147 }
148 return false;
149 }
150
free_slots() const151 int AluGroup::free_slots() const
152 {
153 int free_mask = 0;
154 for(int i = 0; i < s_max_slots; ++i) {
155 if (!m_slots[i])
156 free_mask |= 1 << i;
157 }
158 return free_mask;
159 }
160
161 class AluAllowSlotSwitch : public AluInstrVisitor {
162 public:
163 using AluInstrVisitor::visit;
164
visit(AluInstr * alu)165 void visit(AluInstr *alu) {
166 yes = (alu->alu_slots() == 1 || alu->has_alu_flag(alu_is_cayman_trans));
167 }
168
169 bool yes{false};
170
171 };
172
add_vec_instructions(AluInstr * instr)173 bool AluGroup::add_vec_instructions(AluInstr *instr)
174 {
175 if (!update_indirect_access(instr))
176 return false;
177
178 int param_src = -1;
179 for (auto& s : instr->sources()) {
180 auto is = s->as_inline_const();
181 if (is)
182 param_src = is->sel() - ALU_SRC_PARAM_BASE;
183 }
184
185 if (param_src >= 0) {
186 if (m_param_used < 0)
187 m_param_used = param_src;
188 else if (m_param_used != param_src)
189 return false;
190 }
191
192 if (m_has_lds_op && instr->has_lds_access())
193 return false;
194
195 int preferred_chan = instr->dest_chan();
196 if (!m_slots[preferred_chan]) {
197 if (instr->bank_swizzle() != alu_vec_unknown) {
198 if (try_readport(instr, instr->bank_swizzle()))
199 return true;
200 } else {
201 for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
202 if (try_readport(instr, i))
203 return true;
204 }
205 }
206 } else {
207
208 auto dest = instr->dest();
209 if (dest && dest->pin() == pin_free) {
210
211 for (auto u : dest->uses()) {
212 AluAllowSlotSwitch swich_allowed;
213 u->accept(swich_allowed);
214 if (!swich_allowed.yes)
215 return false;
216 }
217
218 int free_chan = 0;
219 while (m_slots[free_chan] && free_chan < 4)
220 free_chan++;
221
222 if (!m_slots[free_chan] && free_chan < 4) {
223 sfn_log << SfnLog::schedule << "V: Try force channel " << free_chan << "\n";
224 dest->set_chan(free_chan);
225 if (instr->bank_swizzle() != alu_vec_unknown) {
226 if (try_readport(instr, instr->bank_swizzle()))
227 return true;
228 } else {
229 for (AluBankSwizzle i = alu_vec_012; i != alu_vec_unknown; ++i) {
230 if (try_readport(instr, i))
231 return true;
232 }
233 }
234 }
235 }
236 }
237 return false;
238 }
239
try_readport(AluInstr * instr,AluBankSwizzle cycle)240 bool AluGroup::try_readport(AluInstr *instr, AluBankSwizzle cycle)
241 {
242 int preferred_chan = instr->dest_chan();
243 AluReadportReservation readports_evaluator = m_readports_evaluator;
244 if (readports_evaluator.schedule_vec_instruction(*instr, cycle)) {
245 m_readports_evaluator = readports_evaluator;
246 m_slots[preferred_chan] = instr;
247 m_has_lds_op |= instr->has_lds_access();
248 sfn_log << SfnLog::schedule << "V: " << *instr << "\n";
249 auto dest = instr->dest();
250 if (dest && dest->pin() == pin_free)
251 dest->set_pin(pin_chan);
252 instr->pin_sources_to_chan();
253 return true;
254 }
255 return false;
256 }
257
update_indirect_access(AluInstr * instr)258 bool AluGroup::update_indirect_access(AluInstr *instr)
259 {
260 auto [indirect_addr, for_src, is_index ] = instr->indirect_addr();
261
262 if (indirect_addr) {
263 if (!m_addr_used) {
264 m_addr_used = indirect_addr;
265 m_addr_for_src = for_src;
266 m_addr_is_index = is_index;
267 } else if (!indirect_addr->equal_to(*m_addr_used)) {
268 return false;
269 }
270 }
271
272 return true;
273 }
274
accept(ConstInstrVisitor & visitor) const275 void AluGroup::accept(ConstInstrVisitor& visitor) const
276 {
277 visitor.visit(*this);
278 }
279
accept(InstrVisitor & visitor)280 void AluGroup::accept(InstrVisitor& visitor)
281 {
282 visitor.visit(this);
283 }
284
set_scheduled()285 void AluGroup::set_scheduled()
286 {
287 for (int i = 0; i < s_max_slots; ++i) {
288 if (m_slots[i])
289 m_slots[i]->set_scheduled();
290 }
291 }
292
fix_last_flag()293 void AluGroup::fix_last_flag()
294 {
295 bool last_seen = false;
296 for (int i = s_max_slots - 1; i >= 0; --i) {
297 if (m_slots[i]) {
298 if (!last_seen) {
299 m_slots[i]->set_alu_flag(alu_last_instr);
300 last_seen = true;
301 } else {
302 m_slots[i]->reset_alu_flag(alu_last_instr);
303 }
304 }
305 }
306 }
307
is_equal_to(const AluGroup & other) const308 bool AluGroup::is_equal_to(const AluGroup& other) const
309 {
310 for (int i = 0; i < s_max_slots; ++i) {
311 if (!other.m_slots[i]) {
312 if (!m_slots[i])
313 continue;
314 else
315 return false;
316 }
317
318 if (m_slots[i]) {
319 if (!other.m_slots[i])
320 return false;
321 else if (!m_slots[i]->is_equal_to(*other.m_slots[i]))
322 return false;
323 }
324 }
325 return true;
326 }
327
has_lds_group_end() const328 bool AluGroup::has_lds_group_end() const
329 {
330 for (int i = 0; i < s_max_slots; ++i) {
331 if (m_slots[i] && m_slots[i]->has_alu_flag(alu_lds_group_end))
332 return true;
333 }
334 return false;
335 }
336
do_ready() const337 bool AluGroup::do_ready() const
338 {
339 for (int i = 0; i < s_max_slots; ++i) {
340 if (m_slots[i] && !m_slots[i]->ready())
341 return false;
342 }
343 return true;
344 }
345
forward_set_blockid(int id,int index)346 void AluGroup::forward_set_blockid(int id, int index)
347 {
348 for (int i = 0; i < s_max_slots; ++i) {
349 if (m_slots[i]) {
350 m_slots[i]->set_blockid(id, index);
351 }
352 }
353 }
354
slots() const355 uint32_t AluGroup::slots() const
356 {
357 uint32_t result = (m_readports_evaluator.m_nliterals + 1) >> 1;
358 for (int i = 0; i < s_max_slots; ++i) {
359 if (m_slots[i])
360 ++result;
361 }
362 if (m_addr_used) {
363 ++result;
364 if (m_addr_is_index)
365 ++result;
366 }
367
368 return result;
369 }
370
do_print(std::ostream & os) const371 void AluGroup::do_print(std::ostream& os) const
372 {
373 const char slotname[] = "xyzwt";
374
375 os << "ALU_GROUP_BEGIN\n";
376 for (int i = 0; i < s_max_slots; ++i) {
377 if (m_slots[i]) {
378 for (int j = 0; j < 2 * m_nesting_depth + 4; ++j)
379 os << ' ';
380 os << slotname[i] << ": ";
381 m_slots[i]->print(os);
382 os << "\n";
383 }
384 }
385 for (int i = 0; i < 2 * m_nesting_depth + 2; ++i)
386 os << ' ';
387 os << "ALU_GROUP_END";
388 }
389
get_kconsts() const390 AluInstr::SrcValues AluGroup::get_kconsts() const
391 {
392 AluInstr::SrcValues result;
393
394 for (int i = 0; i < s_max_slots; ++i) {
395 if (m_slots[i]) {
396 for (auto s : m_slots[i]->sources())
397 if (s->as_uniform())
398 result.push_back(s);
399 }
400 }
401 return result;
402 }
403
set_chipclass(r600_chip_class chip_class)404 void AluGroup::set_chipclass(r600_chip_class chip_class)
405 {
406 s_chip_class = chip_class;
407 s_max_slots = chip_class == ISA_CC_CAYMAN ? 4 : 5;
408 }
409
410 int AluGroup::s_max_slots = 5;
411 r600_chip_class AluGroup::s_chip_class = ISA_CC_EVERGREEN;
412 }
413