/* -*- mesa-c++ -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "sfn_ir_to_assembly.h"
#include "sfn_conditionaljumptracker.h"
#include "sfn_callstack.h"
#include "sfn_instruction_gds.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include "../r600_shader.h"
#include "../r600_sq.h"

namespace r600 {

using std::vector;

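/* Worker class that walks the IR blocks produced by the NIR lowering passes
 * and translates each instruction into r600 bytecode by means of the
 * r600_bytecode_add_* helpers of the legacy assembler. */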
struct AssemblyFromShaderLegacyImpl {

   AssemblyFromShaderLegacyImpl(r600_shader *sh, r600_shader_key *key);
   bool emit(const Instruction::Pointer i);
   void reset_addr_register() {m_last_addr.reset();}

private:
   bool emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op);
   bool emit_export(const ExportInstruction & exi);
   bool emit_streamout(const StreamOutIntruction& instr);
   bool emit_memringwrite(const MemRingOutIntruction& instr);
   bool emit_tex(const TexInstruction & tex_instr);
   bool emit_vtx(const FetchInstruction& fetch_instr);
   bool emit_if_start(const IfInstruction & if_instr);
   bool emit_else(const ElseInstruction & else_instr);
   bool emit_endif(const IfElseEndInstruction & endif_instr);
   bool emit_emit_vertex(const EmitVertex &instr);

   bool emit_loop_begin(const LoopBeginInstruction& instr);
   bool emit_loop_end(const LoopEndInstruction& instr);
   bool emit_loop_break(const LoopBreakInstruction& instr);
   bool emit_loop_continue(const LoopContInstruction& instr);
   bool emit_wait_ack(const WaitAck& instr);
   bool emit_wr_scratch(const WriteScratchInstruction& instr);
   bool emit_gds(const GDSInstr& instr);
   bool emit_rat(const RatInstruction& instr);
   bool emit_ldswrite(const LDSWriteInstruction& instr);
   bool emit_ldsread(const LDSReadInstruction& instr);
   bool emit_ldsatomic(const LDSAtomicInstruction& instr);
   bool emit_tf_write(const GDSStoreTessFactor& instr);

   bool emit_load_addr(PValue addr);
   bool emit_fs_pixel_export(const ExportInstruction & exi);
   bool emit_vs_pos_export(const ExportInstruction & exi);
   bool emit_vs_param_export(const ExportInstruction & exi);
   bool copy_dst(r600_bytecode_alu_dst& dst, const Value& src);
   bool copy_src(r600_bytecode_alu_src& src, const Value& s);

   EBufferIndexMode emit_index_reg(const Value& reg, unsigned idx);

   ConditionalJumpTracker m_jump_tracker;
   CallStack m_callstack;

public:
   r600_bytecode *m_bc;
   r600_shader *m_shader;
   r600_shader_key *m_key;
   r600_bytecode_output m_output;
   unsigned m_max_color_exports;
   bool has_pos_output;
   bool has_param_output;
   PValue m_last_addr;
   int m_loop_nesting;
   int m_nliterals_in_group;
   std::set<int> vtx_fetch_results;
};


AssemblyFromShaderLegacy::AssemblyFromShaderLegacy(struct r600_shader *sh,
                                                   r600_shader_key *key)
{
   impl = new AssemblyFromShaderLegacyImpl(sh, key);
}

AssemblyFromShaderLegacy::~AssemblyFromShaderLegacy()
{
   delete impl;
}

bool AssemblyFromShaderLegacy::do_lower(const std::vector<InstructionBlock>& ir)
{
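   /* A vertex shader that reads inputs must first call the fetch shader
    * that loads the vertex attributes. */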
   if (impl->m_shader->processor_type == PIPE_SHADER_VERTEX &&
       impl->m_shader->ninput > 0)
      r600_bytecode_add_cfinst(impl->m_bc, CF_OP_CALL_FS);

   std::vector<Instruction::Pointer> exports;

   for (const auto& block : ir) {
      for (const auto& i : block) {
         if (!impl->emit(i))
            return false;
         if (i->type() != Instruction::alu)
            impl->reset_addr_register();
      }
   }
   /*
   for (const auto& i : exports) {
      if (!impl->emit_export(static_cast<const ExportInstruction&>(*i)))
          return false;
   }*/

   const struct cf_op_info *last = nullptr;
   if (impl->m_bc->cf_last)
      last = r600_isa_cf(impl->m_bc->cf_last->op);

   /* ALU clause instructions don't have an EOP bit, so add a NOP */
   if (!last || last->flags & CF_ALU || impl->m_bc->cf_last->op == CF_OP_LOOP_END
       || impl->m_bc->cf_last->op == CF_OP_POP)
      r600_bytecode_add_cfinst(impl->m_bc, CF_OP_NOP);

   /* A fetch shader call can't carry the EOP bit (this results in a hang),
    * but we can replace it by a NOP */
   else if (impl->m_bc->cf_last->op == CF_OP_CALL_FS)
      impl->m_bc->cf_last->op = CF_OP_NOP;

   if (impl->m_shader->bc.chip_class != CAYMAN)
      impl->m_bc->cf_last->end_of_program = 1;
   else
      cm_bytecode_add_cf_end(impl->m_bc);

   return true;
}

bool AssemblyFromShaderLegacyImpl::emit(const Instruction::Pointer i)
{
   if (i->type() != Instruction::vtx)
      vtx_fetch_results.clear();

   sfn_log << SfnLog::assembly << "Emit from '" << *i << "'\n";
   switch (i->type()) {
   case Instruction::alu:
      return emit_alu(static_cast<const AluInstruction&>(*i), cf_alu_undefined);
   case Instruction::exprt:
      return emit_export(static_cast<const ExportInstruction&>(*i));
   case Instruction::tex:
      return emit_tex(static_cast<const TexInstruction&>(*i));
   case Instruction::vtx:
      return emit_vtx(static_cast<const FetchInstruction&>(*i));
   case Instruction::cond_if:
      return emit_if_start(static_cast<const IfInstruction&>(*i));
   case Instruction::cond_else:
      return emit_else(static_cast<const ElseInstruction&>(*i));
   case Instruction::cond_endif:
      return emit_endif(static_cast<const IfElseEndInstruction&>(*i));
   case Instruction::loop_begin:
      return emit_loop_begin(static_cast<const LoopBeginInstruction&>(*i));
   case Instruction::loop_end:
      return emit_loop_end(static_cast<const LoopEndInstruction&>(*i));
   case Instruction::loop_break:
      return emit_loop_break(static_cast<const LoopBreakInstruction&>(*i));
   case Instruction::loop_continue:
      return emit_loop_continue(static_cast<const LoopContInstruction&>(*i));
   case Instruction::streamout:
      return emit_streamout(static_cast<const StreamOutIntruction&>(*i));
   case Instruction::ring:
      return emit_memringwrite(static_cast<const MemRingOutIntruction&>(*i));
   case Instruction::emit_vtx:
      return emit_emit_vertex(static_cast<const EmitVertex&>(*i));
   case Instruction::wait_ack:
      return emit_wait_ack(static_cast<const WaitAck&>(*i));
   case Instruction::mem_wr_scratch:
      return emit_wr_scratch(static_cast<const WriteScratchInstruction&>(*i));
   case Instruction::gds:
      return emit_gds(static_cast<const GDSInstr&>(*i));
   case Instruction::rat:
      return emit_rat(static_cast<const RatInstruction&>(*i));
   case Instruction::lds_write:
      return emit_ldswrite(static_cast<const LDSWriteInstruction&>(*i));
   case Instruction::lds_read:
      return emit_ldsread(static_cast<const LDSReadInstruction&>(*i));
   case Instruction::lds_atomic:
      return emit_ldsatomic(static_cast<const LDSAtomicInstruction&>(*i));
   case Instruction::tf_write:
      return emit_tf_write(static_cast<const GDSStoreTessFactor&>(*i));
   default:
      return false;
   }
}

AssemblyFromShaderLegacyImpl::AssemblyFromShaderLegacyImpl(r600_shader *sh,
                                                           r600_shader_key *key):
   m_callstack(sh->bc),
   m_bc(&sh->bc),
   m_shader(sh),
   m_key(key),
   has_pos_output(false),
   has_param_output(false),
   m_loop_nesting(0),
   m_nliterals_in_group(0)
{
   m_max_color_exports = MAX2(m_key->ps.nr_cbufs, 1);
}

extern const std::map<EAluOp, int> opcode_map;

bool AssemblyFromShaderLegacyImpl::emit_load_addr(PValue addr)
{
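   /* Only record which register should back AR here; the actual MOVA is
    * inserted by r600_bytecode_add_alu when an instruction uses relative
    * addressing, which is why ar_loaded is cleared below. */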
   m_bc->ar_reg = addr->sel();
   m_bc->ar_chan = addr->chan();
   m_bc->ar_loaded = 0;
   m_last_addr = addr;

   sfn_log << SfnLog::assembly << " Prepare " << *addr << " as address register\n";

   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_alu(const AluInstruction& ai, ECFAluOpCode cf_op)
{

   struct r600_bytecode_alu alu;
   memset(&alu, 0, sizeof(alu));
   PValue addr_in_use;

   if (opcode_map.find(ai.opcode()) == opcode_map.end()) {
      std::cerr << "Opcode not handled for " << ai << "\n";
      return false;
   }

   unsigned old_nliterals_in_group = m_nliterals_in_group;
   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);
      if (s.type() == Value::literal)
         ++m_nliterals_in_group;
   }

   /* This instruction group would exceed the limit of literals, so
    * force a new instruction group by adding a NOP as last
    * instruction. This will no longer be needed with a real
    * scheduler. */
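   /* An ALU instruction group can be followed by at most two 64-bit
    * literal slots, i.e. four 32-bit literal values shared by the whole
    * group, which is where the limit of four checked here comes from. */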
   if (m_nliterals_in_group > 4) {
      sfn_log << SfnLog::assembly << " Have " << m_nliterals_in_group
              << " literals, inject a NOP to force a new group\n";
      alu.op = ALU_OP0_NOP;
      alu.last = 1;
      alu.dst.chan = 3;
      int retval = r600_bytecode_add_alu(m_bc, &alu);
      if (retval)
         return false;
      memset(&alu, 0, sizeof(alu));
      m_nliterals_in_group -= old_nliterals_in_group;
   }

   alu.op = opcode_map.at(ai.opcode());

   /* TODO: test whether ai actually has a dest */
   auto dst = ai.dest();

   if (dst) {
      if (!copy_dst(alu.dst, *dst))
         return false;

      alu.dst.write = ai.flag(alu_write);
      alu.dst.clamp = ai.flag(alu_dst_clamp);

      if (dst->type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(*dst);
         PValue addr = v.indirect();
         if (addr) {
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.dst.rel = addr ? 1 : 0;
         }
      }
   }

   alu.is_op3 = ai.n_sources() == 3;

   for (unsigned i = 0; i < ai.n_sources(); ++i) {
      auto& s = ai.src(i);

      if (!copy_src(alu.src[i], s))
         return false;
      alu.src[i].neg = ai.flag(AluInstruction::src_neg_flags[i]);

      if (s.type() == Value::gpr_array_value) {
         auto& v = static_cast<const GPRArrayValue&>(s);
         PValue addr = v.indirect();
         if (addr) {
            assert(!addr_in_use || (*addr_in_use == *addr));
            if (!m_last_addr || *addr != *m_last_addr) {
               emit_load_addr(addr);
               addr_in_use = addr;
            }
            alu.src[i].rel = addr ? 1 : 0;
         }
      }
      if (!alu.is_op3)
         alu.src[i].abs = ai.flag(AluInstruction::src_abs_flags[i]);
   }

   if (ai.bank_swizzle() != alu_vec_unknown)
      alu.bank_swizzle_force = ai.bank_swizzle();

   alu.last = ai.flag(alu_last_instr);
   alu.update_pred = ai.flag(alu_update_pred);
   alu.execute_mask = ai.flag(alu_update_exec);

   /* If the destination register is equal to the last loaded address register
    * then clear the latter one, because the values will no longer be identical */
   if (m_last_addr)
      sfn_log << SfnLog::assembly << " Current address register is " << *m_last_addr << "\n";

   if (dst)
      sfn_log << SfnLog::assembly << " Current dst register is " << *dst << "\n";

   if (dst && m_last_addr)
      if (*dst == *m_last_addr) {
         sfn_log << SfnLog::assembly << " Clear address register (was " << *m_last_addr << ")\n";
         m_last_addr.reset();
      }

   if (cf_op == cf_alu_undefined)
      cf_op = ai.cf_type();

   unsigned type = 0;
   switch (cf_op) {
   case cf_alu: type = CF_OP_ALU; break;
   case cf_alu_push_before: type = CF_OP_ALU_PUSH_BEFORE; break;
   case cf_alu_pop_after: type = CF_OP_ALU_POP_AFTER; break;
   case cf_alu_pop2_after: type = CF_OP_ALU_POP2_AFTER; break;
   case cf_alu_break: type = CF_OP_ALU_BREAK; break;
   case cf_alu_else_after: type = CF_OP_ALU_ELSE_AFTER; break;
   case cf_alu_continue: type = CF_OP_ALU_CONTINUE; break;
   case cf_alu_extended: type = CF_OP_ALU_EXT; break;
   default:
      assert(0 && "cf_alu_undefined should have been replaced");
   }

   if (alu.last)
      m_nliterals_in_group = 0;

   bool retval = !r600_bytecode_add_alu_type(m_bc, &alu, type);

   if (ai.opcode() == op1_mova_int)
      m_bc->ar_loaded = 0;

   if (ai.opcode() == op1_set_cf_idx0)
      m_bc->index_loaded[0] = 1;

   if (ai.opcode() == op1_set_cf_idx1)
      m_bc->index_loaded[1] = 1;

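   /* KILL* and SET_CF_IDX* change control flow state, so the following
    * ALU group must start a new CF instruction. */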
   m_bc->force_add_cf |= (ai.opcode() == op2_kille ||
                          ai.opcode() == op2_killne_int ||
                          ai.opcode() == op1_set_cf_idx0 ||
                          ai.opcode() == op1_set_cf_idx1);
   return retval;
}

bool AssemblyFromShaderLegacyImpl::emit_vs_pos_export(const ExportInstruction & exi)
{
   r600_bytecode_output output;
   memset(&output, 0, sizeof(output));
   assert(exi.gpr().type() == Value::gpr_vector);
   const auto& gpr = exi.gpr();
   output.gpr = gpr.sel();
   output.elem_size = 3;
   output.swizzle_x = gpr.chan_i(0);
   output.swizzle_y = gpr.chan_i(1);
   output.swizzle_z = gpr.chan_i(2);
   output.swizzle_w = gpr.chan_i(3);
   output.burst_count = 1;
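   /* Position exports are written to array_base 60 and up */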
   output.array_base = 60 + exi.location();
   output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE : CF_OP_EXPORT;
   output.type = exi.export_type();

   if (r600_bytecode_add_output(m_bc, &output)) {
      R600_ERR("Error adding position export at location %d\n", exi.location());
      return false;
   }

   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_vs_param_export(const ExportInstruction & exi)
{
   r600_bytecode_output output;
   assert(exi.gpr().type() == Value::gpr_vector);
   const auto& gpr = exi.gpr();

   memset(&output, 0, sizeof(output));
   output.gpr = gpr.sel();
   output.elem_size = 3;
   output.swizzle_x = gpr.chan_i(0);
   output.swizzle_y = gpr.chan_i(1);
   output.swizzle_z = gpr.chan_i(2);
   output.swizzle_w = gpr.chan_i(3);
   output.burst_count = 1;
   output.array_base = exi.location();
   output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE : CF_OP_EXPORT;
   output.type = exi.export_type();

   if (r600_bytecode_add_output(m_bc, &output)) {
      R600_ERR("Error adding param export at location %d\n", exi.location());
      return false;
   }

   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_fs_pixel_export(const ExportInstruction & exi)
{
   if (exi.location() >= m_max_color_exports && exi.location() < 60) {
      R600_ERR("shader_from_nir: ignore pixel export %u, because the supported max is %u\n",
               exi.location(), m_max_color_exports);
      return true;
   }

   assert(exi.gpr().type() == Value::gpr_vector);
   const auto& gpr = exi.gpr();

   r600_bytecode_output output;
   memset(&output, 0, sizeof(output));

   output.gpr = gpr.sel();
   output.elem_size = 3;
   output.swizzle_x = gpr.chan_i(0);
   output.swizzle_y = gpr.chan_i(1);
   output.swizzle_z = gpr.chan_i(2);
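   /* With alpha-to-one, swizzle 5 selects the constant 1.0 as export source */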
   output.swizzle_w = m_key->ps.alpha_to_one ? 5 : gpr.chan_i(3);
   output.burst_count = 1;
   output.array_base = exi.location();
   output.op = exi.is_last_export() ? CF_OP_EXPORT_DONE : CF_OP_EXPORT;
   output.type = exi.export_type();

   if (r600_bytecode_add_output(m_bc, &output)) {
      R600_ERR("Error adding pixel export at location %d\n", exi.location());
      return false;
   }

   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_export(const ExportInstruction & exi)
{
   switch (exi.export_type()) {
   case ExportInstruction::et_pixel:
      return emit_fs_pixel_export(exi);
   case ExportInstruction::et_pos:
      return emit_vs_pos_export(exi);
   case ExportInstruction::et_param:
      return emit_vs_param_export(exi);
   default:
      R600_ERR("shader_from_nir: export %d type not yet supported\n", exi.export_type());
      return false;
   }
}

bool AssemblyFromShaderLegacyImpl::emit_if_start(const IfInstruction & if_instr)
{
   bool needs_workaround = false;
   int elems = m_callstack.push(FC_PUSH_VPM);
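
   /* Hardware stack workarounds (the same logic exists in r600_shader.c):
    * on Cayman nested loops need it, and on the smaller evergreen parts a
    * push that lands on a stack entry-size boundary needs an explicit
    * PUSH instead of ALU_PUSH_BEFORE. */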
   if (m_bc->chip_class == CAYMAN && m_bc->stack.loop > 1)
      needs_workaround = true;
   if (m_bc->family != CHIP_HEMLOCK &&
       m_bc->family != CHIP_CYPRESS &&
       m_bc->family != CHIP_JUNIPER) {
      unsigned dmod1 = (elems - 1) % m_bc->stack.entry_size;
      unsigned dmod2 = (elems) % m_bc->stack.entry_size;

      if (elems && (!dmod1 || !dmod2))
         needs_workaround = true;
   }

   auto& pred = if_instr.pred();
   auto op = cf_alu_push_before;

   if (needs_workaround) {
      r600_bytecode_add_cfinst(m_bc, CF_OP_PUSH);
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
      op = cf_alu;
   }
   emit_alu(pred, op);

   r600_bytecode_add_cfinst(m_bc, CF_OP_JUMP);

   m_jump_tracker.push(m_bc->cf_last, jt_if);
   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_else(UNUSED const ElseInstruction & else_instr)
{
   r600_bytecode_add_cfinst(m_bc, CF_OP_ELSE);
   m_bc->cf_last->pop_count = 1;
   return m_jump_tracker.add_mid(m_bc->cf_last, jt_if);
}

bool AssemblyFromShaderLegacyImpl::emit_endif(UNUSED const IfElseEndInstruction & endif_instr)
{
   m_callstack.pop(FC_PUSH_VPM);

   unsigned force_pop = m_bc->force_add_cf;
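   /* Try to fold the pop into the last ALU clause by rewriting its CF op
    * to ALU_POP_AFTER or ALU_POP2_AFTER; fall back to an explicit POP
    * when that is not possible. */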
   if (!force_pop) {
      int alu_pop = 3;
      if (m_bc->cf_last) {
         if (m_bc->cf_last->op == CF_OP_ALU)
            alu_pop = 0;
         else if (m_bc->cf_last->op == CF_OP_ALU_POP_AFTER)
            alu_pop = 1;
      }
      alu_pop += 1;
      if (alu_pop == 1) {
         m_bc->cf_last->op = CF_OP_ALU_POP_AFTER;
         m_bc->force_add_cf = 1;
      } else if (alu_pop == 2) {
         m_bc->cf_last->op = CF_OP_ALU_POP2_AFTER;
         m_bc->force_add_cf = 1;
      } else {
         force_pop = 1;
      }
   }

   if (force_pop) {
      r600_bytecode_add_cfinst(m_bc, CF_OP_POP);
      m_bc->cf_last->pop_count = 1;
      m_bc->cf_last->cf_addr = m_bc->cf_last->id + 2;
   }

   return m_jump_tracker.pop(m_bc->cf_last, jt_if);
}

bool AssemblyFromShaderLegacyImpl::emit_loop_begin(UNUSED const LoopBeginInstruction& instr)
{
   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_START_DX10);
   m_jump_tracker.push(m_bc->cf_last, jt_loop);
   m_callstack.push(FC_LOOP);
   ++m_loop_nesting;
   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_loop_end(UNUSED const LoopEndInstruction& instr)
{
   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_END);
   m_callstack.pop(FC_LOOP);
   assert(m_loop_nesting);
   --m_loop_nesting;
   return m_jump_tracker.pop(m_bc->cf_last, jt_loop);
}

bool AssemblyFromShaderLegacyImpl::emit_loop_break(UNUSED const LoopBreakInstruction& instr)
{
   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_BREAK);
   return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
}

bool AssemblyFromShaderLegacyImpl::emit_loop_continue(UNUSED const LoopContInstruction &instr)
{
   r600_bytecode_add_cfinst(m_bc, CF_OP_LOOP_CONTINUE);
   return m_jump_tracker.add_mid(m_bc->cf_last, jt_loop);
}

bool AssemblyFromShaderLegacyImpl::emit_streamout(const StreamOutIntruction& so_instr)
{
   struct r600_bytecode_output output;
   memset(&output, 0, sizeof(struct r600_bytecode_output));

   output.gpr = so_instr.gpr().sel();
   output.elem_size = so_instr.element_size();
   output.array_base = so_instr.array_base();
   output.type = V_SQ_CF_ALLOC_EXPORT_WORD0_SQ_EXPORT_WRITE;
   output.burst_count = so_instr.burst_count();
   output.array_size = so_instr.array_size();
   output.comp_mask = so_instr.comp_mask();
   output.op = so_instr.op();

   assert(output.op >= CF_OP_MEM_STREAM0_BUF0 && output.op <= CF_OP_MEM_STREAM3_BUF3);

   if (r600_bytecode_add_output(m_bc, &output)) {
      R600_ERR("shader_from_nir: Error creating stream output instruction\n");
      return false;
   }
   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_memringwrite(const MemRingOutIntruction& instr)
{
   struct r600_bytecode_output output;
   memset(&output, 0, sizeof(struct r600_bytecode_output));

   output.gpr = instr.gpr().sel();
   output.type = instr.type();
   output.elem_size = 3;
   output.comp_mask = 0xf;
   output.burst_count = 1;
   output.op = instr.op();
   if (instr.type() == mem_write_ind || instr.type() == mem_write_ind_ack) {
      output.index_gpr = instr.index_reg();
      output.array_size = 0xfff;
   }
   output.array_base = instr.array_base();

   if (r600_bytecode_add_output(m_bc, &output)) {
      R600_ERR("shader_from_nir: Error creating mem ring write instruction\n");
      return false;
   }
   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_tex(const TexInstruction & tex_instr)
{
   auto addr = tex_instr.sampler_offset();
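   /* Indirect sampler access: load the sampler offset into index register 1
    * (MOVA_INT followed by SET_CF_IDX1) unless it is already loaded; inside
    * loops the cached value can't be trusted, so always reload. */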
   if (addr && (!m_bc->index_loaded[1] || m_loop_nesting
                || m_bc->index_reg[1] != addr->sel()
                || m_bc->index_reg_chan[1] != addr->chan())) {
      struct r600_bytecode_alu alu;
      memset(&alu, 0, sizeof(alu));
      alu.op = opcode_map.at(op1_mova_int);
      alu.dst.chan = 0;
      alu.src[0].sel = addr->sel();
      alu.src[0].chan = addr->chan();
      alu.last = 1;
      int r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return false;

      m_bc->ar_loaded = 0;

      alu.op = opcode_map.at(op1_set_cf_idx1);
      alu.dst.chan = 0;
      alu.src[0].sel = 0;
      alu.src[0].chan = 0;
      alu.last = 1;

      r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return false;

      m_bc->index_reg[1] = addr->sel();
      m_bc->index_reg_chan[1] = addr->chan();
      m_bc->index_loaded[1] = true;
   }

   r600_bytecode_tex tex;
   memset(&tex, 0, sizeof(struct r600_bytecode_tex));
   tex.op = tex_instr.opcode();
   tex.sampler_id = tex_instr.sampler_id();
   tex.sampler_index_mode = 0;
   tex.resource_id = tex_instr.resource_id();
   tex.resource_index_mode = 0;
   tex.src_gpr = tex_instr.src().sel();
   tex.dst_gpr = tex_instr.dst().sel();
   tex.dst_sel_x = tex_instr.dest_swizzle(0);
   tex.dst_sel_y = tex_instr.dest_swizzle(1);
   tex.dst_sel_z = tex_instr.dest_swizzle(2);
   tex.dst_sel_w = tex_instr.dest_swizzle(3);
   tex.src_sel_x = tex_instr.src().chan_i(0);
   tex.src_sel_y = tex_instr.src().chan_i(1);
   tex.src_sel_z = tex_instr.src().chan_i(2);
   tex.src_sel_w = tex_instr.src().chan_i(3);
   tex.coord_type_x = !tex_instr.has_flag(TexInstruction::x_unnormalized);
   tex.coord_type_y = !tex_instr.has_flag(TexInstruction::y_unnormalized);
   tex.coord_type_z = !tex_instr.has_flag(TexInstruction::z_unnormalized);
   tex.coord_type_w = !tex_instr.has_flag(TexInstruction::w_unnormalized);
   tex.offset_x = tex_instr.get_offset(0);
   tex.offset_y = tex_instr.get_offset(1);
   tex.offset_z = tex_instr.get_offset(2);
   tex.resource_index_mode = (!!addr) ? 2 : 0;
   tex.sampler_index_mode = tex.resource_index_mode;

   if (tex_instr.opcode() == TexInstruction::get_gradient_h ||
       tex_instr.opcode() == TexInstruction::get_gradient_v)
      tex.inst_mod = tex_instr.has_flag(TexInstruction::grad_fine) ? 1 : 0;
   else
      tex.inst_mod = tex_instr.inst_mode();
   if (r600_bytecode_add_tex(m_bc, &tex)) {
      R600_ERR("shader_from_nir: Error creating tex assembly instruction\n");
      return false;
   }
   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_vtx(const FetchInstruction& fetch_instr)
{
   int buffer_offset = 0;
   auto addr = fetch_instr.buffer_offset();
   auto index_mode = fetch_instr.buffer_index_mode();

   if (addr) {
      if (addr->type() == Value::literal) {
         const auto& boffs = static_cast<const LiteralValue&>(*addr);
         buffer_offset = boffs.value();
      } else {
         index_mode = emit_index_reg(*addr, 0);
      }
   }

   if (fetch_instr.has_prelude()) {
      for (auto &i : fetch_instr.prelude()) {
         if (!emit(i))
            return false;
      }
   }

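   /* A fetch that sources a GPR written by an earlier fetch in the same
    * clause would not see the result yet; force a new CF instruction in
    * that case. */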
   if (vtx_fetch_results.find(fetch_instr.src().sel()) !=
       vtx_fetch_results.end()) {
      m_bc->force_add_cf = 1;
      vtx_fetch_results.clear();
   }
   vtx_fetch_results.insert(fetch_instr.dst().sel());

   struct r600_bytecode_vtx vtx;
   memset(&vtx, 0, sizeof(vtx));
   vtx.op = fetch_instr.vc_opcode();
   vtx.buffer_id = fetch_instr.buffer_id() + buffer_offset;
   vtx.fetch_type = fetch_instr.fetch_type();
   vtx.src_gpr = fetch_instr.src().sel();
   vtx.src_sel_x = fetch_instr.src().chan();
   vtx.mega_fetch_count = fetch_instr.mega_fetch_count();
   vtx.dst_gpr = fetch_instr.dst().sel();
   vtx.dst_sel_x = fetch_instr.swz(0); /* SEL_X */
   vtx.dst_sel_y = fetch_instr.swz(1); /* SEL_Y */
   vtx.dst_sel_z = fetch_instr.swz(2); /* SEL_Z */
   vtx.dst_sel_w = fetch_instr.swz(3); /* SEL_W */
   vtx.use_const_fields = fetch_instr.use_const_fields();
   vtx.data_format = fetch_instr.data_format();
   vtx.num_format_all = fetch_instr.num_format(); /* NUM_FORMAT_SCALED */
   vtx.format_comp_all = fetch_instr.is_signed(); /* FORMAT_COMP_SIGNED */
   vtx.endian = fetch_instr.endian_swap();
   vtx.buffer_index_mode = index_mode;
   vtx.offset = fetch_instr.offset();
   vtx.indexed = fetch_instr.indexed();
   vtx.uncached = fetch_instr.uncached();
   vtx.elem_size = fetch_instr.elm_size();
   vtx.array_base = fetch_instr.array_base();
   vtx.array_size = fetch_instr.array_size();
   vtx.srf_mode_all = fetch_instr.srf_mode_no_zero();

   if (fetch_instr.use_tc()) {
      if ((r600_bytecode_add_vtx_tc(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating vtx assembly instruction\n");
         return false;
      }
   } else {
      if ((r600_bytecode_add_vtx(m_bc, &vtx))) {
         R600_ERR("shader_from_nir: Error creating vtx assembly instruction\n");
         return false;
      }
   }

   m_bc->cf_last->vpm = fetch_instr.use_vpm();
   m_bc->cf_last->barrier = 1;

   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_emit_vertex(const EmitVertex &instr)
{
   int r = r600_bytecode_add_cfinst(m_bc, instr.op());
   if (!r)
      m_bc->cf_last->count = instr.stream();
   assert(m_bc->cf_last->count < 4);

   return r == 0;
}

bool AssemblyFromShaderLegacyImpl::emit_wait_ack(const WaitAck& instr)
{
   int r = r600_bytecode_add_cfinst(m_bc, instr.op());
   if (!r)
      m_bc->cf_last->cf_addr = instr.n_ack();

   return r == 0;
}

bool AssemblyFromShaderLegacyImpl::emit_wr_scratch(const WriteScratchInstruction& instr)
{
   struct r600_bytecode_output cf;

   memset(&cf, 0, sizeof(struct r600_bytecode_output));

   cf.op = CF_OP_MEM_SCRATCH;
   cf.elem_size = 3;
   cf.gpr = instr.gpr().sel();
   cf.mark = 1;
   cf.comp_mask = instr.write_mask();
   cf.swizzle_x = 0;
   cf.swizzle_y = 1;
   cf.swizzle_z = 2;
   cf.swizzle_w = 3;
   cf.burst_count = 1;

   if (instr.indirect()) {
      cf.type = 3;
      cf.index_gpr = instr.address();

      /* The documentation seems to be wrong here: with indirect addressing
       * the address_base seems to be the array_size */
      cf.array_size = instr.array_size();
   } else {
      cf.type = 2;
      cf.array_base = instr.location();
   }
   /* This should be 0, but the address calculation is apparently wrong */

   if (r600_bytecode_add_output(m_bc, &cf)) {
      R600_ERR("shader_from_nir: Error creating SCRATCH_WR assembly instruction\n");
      return false;
   }

   return true;
}

extern const std::map<ESDOp, int> ds_opcode_map;

bool AssemblyFromShaderLegacyImpl::emit_gds(const GDSInstr& instr)
{
   struct r600_bytecode_gds gds;

   int uav_idx = -1;
   auto addr = instr.uav_id();
   if (addr->type() != Value::literal) {
      emit_index_reg(*addr, 1);
   } else {
      const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
      uav_idx = addr_reg.value();
   }

   memset(&gds, 0, sizeof(struct r600_bytecode_gds));

   gds.op = ds_opcode_map.at(instr.op());
   gds.dst_gpr = instr.dest_sel();
   gds.uav_id = (uav_idx >= 0 ? uav_idx : 0) + instr.uav_base();
   gds.uav_index_mode = uav_idx >= 0 ? bim_none : bim_one;
   gds.src_gpr = instr.src_sel();

   gds.src_sel_x = instr.src_swizzle(0);
   gds.src_sel_y = instr.src_swizzle(1);
   gds.src_sel_z = instr.src_swizzle(2);

   gds.dst_sel_x = instr.dest_swizzle(0);
   gds.dst_sel_y = 7;
   gds.dst_sel_z = 7;
   gds.dst_sel_w = 7;
   gds.src_gpr2 = 0;
   gds.alloc_consume = 1; /* not on Cayman */

   int r = r600_bytecode_add_gds(m_bc, &gds);
   if (r)
      return false;
   m_bc->cf_last->vpm = 1;
   m_bc->cf_last->barrier = 1;
   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_tf_write(const GDSStoreTessFactor& instr)
{
   struct r600_bytecode_gds gds;

   memset(&gds, 0, sizeof(struct r600_bytecode_gds));
   gds.src_gpr = instr.sel();
   gds.src_sel_x = instr.chan(0);
   gds.src_sel_y = instr.chan(1);
   gds.src_sel_z = 4;
   gds.dst_sel_x = 7;
   gds.dst_sel_y = 7;
   gds.dst_sel_z = 7;
   gds.dst_sel_w = 7;
   gds.op = FETCH_OP_TF_WRITE;

   if (r600_bytecode_add_gds(m_bc, &gds) != 0)
      return false;

   if (instr.chan(2) != 7) {
      memset(&gds, 0, sizeof(struct r600_bytecode_gds));
      gds.src_gpr = instr.sel();
      gds.src_sel_x = instr.chan(2);
      gds.src_sel_y = instr.chan(3);
      gds.src_sel_z = 4;
      gds.dst_sel_x = 7;
      gds.dst_sel_y = 7;
      gds.dst_sel_z = 7;
      gds.dst_sel_w = 7;
      gds.op = FETCH_OP_TF_WRITE;

      if (r600_bytecode_add_gds(m_bc, &gds))
         return false;
   }
   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_ldswrite(const LDSWriteInstruction& instr)
{
   r600_bytecode_alu alu;
   memset(&alu, 0, sizeof(r600_bytecode_alu));

   alu.last = true;
   alu.is_lds_idx_op = true;
   copy_src(alu.src[0], instr.address());
   copy_src(alu.src[1], instr.value0());

   if (instr.num_components() == 1) {
      alu.op = LDS_OP2_LDS_WRITE;
   } else {
      alu.op = LDS_OP3_LDS_WRITE_REL;
      alu.lds_idx = 1;
      copy_src(alu.src[2], instr.value1());
   }

   return r600_bytecode_add_alu(m_bc, &alu) == 0;
}

bool AssemblyFromShaderLegacyImpl::emit_ldsread(const LDSReadInstruction& instr)
{
   int r;
   unsigned nread = 0;
   unsigned nfetch = 0;
   unsigned n_values = instr.num_values();

   r600_bytecode_alu alu_fetch;
   r600_bytecode_alu alu_read;

   /* We must start a new ALU clause if the fetch and its read op would
    * otherwise be split, since r600_asm limits a clause to
    * 120 slots = 240 dwords */
   if (m_bc->cf_last->ndw > 240 - 4 * n_values)
      m_bc->force_add_cf = 1;
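
   /* LDS reads are queued: each LDS_READ_RET pushes its result onto the
    * LDS output queue, and a separate MOV pops it again (OQ_A). Issue all
    * fetches first, then drain the queue into the destination registers. */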
   while (nread < n_values) {
      if (nfetch < n_values) {
         memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
         alu_fetch.is_lds_idx_op = true;
         alu_fetch.op = LDS_OP1_LDS_READ_RET;

         copy_src(alu_fetch.src[0], instr.address(nfetch));
         alu_fetch.src[1].sel = V_SQ_ALU_SRC_0;
         alu_fetch.src[2].sel = V_SQ_ALU_SRC_0;
         alu_fetch.last = 1;
         r = r600_bytecode_add_alu(m_bc, &alu_fetch);
         m_bc->cf_last->nlds_read++;
         if (r)
            return false;
      }

      if (nfetch >= n_values) {
         memset(&alu_read, 0, sizeof(r600_bytecode_alu));
         copy_dst(alu_read.dst, instr.dest(nread));
         alu_read.op = ALU_OP1_MOV;
         alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
         alu_read.last = 1;
         alu_read.dst.write = 1;
         r = r600_bytecode_add_alu(m_bc, &alu_read);
         m_bc->cf_last->nqueue_read++;
         if (r)
            return false;
         ++nread;
      }
      ++nfetch;
   }
   assert(m_bc->cf_last->nlds_read == m_bc->cf_last->nqueue_read);

   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_ldsatomic(const LDSAtomicInstruction& instr)
{
   if (m_bc->cf_last->ndw > 240 - 4)
      m_bc->force_add_cf = 1;

   r600_bytecode_alu alu_fetch;
   r600_bytecode_alu alu_read;

   memset(&alu_fetch, 0, sizeof(r600_bytecode_alu));
   alu_fetch.is_lds_idx_op = true;
   alu_fetch.op = instr.op();

   copy_src(alu_fetch.src[0], instr.address());
   copy_src(alu_fetch.src[1], instr.src0());

   if (instr.src1())
      copy_src(alu_fetch.src[2], *instr.src1());
   alu_fetch.last = 1;
   int r = r600_bytecode_add_alu(m_bc, &alu_fetch);
   if (r)
      return false;

   memset(&alu_read, 0, sizeof(r600_bytecode_alu));
   copy_dst(alu_read.dst, instr.dest());
   alu_read.op = ALU_OP1_MOV;
   alu_read.src[0].sel = EG_V_SQ_ALU_SRC_LDS_OQ_A_POP;
   alu_read.last = 1;
   alu_read.dst.write = 1;
   r = r600_bytecode_add_alu(m_bc, &alu_read);
   if (r)
      return false;
   return true;
}

bool AssemblyFromShaderLegacyImpl::emit_rat(const RatInstruction& instr)
{
   struct r600_bytecode_gds gds;

   int rat_idx = instr.rat_id();
   EBufferIndexMode rat_index_mode = bim_none;
   auto addr = instr.rat_id_offset();

   if (addr) {
      if (addr->type() != Value::literal) {
         rat_index_mode = emit_index_reg(*addr, 1);
      } else {
         const LiteralValue& addr_reg = static_cast<const LiteralValue&>(*addr);
         rat_idx += addr_reg.value();
      }
   }
   memset(&gds, 0, sizeof(struct r600_bytecode_gds));

   r600_bytecode_add_cfinst(m_bc, instr.cf_opcode());
   auto cf = m_bc->cf_last;
   cf->rat.id = rat_idx + m_shader->rat_base;
   cf->rat.inst = instr.rat_op();
   cf->rat.index_mode = rat_index_mode;
   cf->output.type = instr.need_ack() ? 3 : 1;
   cf->output.gpr = instr.data_gpr();
   cf->output.index_gpr = instr.index_gpr();
   cf->output.comp_mask = instr.comp_mask();
   cf->output.burst_count = instr.burst_count();
   assert(instr.data_swz(0) == PIPE_SWIZZLE_X);
   if (cf->rat.inst != RatInstruction::STORE_TYPED) {
      assert(instr.data_swz(1) == PIPE_SWIZZLE_Y ||
             instr.data_swz(1) == PIPE_SWIZZLE_MAX);
      assert(instr.data_swz(2) == PIPE_SWIZZLE_Z ||
             instr.data_swz(2) == PIPE_SWIZZLE_MAX);
   }

   cf->vpm = 1;
   cf->barrier = 1;
   cf->mark = instr.need_ack();
   cf->output.elem_size = instr.elm_size();
   return true;
}

EBufferIndexMode
AssemblyFromShaderLegacyImpl::emit_index_reg(const Value& addr, unsigned idx)
{
   assert(idx < 2);

   EAluOp idxop = idx ? op1_set_cf_idx1 : op1_set_cf_idx0;
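
   /* The CF index registers are loaded via the address register: MOVA_INT
    * moves the value into AR, then SET_CF_IDX0/1 copies AR into the
    * requested index register. */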
   if (!m_bc->index_loaded[idx] || m_loop_nesting ||
       m_bc->index_reg[idx] != addr.sel()
       || m_bc->index_reg_chan[idx] != addr.chan()) {
      struct r600_bytecode_alu alu;

      /* Make sure the MOVA is not the last instruction in the clause */
      if ((m_bc->cf_last->ndw >> 1) >= 110)
         m_bc->force_add_cf = 1;

      memset(&alu, 0, sizeof(alu));
      alu.op = opcode_map.at(op1_mova_int);
      alu.dst.chan = 0;
      alu.src[0].sel = addr.sel();
      alu.src[0].chan = addr.chan();
      alu.last = 1;
      sfn_log << SfnLog::assembly << " mova_int, ";
      int r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return bim_invalid;

      m_bc->ar_loaded = 0;

      alu.op = opcode_map.at(idxop);
      alu.dst.chan = 0;
      alu.src[0].sel = 0;
      alu.src[0].chan = 0;
      alu.last = 1;
      sfn_log << SfnLog::assembly << "op1_set_cf_idx" << idx;
      r = r600_bytecode_add_alu(m_bc, &alu);
      if (r)
         return bim_invalid;

      m_bc->index_reg[idx] = addr.sel();
      m_bc->index_reg_chan[idx] = addr.chan();
      m_bc->index_loaded[idx] = true;
      sfn_log << SfnLog::assembly << "\n";
   }
   return idx == 0 ? bim_zero : bim_one;
}

bool AssemblyFromShaderLegacyImpl::copy_dst(r600_bytecode_alu_dst& dst,
                                            const Value& d)
{
   assert(d.type() == Value::gpr || d.type() == Value::gpr_array_value);

   if (d.sel() > 124) {
      R600_ERR("shader_from_nir: Don't support more than 124 GPRs, but try using %d\n", d.sel());
      return false;
   }

   dst.sel = d.sel();
   dst.chan = d.chan();
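
   /* Writing to a GPR that currently backs one of the CF index registers
    * invalidates the cached index value. */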
   if (m_bc->index_reg[1] == dst.sel &&
       m_bc->index_reg_chan[1] == dst.chan)
      m_bc->index_loaded[1] = false;

   if (m_bc->index_reg[0] == dst.sel &&
       m_bc->index_reg_chan[0] == dst.chan)
      m_bc->index_loaded[0] = false;

   return true;
}

bool AssemblyFromShaderLegacyImpl::copy_src(r600_bytecode_alu_src& src, const Value& s)
{
   if (s.type() == Value::gpr && s.sel() > 124) {
      R600_ERR("shader_from_nir: Don't support more than 124 GPRs, try using %d\n", s.sel());
      return false;
   }

   if (s.type() == Value::lds_direct) {
      R600_ERR("shader_from_nir: LDS_DIRECT values not supported\n");
      return false;
   }

   if (s.type() == Value::kconst && s.sel() < 512) {
      R600_ERR("shader_from_nir: Uniforms should have values >= 512, got %d\n", s.sel());
      return false;
   }

   if (s.type() == Value::literal) {
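      /* Literals that map to a hardware inline constant don't occupy a
       * literal slot in the instruction group, so undo the count taken
       * in emit_alu. */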
      auto& v = static_cast<const LiteralValue&>(s);
      if (v.value() == 0) {
         src.sel = ALU_SRC_0;
         src.chan = 0;
         --m_nliterals_in_group;
         return true;
      }
      if (v.value() == 1) {
         src.sel = ALU_SRC_1_INT;
         src.chan = 0;
         --m_nliterals_in_group;
         return true;
      }
      if (v.value_float() == 1.0f) {
         src.sel = ALU_SRC_1;
         src.chan = 0;
         --m_nliterals_in_group;
         return true;
      }
      if (v.value_float() == 0.5f) {
         src.sel = ALU_SRC_0_5;
         src.chan = 0;
         --m_nliterals_in_group;
         return true;
      }
      if (v.value() == 0xffffffff) {
         src.sel = ALU_SRC_M_1_INT;
         src.chan = 0;
         --m_nliterals_in_group;
         return true;
      }
      src.value = v.value();
   }

   src.sel = s.sel();
   src.chan = s.chan();
   if (s.type() == Value::kconst) {
      const UniformValue& cv = static_cast<const UniformValue&>(s);
      src.kc_bank = cv.kcache_bank();
      auto addr = cv.addr();
      if (addr) {
         src.kc_rel = 1;
         emit_index_reg(*addr, 0);
         auto type = m_bc->cf_last->op;
         if (r600_bytecode_add_cf(m_bc)) {
            return false;
         }
         m_bc->cf_last->op = type;
      }
   }

   return true;
}

const std::map<EAluOp, int> opcode_map = {

   {op2_add, ALU_OP2_ADD},
   {op2_mul, ALU_OP2_MUL},
   {op2_mul_ieee, ALU_OP2_MUL_IEEE},
   {op2_max, ALU_OP2_MAX},
   {op2_min, ALU_OP2_MIN},
   {op2_max_dx10, ALU_OP2_MAX_DX10},
   {op2_min_dx10, ALU_OP2_MIN_DX10},
   {op2_sete, ALU_OP2_SETE},
   {op2_setgt, ALU_OP2_SETGT},
   {op2_setge, ALU_OP2_SETGE},
   {op2_setne, ALU_OP2_SETNE},
   {op2_sete_dx10, ALU_OP2_SETE_DX10},
   {op2_setgt_dx10, ALU_OP2_SETGT_DX10},
   {op2_setge_dx10, ALU_OP2_SETGE_DX10},
   {op2_setne_dx10, ALU_OP2_SETNE_DX10},
   {op1_fract, ALU_OP1_FRACT},
   {op1_trunc, ALU_OP1_TRUNC},
   {op1_ceil, ALU_OP1_CEIL},
   {op1_rndne, ALU_OP1_RNDNE},
   {op1_floor, ALU_OP1_FLOOR},
   {op2_ashr_int, ALU_OP2_ASHR_INT},
   {op2_lshr_int, ALU_OP2_LSHR_INT},
   {op2_lshl_int, ALU_OP2_LSHL_INT},
   {op1_mov, ALU_OP1_MOV},
   {op0_nop, ALU_OP0_NOP},
   {op2_mul_64, ALU_OP2_MUL_64},
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1v_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_pred_setgt_uint, ALU_OP2_PRED_SETGT_UINT},
   {op2_pred_setge_uint, ALU_OP2_PRED_SETGE_UINT},
   {op2_pred_sete, ALU_OP2_PRED_SETE},
   {op2_pred_setgt, ALU_OP2_PRED_SETGT},
   {op2_pred_setge, ALU_OP2_PRED_SETGE},
   {op2_pred_setne, ALU_OP2_PRED_SETNE},
   //{op2_pred_set_inv, ALU_OP2_PRED_SET},
   //{op2_pred_set_clr, ALU_OP2_PRED_SET_CRL},
   //{op2_pred_set_restore, ALU_OP2_PRED_SET_RESTORE},
   {op2_pred_sete_push, ALU_OP2_PRED_SETE_PUSH},
   {op2_pred_setgt_push, ALU_OP2_PRED_SETGT_PUSH},
   {op2_pred_setge_push, ALU_OP2_PRED_SETGE_PUSH},
   {op2_pred_setne_push, ALU_OP2_PRED_SETNE_PUSH},
   {op2_kille, ALU_OP2_KILLE},
   {op2_killgt, ALU_OP2_KILLGT},
   {op2_killge, ALU_OP2_KILLGE},
   {op2_killne, ALU_OP2_KILLNE},
   {op2_and_int, ALU_OP2_AND_INT},
   {op2_or_int, ALU_OP2_OR_INT},
   {op2_xor_int, ALU_OP2_XOR_INT},
   {op1_not_int, ALU_OP1_NOT_INT},
   {op2_add_int, ALU_OP2_ADD_INT},
   {op2_sub_int, ALU_OP2_SUB_INT},
   {op2_max_int, ALU_OP2_MAX_INT},
   {op2_min_int, ALU_OP2_MIN_INT},
   {op2_max_uint, ALU_OP2_MAX_UINT},
   {op2_min_uint, ALU_OP2_MIN_UINT},
   {op2_sete_int, ALU_OP2_SETE_INT},
   {op2_setgt_int, ALU_OP2_SETGT_INT},
   {op2_setge_int, ALU_OP2_SETGE_INT},
   {op2_setne_int, ALU_OP2_SETNE_INT},
   {op2_setgt_uint, ALU_OP2_SETGT_UINT},
   {op2_setge_uint, ALU_OP2_SETGE_UINT},
   {op2_killgt_uint, ALU_OP2_KILLGT_UINT},
   {op2_killge_uint, ALU_OP2_KILLGE_UINT},
   //p2_prede_int, ALU_OP2_PREDE_INT},
   {op2_pred_setgt_int, ALU_OP2_PRED_SETGT_INT},
   {op2_pred_setge_int, ALU_OP2_PRED_SETGE_INT},
   {op2_pred_setne_int, ALU_OP2_PRED_SETNE_INT},
   {op2_kille_int, ALU_OP2_KILLE_INT},
   {op2_killgt_int, ALU_OP2_KILLGT_INT},
   {op2_killge_int, ALU_OP2_KILLGE_INT},
   {op2_killne_int, ALU_OP2_KILLNE_INT},
   {op2_pred_sete_push_int, ALU_OP2_PRED_SETE_PUSH_INT},
   {op2_pred_setgt_push_int, ALU_OP2_PRED_SETGT_PUSH_INT},
   {op2_pred_setge_push_int, ALU_OP2_PRED_SETGE_PUSH_INT},
   {op2_pred_setne_push_int, ALU_OP2_PRED_SETNE_PUSH_INT},
   {op2_pred_setlt_push_int, ALU_OP2_PRED_SETLT_PUSH_INT},
   {op2_pred_setle_push_int, ALU_OP2_PRED_SETLE_PUSH_INT},
   {op1_flt_to_int, ALU_OP1_FLT_TO_INT},
   {op1_bfrev_int, ALU_OP1_BFREV_INT},
   {op2_addc_uint, ALU_OP2_ADDC_UINT},
   {op2_subb_uint, ALU_OP2_SUBB_UINT},
   {op0_group_barrier, ALU_OP0_GROUP_BARRIER},
   {op0_group_seq_begin, ALU_OP0_GROUP_SEQ_BEGIN},
   {op0_group_seq_end, ALU_OP0_GROUP_SEQ_END},
   {op2_set_mode, ALU_OP2_SET_MODE},
   {op1_set_cf_idx0, ALU_OP0_SET_CF_IDX0},
   {op1_set_cf_idx1, ALU_OP0_SET_CF_IDX1},
   {op2_set_lds_size, ALU_OP2_SET_LDS_SIZE},
   {op1_exp_ieee, ALU_OP1_EXP_IEEE},
   {op1_log_clamped, ALU_OP1_LOG_CLAMPED},
   {op1_log_ieee, ALU_OP1_LOG_IEEE},
   {op1_recip_clamped, ALU_OP1_RECIP_CLAMPED},
   {op1_recip_ff, ALU_OP1_RECIP_FF},
   {op1_recip_ieee, ALU_OP1_RECIP_IEEE},
   {op1_recipsqrt_clamped, ALU_OP1_RECIPSQRT_CLAMPED},
   {op1_recipsqrt_ff, ALU_OP1_RECIPSQRT_FF},
   {op1_recipsqrt_ieee1, ALU_OP1_RECIPSQRT_IEEE},
   {op1_sqrt_ieee, ALU_OP1_SQRT_IEEE},
   {op1_sin, ALU_OP1_SIN},
   {op1_cos, ALU_OP1_COS},
   {op2_mullo_int, ALU_OP2_MULLO_INT},
   {op2_mulhi_int, ALU_OP2_MULHI_INT},
   {op2_mullo_uint, ALU_OP2_MULLO_UINT},
   {op2_mulhi_uint, ALU_OP2_MULHI_UINT},
   {op1_recip_int, ALU_OP1_RECIP_INT},
   {op1_recip_uint, ALU_OP1_RECIP_UINT},
   {op1_recip_64, ALU_OP2_RECIP_64},
   {op1_recip_clamped_64, ALU_OP2_RECIP_CLAMPED_64},
   {op1_recipsqrt_64, ALU_OP2_RECIPSQRT_64},
   {op1_recipsqrt_clamped_64, ALU_OP2_RECIPSQRT_CLAMPED_64},
   {op1_sqrt_64, ALU_OP2_SQRT_64},
   {op1_flt_to_uint, ALU_OP1_FLT_TO_UINT},
   {op1_int_to_flt, ALU_OP1_INT_TO_FLT},
   {op1_uint_to_flt, ALU_OP1_UINT_TO_FLT},
   {op2_bfm_int, ALU_OP2_BFM_INT},
   {op1_flt32_to_flt16, ALU_OP1_FLT32_TO_FLT16},
   {op1_flt16_to_flt32, ALU_OP1_FLT16_TO_FLT32},
   {op1_ubyte0_flt, ALU_OP1_UBYTE0_FLT},
   {op1_ubyte1_flt, ALU_OP1_UBYTE1_FLT},
   {op1_ubyte2_flt, ALU_OP1_UBYTE2_FLT},
   {op1_ubyte3_flt, ALU_OP1_UBYTE3_FLT},
   {op1_bcnt_int, ALU_OP1_BCNT_INT},
   {op1_ffbh_uint, ALU_OP1_FFBH_UINT},
   {op1_ffbl_int, ALU_OP1_FFBL_INT},
   {op1_ffbh_int, ALU_OP1_FFBH_INT},
   {op1_flt_to_uint4, ALU_OP1_FLT_TO_UINT4},
   {op2_dot_ieee, ALU_OP2_DOT_IEEE},
   {op1_flt_to_int_rpi, ALU_OP1_FLT_TO_INT_RPI},
   {op1_flt_to_int_floor, ALU_OP1_FLT_TO_INT_FLOOR},
   {op2_mulhi_uint24, ALU_OP2_MULHI_UINT24},
   {op1_mbcnt_32hi_int, ALU_OP1_MBCNT_32HI_INT},
   {op1_offset_to_flt, ALU_OP1_OFFSET_TO_FLT},
   {op2_mul_uint24, ALU_OP2_MUL_UINT24},
   {op1_bcnt_accum_prev_int, ALU_OP1_BCNT_ACCUM_PREV_INT},
   {op1_mbcnt_32lo_accum_prev_int, ALU_OP1_MBCNT_32LO_ACCUM_PREV_INT},
   {op2_sete_64, ALU_OP2_SETE_64},
   {op2_setne_64, ALU_OP2_SETNE_64},
   {op2_setgt_64, ALU_OP2_SETGT_64},
   {op2_setge_64, ALU_OP2_SETGE_64},
   {op2_min_64, ALU_OP2_MIN_64},
   {op2_max_64, ALU_OP2_MAX_64},
   {op2_dot4, ALU_OP2_DOT4},
   {op2_dot4_ieee, ALU_OP2_DOT4_IEEE},
   {op2_cube, ALU_OP2_CUBE},
   {op1_max4, ALU_OP1_MAX4},
   {op1_frexp_64, ALU_OP1_FREXP_64},
   {op1_ldexp_64, ALU_OP2_LDEXP_64},
   {op1_fract_64, ALU_OP1_FRACT_64},
   {op2_pred_setgt_64, ALU_OP2_PRED_SETGT_64},
   {op2_pred_sete_64, ALU_OP2_PRED_SETE_64},
   {op2_pred_setge_64, ALU_OP2_PRED_SETGE_64},
   {op2_add_64, ALU_OP2_ADD_64},
   {op1_mova_int, ALU_OP1_MOVA_INT},
   {op1v_flt64_to_flt32, ALU_OP1_FLT64_TO_FLT32},
   {op1_flt32_to_flt64, ALU_OP1_FLT32_TO_FLT64},
   {op2_sad_accum_prev_uint, ALU_OP2_SAD_ACCUM_PREV_UINT},
   {op2_dot, ALU_OP2_DOT},
   //p2_mul_prev, ALU_OP2_MUL_PREV},
   //p2_mul_ieee_prev, ALU_OP2_MUL_IEEE_PREV},
   //p2_add_prev, ALU_OP2_ADD_PREV},
   {op2_muladd_prev, ALU_OP2_MULADD_PREV},
   {op2_muladd_ieee_prev, ALU_OP2_MULADD_IEEE_PREV},
   {op2_interp_xy, ALU_OP2_INTERP_XY},
   {op2_interp_zw, ALU_OP2_INTERP_ZW},
   {op2_interp_x, ALU_OP2_INTERP_X},
   {op2_interp_z, ALU_OP2_INTERP_Z},
   {op0_store_flags, ALU_OP1_STORE_FLAGS},
   {op1_load_store_flags, ALU_OP1_LOAD_STORE_FLAGS},
   {op0_lds_1a, ALU_OP2_LDS_1A},
   {op0_lds_1a1d, ALU_OP2_LDS_1A1D},
   {op0_lds_2a, ALU_OP2_LDS_2A},
   {op1_interp_load_p0, ALU_OP1_INTERP_LOAD_P0},
   {op1_interp_load_p10, ALU_OP1_INTERP_LOAD_P10},
   {op1_interp_load_p20, ALU_OP1_INTERP_LOAD_P20},
   // {op 3 all left shift 6
   {op3_bfe_uint, ALU_OP3_BFE_UINT},
   {op3_bfe_int, ALU_OP3_BFE_INT},
   {op3_bfi_int, ALU_OP3_BFI_INT},
   {op3_fma, ALU_OP3_FMA},
   {op3_cndne_64, ALU_OP3_CNDNE_64},
   {op3_fma_64, ALU_OP3_FMA_64},
   {op3_lerp_uint, ALU_OP3_LERP_UINT},
   {op3_bit_align_int, ALU_OP3_BIT_ALIGN_INT},
   {op3_byte_align_int, ALU_OP3_BYTE_ALIGN_INT},
   {op3_sad_accum_uint, ALU_OP3_SAD_ACCUM_UINT},
   {op3_sad_accum_hi_uint, ALU_OP3_SAD_ACCUM_HI_UINT},
   {op3_muladd_uint24, ALU_OP3_MULADD_UINT24},
   {op3_lds_idx_op, ALU_OP3_LDS_IDX_OP},
   {op3_muladd, ALU_OP3_MULADD},
   {op3_muladd_m2, ALU_OP3_MULADD_M2},
   {op3_muladd_m4, ALU_OP3_MULADD_M4},
   {op3_muladd_d2, ALU_OP3_MULADD_D2},
   {op3_muladd_ieee, ALU_OP3_MULADD_IEEE},
   {op3_cnde, ALU_OP3_CNDE},
   {op3_cndgt, ALU_OP3_CNDGT},
   {op3_cndge, ALU_OP3_CNDGE},
   {op3_cnde_int, ALU_OP3_CNDE_INT},
   {op3_cndgt_int, ALU_OP3_CNDGT_INT},
   {op3_cndge_int, ALU_OP3_CNDGE_INT},
   {op3_mul_lit, ALU_OP3_MUL_LIT},
};

const std::map<ESDOp, int> ds_opcode_map = {
   {DS_OP_ADD, FETCH_OP_GDS_ADD},
   {DS_OP_SUB, FETCH_OP_GDS_SUB},
   {DS_OP_RSUB, FETCH_OP_GDS_RSUB},
   {DS_OP_INC, FETCH_OP_GDS_INC},
   {DS_OP_DEC, FETCH_OP_GDS_DEC},
   {DS_OP_MIN_INT, FETCH_OP_GDS_MIN_INT},
   {DS_OP_MAX_INT, FETCH_OP_GDS_MAX_INT},
   {DS_OP_MIN_UINT, FETCH_OP_GDS_MIN_UINT},
   {DS_OP_MAX_UINT, FETCH_OP_GDS_MAX_UINT},
   {DS_OP_AND, FETCH_OP_GDS_AND},
   {DS_OP_OR, FETCH_OP_GDS_OR},
   {DS_OP_XOR, FETCH_OP_GDS_XOR},
   {DS_OP_MSKOR, FETCH_OP_GDS_MSKOR},
   {DS_OP_WRITE, FETCH_OP_GDS_WRITE},
   {DS_OP_WRITE_REL, FETCH_OP_GDS_WRITE_REL},
   {DS_OP_WRITE2, FETCH_OP_GDS_WRITE2},
   {DS_OP_CMP_STORE, FETCH_OP_GDS_CMP_STORE},
   {DS_OP_CMP_STORE_SPF, FETCH_OP_GDS_CMP_STORE_SPF},
   {DS_OP_BYTE_WRITE, FETCH_OP_GDS_BYTE_WRITE},
   {DS_OP_SHORT_WRITE, FETCH_OP_GDS_SHORT_WRITE},
   {DS_OP_ADD_RET, FETCH_OP_GDS_ADD_RET},
   {DS_OP_SUB_RET, FETCH_OP_GDS_SUB_RET},
   {DS_OP_RSUB_RET, FETCH_OP_GDS_RSUB_RET},
   {DS_OP_INC_RET, FETCH_OP_GDS_INC_RET},
   {DS_OP_DEC_RET, FETCH_OP_GDS_DEC_RET},
   {DS_OP_MIN_INT_RET, FETCH_OP_GDS_MIN_INT_RET},
   {DS_OP_MAX_INT_RET, FETCH_OP_GDS_MAX_INT_RET},
   {DS_OP_MIN_UINT_RET, FETCH_OP_GDS_MIN_UINT_RET},
   {DS_OP_MAX_UINT_RET, FETCH_OP_GDS_MAX_UINT_RET},
   {DS_OP_AND_RET, FETCH_OP_GDS_AND_RET},
   {DS_OP_OR_RET, FETCH_OP_GDS_OR_RET},
   {DS_OP_XOR_RET, FETCH_OP_GDS_XOR_RET},
   {DS_OP_MSKOR_RET, FETCH_OP_GDS_MSKOR_RET},
   {DS_OP_XCHG_RET, FETCH_OP_GDS_XCHG_RET},
   {DS_OP_XCHG_REL_RET, FETCH_OP_GDS_XCHG_REL_RET},
   {DS_OP_XCHG2_RET, FETCH_OP_GDS_XCHG2_RET},
   {DS_OP_CMP_XCHG_RET, FETCH_OP_GDS_CMP_XCHG_RET},
   {DS_OP_CMP_XCHG_SPF_RET, FETCH_OP_GDS_CMP_XCHG_SPF_RET},
   {DS_OP_READ_RET, FETCH_OP_GDS_READ_RET},
   {DS_OP_READ_REL_RET, FETCH_OP_GDS_READ_REL_RET},
   {DS_OP_READ2_RET, FETCH_OP_GDS_READ2_RET},
   {DS_OP_READWRITE_RET, FETCH_OP_GDS_READWRITE_RET},
   {DS_OP_BYTE_READ_RET, FETCH_OP_GDS_BYTE_READ_RET},
   {DS_OP_UBYTE_READ_RET, FETCH_OP_GDS_UBYTE_READ_RET},
   {DS_OP_SHORT_READ_RET, FETCH_OP_GDS_SHORT_READ_RET},
   {DS_OP_USHORT_READ_RET, FETCH_OP_GDS_USHORT_READ_RET},
   {DS_OP_ATOMIC_ORDERED_ALLOC_RET, FETCH_OP_GDS_ATOMIC_ORDERED_ALLOC},
   {DS_OP_INVALID, 0},
};

}