/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

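/* DEBUG_SFN(X) executes the statement X only in debug builds; the do/while
 * wrapper makes the macro behave like a single statement.  Note that the
 * check below uses #ifdef, so the debug path is compiled in whenever
 * ENABLE_DEBUG is defined at all, even as 0.  A typical use (illustrative
 * only):
 *
 *    DEBUG_SFN(sfn_log << SfnLog::instr << "emitting " << *ir << "\n");
 */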
#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X) \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;

ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size,
                                               enum chip_class chip_class,
                                               int atomic_base):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_chip_class(chip_class),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel),
   m_atomic_base(atomic_base),
   m_image_count(0),
   last_emitted_alu(nullptr)
{
   m_sh_info.processor_type = ptype;
}

ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

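/* Pre-scan pass: walk each NIR instruction before code generation and
 * record shader-wide facts (buffer-texture use, TXQ cube-array handling,
 * memory writes) that code emission and the shader info depend on. */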
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
      if (t->op == nir_texop_txs &&
          t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
          t->is_array)
         sh_info().has_txq_cube_array_z_comp = true;
      break;
   }
   case nir_instr_type_intrinsic: {
      auto *i = nir_instr_as_intrinsic(instr);
      switch (i->intrinsic) {
      case nir_intrinsic_ssbo_atomic_add:
      case nir_intrinsic_image_atomic_add:
      case nir_intrinsic_ssbo_atomic_and:
      case nir_intrinsic_image_atomic_and:
      case nir_intrinsic_ssbo_atomic_or:
      case nir_intrinsic_image_atomic_or:
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_image_atomic_imin:
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_image_atomic_imax:
      case nir_intrinsic_ssbo_atomic_umin:
      case nir_intrinsic_image_atomic_umin:
      case nir_intrinsic_ssbo_atomic_umax:
      case nir_intrinsic_image_atomic_umax:
      case nir_intrinsic_ssbo_atomic_xor:
      case nir_intrinsic_image_atomic_xor:
      case nir_intrinsic_ssbo_atomic_exchange:
      case nir_intrinsic_image_atomic_exchange:
      case nir_intrinsic_image_atomic_comp_swap:
      case nir_intrinsic_ssbo_atomic_comp_swap:
         m_sel.info.writes_memory = 1;
         /* fallthrough */
      case nir_intrinsic_image_load:
         m_ssbo_instr.set_require_rat_return_address();
         break;
      case nir_intrinsic_image_size: {
         if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
             nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
            sh_info().has_txq_cube_array_z_comp = true;
         break;
      }
      default:
         ;
      }
      break;
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
{
   return m_chip_class;
}

bool ShaderFromNirProcessor::allocate_reserved_registers()
{
   bool retval = do_allocate_reserved_registers();
   m_ssbo_instr.load_rat_return_address();
   if (sh_info().uses_atomics)
      m_ssbo_instr.load_atomic_inc_limits();
   m_ssbo_instr.set_ssbo_offset(m_image_count);
   return retval;
}

static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

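/* Register remapping runs in two rounds: first evaluate live ranges and
 * merge registers whose ranges do not overlap, then compact the surviving
 * register indices into a dense range.  Registers that hold shader inputs
 * are marked used so the second round leaves them in place. */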
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;

      atom.start = uniform->data.offset >> 2;
      atom.end = atom.start + natomics - 1;

      if (m_atomic_base_map.find(uniform->data.binding) ==
          m_atomic_base_map.end())
         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;

      m_next_hwatomic_loc += natomics;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type;
   if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE;
   }

   if (uniform->type->is_image()) {
      ++m_image_count;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(nir_deref_mode_is(instr, nir_var_function_temp));
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s);
}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   assert(util_bitcount(instr->modes) == 1);
   m_var_mode[instr->var] = instr->modes;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

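/* Compute the SPI semantic id the hardware uses to match outputs of one
 * stage with inputs of the next.  Position-like semantics get 0 (no
 * matching needed), generic/texcoord params use sid + 1, and everything
 * else packs name and sid into the low byte.  For example, a
 * TGSI_SEMANTIC_GENERIC output with sid 2 is assigned spi_sid 3. */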
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir)
{
   if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) {
      for (unsigned i = 0; i < ir->n_sources(); ++i) {
         auto& s = ir->src(i);
         if (s.type() == Value::kconst) {
            auto& c = static_cast<UniformValue&>(s);
            if (c.addr()) {
               last_emitted_alu->set_flag(alu_last_instr);
               break;
            }
         }
      }
   }
   last_emitted_alu = ir;
   emit_instruction_internal(ir);
}

void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   emit_instruction_internal(ir);
   last_emitted_alu = nullptr;
}

void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
   m_ssbo_instr.set_ssbo_offset(m_image_count);
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }
   /* Clear pending else, if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

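/* Map a NIR shared-memory atomic intrinsic to the corresponding R600 LDS
 * opcode; all of these are the *_RET variants because the NIR intrinsics
 * return the previous value. */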
static unsigned
lds_op_from_intrinsic(nir_intrinsic_op op) {
   switch (op) {
   case nir_intrinsic_shared_atomic_add:
      return LDS_OP2_LDS_ADD_RET;
   case nir_intrinsic_shared_atomic_and:
      return LDS_OP2_LDS_AND_RET;
   case nir_intrinsic_shared_atomic_or:
      return LDS_OP2_LDS_OR_RET;
   case nir_intrinsic_shared_atomic_imax:
      return LDS_OP2_LDS_MAX_INT_RET;
   case nir_intrinsic_shared_atomic_umax:
      return LDS_OP2_LDS_MAX_UINT_RET;
   case nir_intrinsic_shared_atomic_imin:
      return LDS_OP2_LDS_MIN_INT_RET;
   case nir_intrinsic_shared_atomic_umin:
      return LDS_OP2_LDS_MIN_UINT_RET;
   case nir_intrinsic_shared_atomic_xor:
      return LDS_OP2_LDS_XOR_RET;
   case nir_intrinsic_shared_atomic_exchange:
      return LDS_OP2_LDS_XCHG_RET;
   case nir_intrinsic_shared_atomic_comp_swap:
      return LDS_OP3_LDS_CMP_XCHG_RET;
   default:
      unreachable("Unsupported shared atomic opcode");
   }
}

bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
{
   auto address = from_nir(instr->src[0], 0);
   auto dest_value = from_nir(instr->dest, 0);
   auto value = from_nir(instr->src[1], 0);
   auto op = lds_op_from_intrinsic(instr->intrinsic);

   if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)) {
      auto value2 = from_nir(instr->src[2], 0);
      emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op));
   } else {
      emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op));
   }
   return true;
}

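/* An LDS write instruction handles at most two dwords.  The mask folding
 * below appears to rely on NIR producing only xy or zw pairs here: if only
 * the upper half (zw) of the write mask is set, reading of the source
 * starts at component 2 and the mask is shifted down so the same
 * two-component path applies. */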
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   if (m_ssbo_instr.emit(&instr->instr)) {
      m_sel.info.writes_memory = true;
      return true;
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return load_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_vec4:
      return emit_load_ubo_vec4(instr);
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
   case nir_intrinsic_load_shared:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
   case nir_intrinsic_store_shared:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_group_memory_barrier:
      return emit_barrier(instr);
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      return emit_atomic_local_shared(instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}

bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

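/* Spill a value to the scratch buffer.  A literal address can be encoded
 * directly in the instruction; anything else is first forced into a GPR,
 * because the hardware addresses scratch indirectly through a register. */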
bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
                                                 swizzle_from_comps(instr->num_components));

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

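/* Gather the masked source components into a GPRVector.  If all components
 * already live in the same GPR (and, when 'match' is set, in the requested
 * channels), the vector can reference them directly, and unused slots are
 * filled with free channels of that register; otherwise the components are
 * copied into a freshly allocated register quad. */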
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                   unsigned mask,
                                                                   const GPRVector::Swizzle& swizzle,
                                                                   bool match)
{
   bool use_same = true;
   GPRVector::Values v;

   std::array<bool,4> used_swizzles = {false, false, false, false};

   /* Check whether all sources come from a GPR, and,
    * if requested, whether they are swizzled as expected */

   for (int i = 0; i < 4 && use_same; ++i) {
      if ((1 << i) & mask) {
         if (swizzle[i] < 4) {
            v[i] = from_nir(src, swizzle[i]);
            assert(v[i]);
            use_same &= (v[i]->type() == Value::gpr);
            if (match) {
               use_same &= (v[i]->chan() == swizzle[i]);
            }
            used_swizzles[v[i]->chan()] = true;
         }
      }
   }

   /* Now check whether all inputs come from the same GPR, and fill
    * empty slots in the vector with unused swizzles, bail out if
    * the sources are not from the same GPR
    */

   if (use_same) {
      int next_free_swizzle = 0;
      while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
         next_free_swizzle++;

      /* Find the first GPR index used */
      int i = 0;
      while (i < 4 && !v[i]) ++i;
      assert(i < 4);
      unsigned sel = v[i]->sel();

      for (i = 0; i < 4 && use_same; ++i) {
         if (!v[i]) {
            if (swizzle[i] >= 4)
               v[i] = PValue(new GPRValue(sel, swizzle[i]));
            else {
               assert(next_free_swizzle < 4);
               v[i] = PValue(new GPRValue(sel, next_free_swizzle));
               used_swizzles[next_free_swizzle] = true;
               while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
                  next_free_swizzle++;
            }
         }
         else
            use_same &= v[i]->sel() == sel;
      }
   }

   /* We can't re-use the source data because they either need re-swizzling, or
    * they didn't come all from a GPR or the same GPR, so copy to a new vector
    */
   if (!use_same) {
      AluInstruction *ir = nullptr;
      GPRVector result(allocate_temp_register(), swizzle);
      for (int i = 0; i < 4; ++i) {
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
      return result;
   } else
      return GPRVector(v);
}

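/* Load a vec4 from a UBO.  Three cases: with a non-constant offset a
 * vertex-cache fetch through an address register is emitted (attaching the
 * buffer-index register when the buffer id is dynamic too); with constant
 * offset and constant buffer id the components become kcache uniform
 * accesses that can be loaded lazily; with a dynamic buffer id but
 * constant offset the uniform access carries the buffer-index register. */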
bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
{
   auto bufid = nir_src_as_const_value(instr->src[0]);
   auto buf_offset = nir_src_as_const_value(instr->src[1]);

   if (!buf_offset) {
      /* TODO: if buf_offset is constant then this can also be solved by using the CF index
       * on the ALU block, and this would probably make sense when there is more than one
       * load with the same buffer ID. */

      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      std::array<int, 4> swz = {7,7,7,7};
      for (unsigned i = 0; i < 4; ++i) {
         if (i < nir_dest_num_components(instr->dest)) {
            trgt.set_reg_i(i, from_nir(instr->dest, i));
            swz[i] = i + nir_intrinsic_component(instr);
         } else {
            trgt.set_reg_i(i, from_nir(instr->dest, 7));
         }
      }

      FetchInstruction *ir;
      if (bufid) {
         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                   1, nullptr, bim_none);
      } else {
         PValue bufid = from_nir(instr->src[0], 0, 0);
         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                   1, bufid, bim_zero);
      }
      ir->set_dest_swizzle(swz);
      emit_instruction(ir);
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

   if (bufid) {
      int buf_cmp = nir_intrinsic_component(instr);
      AluInstruction *ir = nullptr;
      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         int cmp = buf_cmp + i;
         assert(cmp < 4);
         auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1));
         if (instr->dest.is_ssa)
            load_preloaded_value(instr->dest, i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
      return true;

   } else {
      int buf_cmp = nir_intrinsic_component(instr);
      AluInstruction *ir = nullptr;
      auto kc_id = from_nir(instr->src[0], 0);
      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         int cmp = buf_cmp + i;
         auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id));
         if (instr->dest.is_ssa)
            load_preloaded_value(instr->dest, i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
      return true;
   }
}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                          {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                          {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */
   auto literal = nir_src_as_const_value(instr->src[0]);
   int base = nir_intrinsic_base(instr);

   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components ; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            load_preloaded_value(instr->dest, i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   std::array<int, 4> swz = {7,7,7,7};
   for (int i = 0; i < 4; ++i) {
      trgt.set_reg_i(i, from_nir(instr->dest, i));
      swz[i] = i;
   }

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   ir->set_dest_swizzle(swz);
   emit_instruction(ir);
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

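/* Emit the moves that materialize a NIR constant.  The common constants 0,
 * 1 (int and float) and 0.5 map to the hardware's inline constant slots;
 * anything else is emitted as a literal value alongside the ALU
 * instruction. */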
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components ; ++i) {
      if (writemask & (1 << i)){
         PValue lsrc;
         switch (literal->def.bit_size) {
         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                   PValue(new LiteralValue( 0xffffffff, i)) :
                   Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

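/* Return the value of a NIR source in a form a fetch instruction can
 * address: if it is not already GPR-backed (e.g. a literal or a uniform),
 * copy it into a temporary register first. */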
PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register(channel);
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. geometry and
    * tessellation shaders need a specialized deref_array, for the other shaders
    * it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << " as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector *ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] = " << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] = " << sel << "\n";
   m_outputs[pos] = sel;
}

void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second;

   m_output.push_back(m_export_output);
}

}