/* -*- mesa-c++  -*-
 *
 * Copyright (c) 2018 Collabora LTD
 *
 * Author: Gert Wollny <gert.wollny@collabora.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 */

#include "../r600_pipe.h"
#include "../r600_shader.h"
#include "sfn_shader_vertex.h"

#include "sfn_shader_compute.h"
#include "sfn_shader_fragment.h"
#include "sfn_shader_geometry.h"
#include "sfn_liverange.h"
#include "sfn_ir_to_assembly.h"
#include "sfn_nir.h"
#include "sfn_instruction_misc.h"
#include "sfn_instruction_fetch.h"
#include "sfn_instruction_lds.h"

#include <iostream>

#define ENABLE_DEBUG 1

#ifdef ENABLE_DEBUG
#define DEBUG_SFN(X)  \
   do {\
      X; \
   } while (0)
#else
#define DEBUG_SFN(X)
#endif

namespace r600 {

using namespace std;

ShaderFromNirProcessor::ShaderFromNirProcessor(pipe_shader_type ptype,
                                               r600_pipe_shader_selector& sel,
                                               r600_shader &sh_info, int scratch_size,
                                               enum chip_class chip_class,
                                               int atomic_base):
   m_processor_type(ptype),
   m_nesting_depth(0),
   m_block_number(0),
   m_export_output(0, -1),
   m_sh_info(sh_info),
   m_chip_class(chip_class),
   m_tex_instr(*this),
   m_alu_instr(*this),
   m_ssbo_instr(*this),
   m_pending_else(nullptr),
   m_scratch_size(scratch_size),
   m_next_hwatomic_loc(0),
   m_sel(sel),
   m_atomic_base(atomic_base),
   m_image_count(0),
   last_emitted_alu(nullptr)
{
   m_sh_info.processor_type = ptype;
}


ShaderFromNirProcessor::~ShaderFromNirProcessor()
{
}

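/* Pre-scan pass: walk the NIR once before code emission to set shader-level
 * flags that later emission passes and the driver rely on: buffer texture
 * use, the TXQ cube-array z-component workaround, memory writes from
 * SSBO/image atomics, and whether a RAT return address is needed. */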
bool ShaderFromNirProcessor::scan_instruction(nir_instr *instr)
{
   switch (instr->type) {
   case nir_instr_type_tex: {
      nir_tex_instr *t = nir_instr_as_tex(instr);
      if (t->sampler_dim == GLSL_SAMPLER_DIM_BUF)
         sh_info().uses_tex_buffers = true;
      if (t->op == nir_texop_txs &&
          t->sampler_dim == GLSL_SAMPLER_DIM_CUBE &&
          t->is_array)
         sh_info().has_txq_cube_array_z_comp = true;
      break;
   }
   case nir_instr_type_intrinsic: {
      auto *i = nir_instr_as_intrinsic(instr);
      switch (i->intrinsic) {
      case nir_intrinsic_ssbo_atomic_add:
      case nir_intrinsic_image_atomic_add:
      case nir_intrinsic_ssbo_atomic_and:
      case nir_intrinsic_image_atomic_and:
      case nir_intrinsic_ssbo_atomic_or:
      case nir_intrinsic_image_atomic_or:
      case nir_intrinsic_ssbo_atomic_imin:
      case nir_intrinsic_image_atomic_imin:
      case nir_intrinsic_ssbo_atomic_imax:
      case nir_intrinsic_image_atomic_imax:
      case nir_intrinsic_ssbo_atomic_umin:
      case nir_intrinsic_image_atomic_umin:
      case nir_intrinsic_ssbo_atomic_umax:
      case nir_intrinsic_image_atomic_umax:
      case nir_intrinsic_ssbo_atomic_xor:
      case nir_intrinsic_image_atomic_xor:
      case nir_intrinsic_ssbo_atomic_exchange:
      case nir_intrinsic_image_atomic_exchange:
      case nir_intrinsic_image_atomic_comp_swap:
      case nir_intrinsic_ssbo_atomic_comp_swap:
         m_sel.info.writes_memory = 1;
         /* fallthrough */
      case nir_intrinsic_image_load:
         m_ssbo_instr.set_require_rat_return_address();
         break;
      case nir_intrinsic_image_size: {
         if (nir_intrinsic_image_dim(i) == GLSL_SAMPLER_DIM_CUBE &&
             nir_intrinsic_image_array(i) && nir_dest_num_components(i->dest) > 2)
            sh_info().has_txq_cube_array_z_comp = true;
      }
      default:
         ;
      }
   }
   default:
      ;
   }

   return scan_sysvalue_access(instr);
}

enum chip_class ShaderFromNirProcessor::get_chip_class(void) const
{
   return m_chip_class;
}

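/* Reserve the registers that must live at fixed locations: the shader-type
 * specific implementation claims its system values first, then the SSBO
 * handler loads the RAT return address and, if atomics are used, the atomic
 * increment limits. SSBO bindings are offset past the image bindings. */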
bool ShaderFromNirProcessor::allocate_reserved_registers()
{
   bool retval = do_allocate_reserved_registers();
   m_ssbo_instr.load_rat_return_address();
   if (sh_info().uses_atomics)
      m_ssbo_instr.load_atomic_inc_limits();
   m_ssbo_instr.set_ssbo_offset(m_image_count);
   return retval;
}

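/* After register renumbering the GPR indices stored in the r600_shader I/O
 * tables are stale; rewrite them according to the rename map and mark the
 * remapped registers as used. */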
static void remap_shader_info(r600_shader& sh_info,
                              std::vector<rename_reg_pair>& map,
                              UNUSED ValueMap& values)
{
   for (unsigned i = 0; i < sh_info.ninput; ++i) {
      sfn_log << SfnLog::merge << "Input " << i << " gpr:" << sh_info.input[i].gpr
              << " of " << map.size() << "\n";

      assert(sh_info.input[i].gpr < map.size());
      auto new_index = map[sh_info.input[i].gpr];
      if (new_index.valid)
         sh_info.input[i].gpr = new_index.new_reg;
      map[sh_info.input[i].gpr].used = true;
   }

   for (unsigned i = 0; i < sh_info.noutput; ++i) {
      assert(sh_info.output[i].gpr < map.size());
      auto new_index = map[sh_info.output[i].gpr];
      if (new_index.valid)
         sh_info.output[i].gpr = new_index.new_reg;
      map[sh_info.output[i].gpr].used = true;
   }
}

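/* Compact the register file in two passes: first apply the merge map derived
 * from the live-range analysis, then renumber the surviving registers
 * densely, taking care not to drop registers that back shader inputs. */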
void ShaderFromNirProcessor::remap_registers()
{
   // register renumbering
   auto rc = register_count();
   if (!rc)
      return;

   std::vector<register_live_range> register_live_ranges(rc);

   auto temp_register_map = get_temp_registers();

   Shader sh{m_output, temp_register_map};
   LiverangeEvaluator().run(sh, register_live_ranges);
   auto register_map = get_temp_registers_remapping(register_live_ranges);

   sfn_log << SfnLog::merge << "=========Mapping===========\n";
   for (size_t i = 0; i < register_map.size(); ++i)
      if (register_map[i].valid)
         sfn_log << SfnLog::merge << "Map:" << i << " -> " << register_map[i].new_reg << "\n";

   ValueRemapper vmap0(register_map, temp_register_map);
   for (auto& block: m_output)
      block.remap_registers(vmap0);

   remap_shader_info(m_sh_info, register_map, temp_register_map);

   /* Mark inputs as used registers, these registers should not be remapped */
   for (auto& v: sh.m_temp) {
      if (v.second->type() == Value::gpr) {
         const auto& g = static_cast<const GPRValue&>(*v.second);
         if (g.is_input())
            register_map[g.sel()].used = true;
      }
   }

   int new_index = 0;
   for (auto& i : register_map) {
      i.valid = i.used;
      if (i.used)
         i.new_reg = new_index++;
   }

   ValueRemapper vmap1(register_map, temp_register_map);
   for (auto& ir: m_output)
      ir.remap_registers(vmap1);

   remap_shader_info(m_sh_info, register_map, temp_register_map);
}

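/* Record the GLSL type of each uniform and translate atomic counter uniforms
 * into the hardware atomic ranges the driver expects. Image and SSBO
 * uniforms flag image usage; images are also counted, presumably so that
 * SSBO bindings can be placed after them. */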
bool ShaderFromNirProcessor::process_uniforms(nir_variable *uniform)
{
   // m_uniform_type_map
   m_uniform_type_map[uniform->data.location] = uniform->type;

   if (uniform->type->contains_atomic()) {
      int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
      sh_info().nhwatomic += natomics;

      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      sh_info().uses_atomics = 1;

      struct r600_shader_atomic& atom = sh_info().atomics[sh_info().nhwatomic_ranges];
      ++sh_info().nhwatomic_ranges;
      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;

      atom.start = uniform->data.offset >> 2;
      atom.end = atom.start + natomics - 1;

      if (m_atomic_base_map.find(uniform->data.binding) ==
          m_atomic_base_map.end())
         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;

      m_next_hwatomic_loc += natomics;

      m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: "
              << m_sel.info.file_count[TGSI_FILE_HW_ATOMIC] << "\n";
   }

   auto type = uniform->type->is_array() ? uniform->type->without_array() : uniform->type;
   if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
      sh_info().uses_images = 1;
      if (uniform->type->is_array())
         sh_info().indirect_files |= 1 << TGSI_FILE_IMAGE;
   }

   if (uniform->type->is_image()) {
      ++m_image_count;
   }

   return true;
}

bool ShaderFromNirProcessor::process_inputs(nir_variable *input)
{
   return do_process_inputs(input);
}

bool ShaderFromNirProcessor::process_outputs(nir_variable *output)
{
   return do_process_outputs(output);
}

void ShaderFromNirProcessor::add_array_deref(nir_deref_instr *instr)
{
   nir_variable *var = nir_deref_instr_get_variable(instr);

   assert(nir_deref_mode_is(instr, nir_var_function_temp));
   assert(glsl_type_is_array(var->type));

   // add an alias for the index to the register(s);
}

void ShaderFromNirProcessor::set_var_address(nir_deref_instr *instr)
{
   auto& dest = instr->dest;
   unsigned index = dest.is_ssa ? dest.ssa.index : dest.reg.reg->index;
   assert(util_bitcount(instr->modes) == 1);
   m_var_mode[instr->var] = instr->modes;
   m_var_derefs[index] = instr->var;

   sfn_log << SfnLog::io << "Add var deref:" << index
           << " with DDL:" << instr->var->data.driver_location << "\n";
}

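/* Compute the SPI semantic ID used to match the outputs of one shader stage
 * with the inputs of the next: fixed-function semantics get 0, generic
 * varyings use sid + 1, and all other names pack name and sid into eight
 * bits. */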
void ShaderFromNirProcessor::evaluate_spi_sid(r600_shader_io& io)
{
   switch (io.name) {
   case TGSI_SEMANTIC_POSITION:
   case TGSI_SEMANTIC_PSIZE:
   case TGSI_SEMANTIC_EDGEFLAG:
   case TGSI_SEMANTIC_FACE:
   case TGSI_SEMANTIC_SAMPLEMASK:
   case TGSI_SEMANTIC_CLIPVERTEX:
      io.spi_sid = 0;
      break;
   case TGSI_SEMANTIC_GENERIC:
   case TGSI_SEMANTIC_TEXCOORD:
   case TGSI_SEMANTIC_PCOORD:
      io.spi_sid = io.sid + 1;
      break;
   default:
      /* For non-generic params - pack name and sid into 8 bits */
      io.spi_sid = (0x80 | (io.name << 3) | io.sid) + 1;
   }
}

const nir_variable *ShaderFromNirProcessor::get_deref_location(const nir_src& src) const
{
   unsigned index = src.is_ssa ? src.ssa->index : src.reg.reg->index;

   sfn_log << SfnLog::io << "Search for deref:" << index << "\n";

   auto v = m_var_derefs.find(index);
   if (v != m_var_derefs.end())
      return v->second;

   fprintf(stderr, "R600: could not find deref with index %d\n", index);

   return nullptr;

   /*nir_deref_instr *deref = nir_instr_as_deref(src.ssa->parent_instr);
   return  nir_deref_instr_get_variable(deref); */
}

bool ShaderFromNirProcessor::emit_tex_instruction(nir_instr* instr)
{
   return m_tex_instr.emit(instr);
}

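/* Track the most recently emitted ALU instruction so an open ALU group can
 * be closed early: when the next instruction reads a kcache constant through
 * an address register, the previous group gets its "last" flag set,
 * presumably because the indexed constant load cannot be merged into an
 * already open group. */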
void ShaderFromNirProcessor::emit_instruction(AluInstruction *ir)
{
   if (last_emitted_alu && !last_emitted_alu->flag(alu_last_instr)) {
      for (unsigned i = 0; i < ir->n_sources(); ++i) {
         auto& s = ir->src(i);
         if (s.type() == Value::kconst) {
            auto& c = static_cast<UniformValue&>(s);
            if (c.addr()) {
               last_emitted_alu->set_flag(alu_last_instr);
               break;
            }
         }
      }
   }
   last_emitted_alu = ir;
   emit_instruction_internal(ir);
}


void ShaderFromNirProcessor::emit_instruction(Instruction *ir)
{
   emit_instruction_internal(ir);
   last_emitted_alu = nullptr;
}

void ShaderFromNirProcessor::emit_instruction_internal(Instruction *ir)
{
   if (m_pending_else) {
      append_block(-1);
      m_output.back().emit(PInstruction(m_pending_else));
      append_block(1);
      m_pending_else = nullptr;
   }

   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
   if (m_output.empty())
      append_block(0);

   m_output.back().emit(Instruction::Pointer(ir));
}

void ShaderFromNirProcessor::emit_shader_start()
{
   /* placeholder, may become an abstract method */
   m_ssbo_instr.set_ssbo_offset(m_image_count);
}

bool ShaderFromNirProcessor::emit_jump_instruction(nir_jump_instr *instr)
{
   switch (instr->type) {
   case nir_jump_break: {
      auto b = new LoopBreakInstruction();
      emit_instruction(b);
      return true;
   }
   case nir_jump_continue: {
      auto b = new LoopContInstruction();
      emit_instruction(b);
      return true;
   }
   default: {
      nir_instr *i = reinterpret_cast<nir_instr*>(instr);
      sfn_log << SfnLog::err << "Jump instruction " << *i << " not supported\n";
      return false;
   }
   }
   return true;
}

bool ShaderFromNirProcessor::emit_alu_instruction(nir_instr* instr)
{
   return m_alu_instr.emit(instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction_override(UNUSED nir_deref_instr* instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_loop_start(int loop_id)
{
   LoopBeginInstruction *loop = new LoopBeginInstruction();
   emit_instruction(loop);
   m_loop_begin_block_map[loop_id] = loop;
   append_block(1);
   return true;
}

bool ShaderFromNirProcessor::emit_loop_end(int loop_id)
{
   auto start = m_loop_begin_block_map.find(loop_id);
   if (start == m_loop_begin_block_map.end()) {
      sfn_log << SfnLog::err << "End loop: Loop start for "
              << loop_id << " not found\n";
      return false;
   }
   m_nesting_depth--;
   m_block_number++;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number));
   LoopEndInstruction *loop = new LoopEndInstruction(start->second);
   emit_instruction(loop);

   m_loop_begin_block_map.erase(start);
   return true;
}

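/* Conditionals are emitted as a predicated ALU op: the condition is compared
 * against zero with pred_setne_int, the instruction updates the execution
 * mask and the predicate, and the CF state is pushed before the ALU clause
 * so that the matching ELSE/ENDIF can pop it again. */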
bool ShaderFromNirProcessor::emit_if_start(int if_id, nir_if *if_stmt)
{
   auto value = from_nir(if_stmt->condition, 0, 0);
   AluInstruction *pred = new AluInstruction(op2_pred_setne_int, PValue(new GPRValue(0,0)),
                                             value, Value::zero, EmitInstruction::last);
   pred->set_flag(alu_update_exec);
   pred->set_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   append_block(1);

   IfInstruction *ir = new IfInstruction(pred);
   emit_instruction(ir);
   assert(m_if_block_start_map.find(if_id) == m_if_block_start_map.end());
   m_if_block_start_map[if_id] = ir;
   return true;
}

bool ShaderFromNirProcessor::emit_else_start(int if_id)
{
   auto iif = m_if_block_start_map.find(if_id);
   if (iif == m_if_block_start_map.end()) {
      std::cerr << "Error: ELSE branch " << if_id << " without starting conditional branch\n";
      return false;
   }

   if (iif->second->type() != Instruction::cond_if) {
      std::cerr << "Error: ELSE branch " << if_id << " not started by an IF branch\n";
      return false;
   }
   IfInstruction *if_instr = static_cast<IfInstruction *>(iif->second);
   ElseInstruction *ir = new ElseInstruction(if_instr);
   m_if_block_start_map[if_id] = ir;
   m_pending_else = ir;

   return true;
}

bool ShaderFromNirProcessor::emit_ifelse_end(int if_id)
{
   auto ifelse = m_if_block_start_map.find(if_id);
   if (ifelse == m_if_block_start_map.end()) {
      std::cerr << "Error: ENDIF " << if_id << " without THEN or ELSE branch\n";
      return false;
   }

   if (ifelse->second->type() != Instruction::cond_if &&
       ifelse->second->type() != Instruction::cond_else) {
      std::cerr << "Error: ENDIF " << if_id << " doesn't close an IF or ELSE branch\n";
      return false;
   }

   /* Clear the pending else; if the else branch was empty, none will be emitted */
   m_pending_else = nullptr;

   append_block(-1);
   IfElseEndInstruction *ir = new IfElseEndInstruction();
   emit_instruction(ir);

   return true;
}

bool ShaderFromNirProcessor::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
{
   PValue src = get_temp_register();
   emit_instruction(new AluInstruction(op1_mov, src, Value::zero, {alu_write, alu_last_instr}));

   GPRVector dest = vec_from_nir(instr->dest, nir_dest_num_components(instr->dest));
   emit_instruction(new FetchTCSIOParam(dest, src, offset));

   return true;
}

bool ShaderFromNirProcessor::emit_load_local_shared(nir_intrinsic_instr* instr)
{
   auto address = varvec_from_nir(instr->src[0], instr->num_components);
   auto dest_value = varvec_from_nir(instr->dest, instr->num_components);

   emit_instruction(new LDSReadInstruction(address, dest_value));
   return true;
}

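/* Map the NIR shared-memory atomics onto the corresponding LDS opcodes; the
 * _RET forms are used throughout because the NIR intrinsics return the
 * previous value. */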
static unsigned
lds_op_from_intrinsic(nir_intrinsic_op op) {
   switch (op) {
   case nir_intrinsic_shared_atomic_add:
      return LDS_OP2_LDS_ADD_RET;
   case nir_intrinsic_shared_atomic_and:
      return LDS_OP2_LDS_AND_RET;
   case nir_intrinsic_shared_atomic_or:
      return LDS_OP2_LDS_OR_RET;
   case nir_intrinsic_shared_atomic_imax:
      return LDS_OP2_LDS_MAX_INT_RET;
   case nir_intrinsic_shared_atomic_umax:
      return LDS_OP2_LDS_MAX_UINT_RET;
   case nir_intrinsic_shared_atomic_imin:
      return LDS_OP2_LDS_MIN_INT_RET;
   case nir_intrinsic_shared_atomic_umin:
      return LDS_OP2_LDS_MIN_UINT_RET;
   case nir_intrinsic_shared_atomic_xor:
      return LDS_OP2_LDS_XOR_RET;
   case nir_intrinsic_shared_atomic_exchange:
      return LDS_OP2_LDS_XCHG_RET;
   case nir_intrinsic_shared_atomic_comp_swap:
      return LDS_OP3_LDS_CMP_XCHG_RET;
   default:
      unreachable("Unsupported shared atomic opcode");
   }
}

bool ShaderFromNirProcessor::emit_atomic_local_shared(nir_intrinsic_instr* instr)
{
   auto address = from_nir(instr->src[0], 0);
   auto dest_value = from_nir(instr->dest, 0);
   auto value = from_nir(instr->src[1], 0);
   auto op = lds_op_from_intrinsic(instr->intrinsic);

   if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_comp_swap)) {
      auto value2 = from_nir(instr->src[2], 0);
      emit_instruction(new LDSAtomicInstruction(dest_value, value, value2, address, op));
   } else {
      emit_instruction(new LDSAtomicInstruction(dest_value, value, address, op));
   }
   return true;
}

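/* An LDS write handles at most two values: fold the write mask so that
 * either the lower (xy) or the upper (zw) component pair is selected, then
 * emit a one- or two-value write accordingly. */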
bool ShaderFromNirProcessor::emit_store_local_shared(nir_intrinsic_instr* instr)
{
   unsigned write_mask = nir_intrinsic_write_mask(instr);

   auto address = from_nir(instr->src[1], 0);
   int swizzle_base = (write_mask & 0x3) ? 0 : 2;
   write_mask |= write_mask >> 2;

   auto value = from_nir(instr->src[0], swizzle_base);
   if (!(write_mask & 2)) {
      emit_instruction(new LDSWriteInstruction(address, 0, value));
   } else {
      auto value1 = from_nir(instr->src[0], swizzle_base + 1);
      emit_instruction(new LDSWriteInstruction(address, 0, value, value1));
   }

   return true;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (emit_intrinsic_instruction_override(instr))
      return true;

   if (m_ssbo_instr.emit(&instr->instr)) {
      m_sel.info.writes_memory = true;
      return true;
   }

   switch (instr->intrinsic) {
   case nir_intrinsic_load_deref: {
      auto var = get_deref_location(instr->src[0]);
      if (!var)
         return false;
      auto mode_helper = m_var_mode.find(var);
      if (mode_helper == m_var_mode.end()) {
         cerr << "r600-nir: variable '" << var->name << "' not found\n";
         return false;
      }
      switch (mode_helper->second) {
      case nir_var_shader_in:
         return emit_load_input_deref(var, instr);
      case nir_var_function_temp:
         return emit_load_function_temp(var, instr);
      default:
         cerr << "r600-nir: Unsupported mode " << mode_helper->second
              << " for src variable\n";
         return false;
      }
   }
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(instr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(instr);
   case nir_intrinsic_store_deref:
      return emit_store_deref(instr);
   case nir_intrinsic_load_uniform:
      return load_uniform(instr);
   case nir_intrinsic_discard:
   case nir_intrinsic_discard_if:
      return emit_discard_if(instr);
   case nir_intrinsic_load_ubo_vec4:
      return emit_load_ubo_vec4(instr);
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(instr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(instr, 16);
   case nir_intrinsic_load_local_shared_r600:
   case nir_intrinsic_load_shared:
      return emit_load_local_shared(instr);
   case nir_intrinsic_store_local_shared_r600:
   case nir_intrinsic_store_shared:
      return emit_store_local_shared(instr);
   case nir_intrinsic_control_barrier:
   case nir_intrinsic_memory_barrier_tcs_patch:
   case nir_intrinsic_memory_barrier_shared:
   case nir_intrinsic_memory_barrier_buffer:
   case nir_intrinsic_memory_barrier:
   case nir_intrinsic_memory_barrier_image:
   case nir_intrinsic_group_memory_barrier:
      return emit_barrier(instr);
   case nir_intrinsic_shared_atomic_add:
   case nir_intrinsic_shared_atomic_and:
   case nir_intrinsic_shared_atomic_or:
   case nir_intrinsic_shared_atomic_imax:
   case nir_intrinsic_shared_atomic_umax:
   case nir_intrinsic_shared_atomic_imin:
   case nir_intrinsic_shared_atomic_umin:
   case nir_intrinsic_shared_atomic_xor:
   case nir_intrinsic_shared_atomic_exchange:
   case nir_intrinsic_shared_atomic_comp_swap:
      return emit_atomic_local_shared(instr);
   case nir_intrinsic_copy_deref:
   case nir_intrinsic_load_constant:
   case nir_intrinsic_load_input:
   case nir_intrinsic_store_output:
   default:
      fprintf(stderr, "r600-nir: Unsupported intrinsic %d\n", instr->intrinsic);
      return false;
   }
   return false;
}

bool ShaderFromNirProcessor::emit_intrinsic_instruction_override(UNUSED nir_intrinsic_instr* instr)
{
   return false;
}

bool
ShaderFromNirProcessor::emit_load_function_temp(UNUSED const nir_variable *var, UNUSED nir_intrinsic_instr *instr)
{
   return false;
}

bool ShaderFromNirProcessor::emit_barrier(UNUSED nir_intrinsic_instr* instr)
{
   AluInstruction *ir = new AluInstruction(op0_group_barrier);
   ir->set_flag(alu_last_instr);
   emit_instruction(ir);
   return true;
}

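/* Bind a preloaded value to a NIR destination: for SSA destinations the
 * value can simply be registered as the backing register of the SSA index,
 * while non-SSA destinations need an explicit move. */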
bool ShaderFromNirProcessor::load_preloaded_value(const nir_dest& dest, int chan, PValue value, bool as_last)
{
   if (!dest.is_ssa) {
      auto ir = new AluInstruction(op1_mov, from_nir(dest, 0), value, {alu_write});
      if (as_last)
         ir->set_flag(alu_last_instr);
      emit_instruction(ir);
   } else {
      inject_register(dest.ssa.index, chan, value, true);
   }
   return true;
}

bool ShaderFromNirProcessor::emit_store_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir(instr->src[1], 0, 0);

   auto value = vec_from_nir_with_fetch_constant(instr->src[0], (1 << instr->num_components) - 1,
         swizzle_from_comps(instr->num_components));

   int writemask = nir_intrinsic_write_mask(instr);
   int align = nir_intrinsic_align_mul(instr);
   int align_offset = nir_intrinsic_align_offset(instr);

   WriteScratchInstruction *ir = nullptr;
   if (address->type() == Value::literal) {
      const auto& lv = static_cast<const LiteralValue&>(*address);
      ir = new WriteScratchInstruction(lv.value(), value, align, align_offset, writemask);
   } else {
      address = from_nir_with_fetch_constant(instr->src[1], 0);
      ir = new WriteScratchInstruction(address, value, align, align_offset,
                                       writemask, m_scratch_size);
   }
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_scratch(nir_intrinsic_instr* instr)
{
   PValue address = from_nir_with_fetch_constant(instr->src[0], 0);
   std::array<PValue, 4> dst_val;
   for (int i = 0; i < 4; ++i)
      dst_val[i] = from_nir(instr->dest, i < instr->num_components ? i : 7);

   GPRVector dst(dst_val);
   auto ir = new LoadFromScratch(dst, address, m_scratch_size);
   ir->prelude_append(new WaitAck(0));
   emit_instruction(ir);
   sh_info().needs_scratch_space = 1;
   return true;
}

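/* Build a GPR vector for a source operand: if all requested components
 * already live in the same GPR (and, if matching is requested, in the
 * expected channels), the source registers are reused and the unused slots
 * are filled with free channels; otherwise the components are copied into a
 * freshly allocated temporary with the requested swizzle. */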
GPRVector ShaderFromNirProcessor::vec_from_nir_with_fetch_constant(const nir_src& src,
                                                                   unsigned mask,
                                                                   const GPRVector::Swizzle& swizzle,
                                                                   bool match)
{
   bool use_same = true;
   GPRVector::Values v;

   std::array<bool,4> used_swizzles = {false, false, false, false};

   /* Check whether all sources come from a GPR, and,
    * if requested, whether they are swizzled as expected */

   for (int i = 0; i < 4 && use_same; ++i)  {
      if ((1 << i) & mask) {
         if (swizzle[i] < 4) {
            v[i] = from_nir(src, swizzle[i]);
            assert(v[i]);
            use_same &= (v[i]->type() == Value::gpr);
            if (match) {
               use_same &= (v[i]->chan() == swizzle[i]);
            }
            used_swizzles[v[i]->chan()] = true;
         }
      }
   }

   /* Now check whether all inputs come from the same GPR, and fill
    * empty slots in the vector with unused swizzles, bail out if
    * the sources are not from the same GPR
    */

   if (use_same) {
      int next_free_swizzle = 0;
      while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
         next_free_swizzle++;

      /* Find the first GPR index used */
      int i = 0;
      while (i < 4 && !v[i]) ++i;
      assert(i < 4);
      unsigned sel = v[i]->sel();

      for (i = 0; i < 4 && use_same; ++i) {
         if (!v[i]) {
            if (swizzle[i] >= 4)
               v[i] = PValue(new GPRValue(sel, swizzle[i]));
            else {
               assert(next_free_swizzle < 4);
               v[i] = PValue(new GPRValue(sel, next_free_swizzle));
               used_swizzles[next_free_swizzle] = true;
               while (next_free_swizzle < 4 && used_swizzles[next_free_swizzle])
                  next_free_swizzle++;
            }
         }
         else
            use_same &= v[i]->sel() == sel;
      }
   }

   /* We can't reuse the source data if they need re-swizzling, or if they
    * didn't all come from the same GPR, so copy them to a new vector
    */
   if (!use_same) {
      AluInstruction *ir = nullptr;
      GPRVector result(allocate_temp_register(), swizzle);
      for (int i = 0; i < 4; ++i) {
         if (swizzle[i] < 4 && (mask & (1 << i))) {
            ir = new AluInstruction(op1_mov, result[i], from_nir(src, swizzle[i]),
                                    EmitInstruction::write);
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
      return result;
   } else
      return GPRVector(v);
}

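/* UBO loads take one of three paths: a non-constant offset always needs a
 * vertex-cache fetch (with the buffer id either baked in or taken from a
 * register), while a constant offset can be satisfied lazily through kcache
 * reads, for both constant and indirectly given buffer ids. */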
bool ShaderFromNirProcessor::emit_load_ubo_vec4(nir_intrinsic_instr* instr)
{
   auto bufid = nir_src_as_const_value(instr->src[0]);
   auto buf_offset = nir_src_as_const_value(instr->src[1]);

   if (!buf_offset) {
      /* TODO: if buf_offset is constant then this can also be solved by using the CF index
       * on the ALU block, and this would probably make sense when there is more than one
       * load with the same buffer ID. */

      PValue addr = from_nir_with_fetch_constant(instr->src[1], 0);
      GPRVector trgt;
      std::array<int, 4> swz = {7,7,7,7};
      for (unsigned i = 0; i < 4; ++i) {
         if (i < nir_dest_num_components(instr->dest)) {
            trgt.set_reg_i(i, from_nir(instr->dest, i));
            swz[i] = i + nir_intrinsic_component(instr);
         } else {
            trgt.set_reg_i(i, from_nir(instr->dest, 7));
         }
      }

      FetchInstruction *ir;
      if (bufid) {
         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                   1, nullptr, bim_none);
      } else {
         PValue bufid = from_nir(instr->src[0], 0, 0);
         ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, 0,
                                   1, bufid, bim_zero);
      }
      ir->set_dest_swizzle(swz);
      emit_instruction(ir);
      m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
      return true;
   }

   if (bufid) {
      int buf_cmp = nir_intrinsic_component(instr);
      AluInstruction *ir = nullptr;
      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         int cmp = buf_cmp + i;
         assert(cmp < 4);
         auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, bufid->u32 + 1));
         if (instr->dest.is_ssa)
            load_preloaded_value(instr->dest, i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
      return true;

   } else {
      int buf_cmp = nir_intrinsic_component(instr);
      AluInstruction *ir = nullptr;
      auto kc_id = from_nir(instr->src[0], 0);
      for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
         int cmp = buf_cmp + i;
         auto u = PValue(new UniformValue(512 + buf_offset->u32, cmp, kc_id));
         if (instr->dest.is_ssa)
            load_preloaded_value(instr->dest, i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i), u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
      return true;
   }
}

bool ShaderFromNirProcessor::emit_discard_if(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << "emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "' (" << __func__ << ")\n";

   if (instr->intrinsic == nir_intrinsic_discard_if) {
      emit_instruction(new AluInstruction(op2_killne_int, PValue(new GPRValue(0,0)),
                          {from_nir(instr->src[0], 0, 0), Value::zero}, {alu_last_instr}));

   } else {
      emit_instruction(new AluInstruction(op2_kille, PValue(new GPRValue(0,0)),
                       {Value::zero, Value::zero}, {alu_last_instr}));
   }
   m_sh_info.uses_kill = 1;
   return true;
}

bool ShaderFromNirProcessor::emit_load_input_deref(const nir_variable *var,
                                                   nir_intrinsic_instr* instr)
{
   return do_emit_load_deref(var, instr);
}

bool ShaderFromNirProcessor::load_uniform(nir_intrinsic_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* If the target register is an SSA register and the loading is not
    * indirect then we can do lazy loading, i.e. the uniform value can
    * be used directly. Otherwise we have to load the data for real
    * right away.
    */
   auto literal = nir_src_as_const_value(instr->src[0]);
   int base = nir_intrinsic_base(instr);

   if (literal) {
      AluInstruction *ir = nullptr;

      for (int i = 0; i < instr->num_components; ++i) {
         PValue u = PValue(new UniformValue(512 + literal->u32 + base, i));
         sfn_log << SfnLog::io << "uniform "
                 << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";

         if (instr->dest.is_ssa)
            load_preloaded_value(instr->dest, i, u);
         else {
            ir = new AluInstruction(op1_mov, from_nir(instr->dest, i),
                                    u, {alu_write});
            emit_instruction(ir);
         }
      }
      if (ir)
         ir->set_flag(alu_last_instr);
   } else {
      PValue addr = from_nir(instr->src[0], 0, 0);
      return load_uniform_indirect(instr, addr, 16 * base, 0);
   }
   return true;
}

bool ShaderFromNirProcessor::load_uniform_indirect(nir_intrinsic_instr* instr, PValue addr, int offset, int bufferid)
{
   if (!addr) {
      std::cerr << "r600-nir: don't know how uniform is addressed\n";
      return false;
   }

   GPRVector trgt;
   std::array<int, 4> swz = {7,7,7,7};
   for (int i = 0; i < 4; ++i) {
      trgt.set_reg_i(i, from_nir(instr->dest, i));
      swz[i] = i;
   }

   if (addr->type() != Value::gpr) {
      emit_instruction(op1_mov, trgt.reg_i(0), {addr}, {alu_write, alu_last_instr});
      addr = trgt.reg_i(0);
   }

   auto ir = new FetchInstruction(vc_fetch, no_index_offset, trgt, addr, offset,
                                  bufferid, PValue(), bim_none);
   ir->set_dest_swizzle(swz);
   emit_instruction(ir);
   m_sh_info.indirect_files |= 1 << TGSI_FILE_CONSTANT;
   return true;
}

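/* Literals that match one of the hardware inline constants (0, 1, 1.0f or
 * 0.5f) are emitted as such, avoiding literal slots in the ALU group; 1-bit
 * booleans are expanded to 0 / 0xffffffff, and everything else becomes a
 * 32 bit literal value. */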
AluInstruction *ShaderFromNirProcessor::emit_load_literal(const nir_load_const_instr * literal, const nir_src& src, unsigned writemask)
{
   AluInstruction *ir = nullptr;
   for (int i = 0; i < literal->def.num_components; ++i) {
      if (writemask & (1 << i)){
         PValue lsrc;
         switch (literal->def.bit_size) {

         case 1:
            sfn_log << SfnLog::reg << "Got literal of bit size 1\n";
            lsrc = literal->value[i].b ?
                     PValue(new LiteralValue( 0xffffffff, i)) :
                     Value::zero;
            break;
         case 32:
            sfn_log << SfnLog::reg << "Got literal of bit size 32\n";
            if (literal->value[i].u32 == 0)
               lsrc = Value::zero;
            else if (literal->value[i].u32 == 1)
               lsrc = Value::one_i;
            else if (literal->value[i].f32 == 1.0f)
               lsrc = Value::one_f;
            else if (literal->value[i].f32 == 0.5f)
               lsrc = Value::zero_dot_5;
            else
               lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
            break;
         default:
            sfn_log << SfnLog::reg << "Got literal of bit size " << literal->def.bit_size
                    << " falling back to 32 bit\n";
            lsrc = PValue(new LiteralValue(literal->value[i].u32, i));
         }
         ir = new AluInstruction(op1_mov, create_register_from_nir_src(src, i), lsrc, EmitInstruction::write);

         emit_instruction(ir);
      }
   }
   return ir;
}

PValue ShaderFromNirProcessor::from_nir_with_fetch_constant(const nir_src& src, unsigned component, int channel)
{
   PValue value = from_nir(src, component);
   if (value->type() != Value::gpr &&
       value->type() != Value::gpr_vector &&
       value->type() != Value::gpr_array_value) {
      PValue retval = get_temp_register(channel);
      emit_instruction(new AluInstruction(op1_mov, retval, value,
                                          EmitInstruction::last_write));
      value = retval;
   }
   return value;
}

bool ShaderFromNirProcessor::emit_store_deref(nir_intrinsic_instr* instr)
{
   auto out_var = get_deref_location(instr->src[0]);
   if (!out_var)
      return false;

   return do_emit_store_deref(out_var, instr);
}

bool ShaderFromNirProcessor::emit_deref_instruction(nir_deref_instr* instr)
{
   r600::sfn_log << SfnLog::instr << __func__ << ": emit '"
                 << *reinterpret_cast<nir_instr*>(instr)
                 << "'\n";

   /* Give the specific shader type a chance to process this, i.e. Geometry and
    * tessellation shaders need specialized deref_array, for the other shaders
    * it is lowered.
    */
   if (emit_deref_instruction_override(instr))
      return true;

   switch (instr->deref_type) {
   case nir_deref_type_var:
      set_var_address(instr);
      return true;
   case nir_deref_type_array:
   case nir_deref_type_array_wildcard:
   case nir_deref_type_struct:
   case nir_deref_type_cast:
   default:
      fprintf(stderr, "R600: deref type %d not supported\n", instr->deref_type);
   }
   return false;
}

bool ShaderFromNirProcessor::emit_instruction(EAluOp opcode, PValue dest,
                                              std::vector<PValue> srcs,
                                              const std::set<AluModifiers>& m_flags)
{
   AluInstruction *ir = new AluInstruction(opcode, dest, srcs, m_flags);
   emit_instruction(ir);
   return true;
}

void ShaderFromNirProcessor::add_param_output_reg(int loc, const GPRVector *gpr)
{
   m_output_register_map[loc] = gpr;
}

void ShaderFromNirProcessor::emit_export_instruction(WriteoutInstruction *ir)
{
   r600::sfn_log << SfnLog::instr << "     as '" << *ir << "'\n";
   m_export_output.emit(PInstruction(ir));
}

const GPRVector * ShaderFromNirProcessor::output_register(unsigned location) const
{
   const GPRVector *retval = nullptr;
   auto val = m_output_register_map.find(location);
   if (val != m_output_register_map.end())
      retval = val->second;
   return retval;
}

void ShaderFromNirProcessor::set_input(unsigned pos, PValue var)
{
   r600::sfn_log << SfnLog::io << "Set input[" << pos << "] = " << *var << "\n";
   m_inputs[pos] = var;
}

void ShaderFromNirProcessor::set_output(unsigned pos, int sel)
{
   r600::sfn_log << SfnLog::io << "Set output[" << pos << "] = " << sel << "\n";
   m_outputs[pos] = sel;
}

void ShaderFromNirProcessor::append_block(int nesting_change)
{
   m_nesting_depth += nesting_change;
   m_output.push_back(InstructionBlock(m_nesting_depth, m_block_number++));
}

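/* Final fixup after code emission: run the shader-type specific
 * finalization, propagate the GPRs that ended up backing the inputs and
 * outputs into the r600_shader tables, and append the export block as the
 * last instruction block. */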
void ShaderFromNirProcessor::finalize()
{
   do_finalize();

   for (auto& i : m_inputs)
      m_sh_info.input[i.first].gpr = i.second->sel();

   for (auto& i : m_outputs)
      m_sh_info.output[i.first].gpr = i.second;

   m_output.push_back(m_export_output);
}

}