• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_shader.h"
28 #include "sfn_instr.h"
29 #include "sfn_instr_alugroup.h"
30 #include "sfn_instr_export.h"
31 #include "sfn_instr_fetch.h"
32 #include "sfn_instr_lds.h"
33 #include "sfn_instr_mem.h"
34 
35 #include "sfn_instr_controlflow.h"
36 #include "sfn_liverangeevaluator.h"
37 
38 #include "sfn_shader_cs.h"
39 #include "sfn_shader_fs.h"
40 #include "sfn_shader_vs.h"
41 #include "sfn_shader_gs.h"
42 #include "sfn_shader_tess.h"
43 
44 #include "sfn_debug.h"
45 #include "gallium/drivers/r600/r600_shader.h"
46 
47 #include "tgsi/tgsi_from_mesa.h"
48 
49 #include "nir.h"
50 
51 #include <numeric>
52 #include <sstream>
53 
54 namespace r600 {
55 
56 using std::string;
57 
58 std::pair<unsigned, unsigned>
r600_get_varying_semantic(unsigned varying_location)59 r600_get_varying_semantic(unsigned varying_location)
60 {
61    std::pair<unsigned, unsigned> result;
62    tgsi_get_gl_varying_semantic(static_cast<gl_varying_slot>(varying_location),
63                                 true, &result.first, &result.second);
64 
65    if (result.first == TGSI_SEMANTIC_GENERIC) {
66       result.second += 9;
67    } else if (result.first == TGSI_SEMANTIC_PCOORD) {
68       result.second = 8;
69    }
70    return result;
71 }
72 
set_sid(int sid)73 void ShaderIO::set_sid(int sid)
74 {
75    m_sid = sid;
76    switch (m_name) {
77    case TGSI_SEMANTIC_POSITION:
78    case TGSI_SEMANTIC_PSIZE:
79    case TGSI_SEMANTIC_EDGEFLAG:
80    case TGSI_SEMANTIC_FACE:
81    case TGSI_SEMANTIC_SAMPLEMASK:
82    case TGSI_SEMANTIC_CLIPVERTEX:
83       m_spi_sid = 0;
84    break;
85    case TGSI_SEMANTIC_GENERIC:
86    case TGSI_SEMANTIC_TEXCOORD:
87    case TGSI_SEMANTIC_PCOORD:
88       m_spi_sid = m_sid + 1;
89    break;
90    default:
91       /* For non-generic params - pack name and sid into 8 bits */
92       m_spi_sid = (0x80 | (m_name << 3) | m_sid) + 1;
93    }
94 }
95 
override_spi_sid(int spi)96 void ShaderIO::override_spi_sid(int spi)
97 {
98    m_spi_sid = spi;
99 }
100 
print(std::ostream & os) const101 void ShaderIO::print(std::ostream& os) const
102 {
103    os << m_type << " LOC:" << m_location << " NAME:" << m_name;
104    do_print(os);
105 
106    if (m_sid > 0) {
107       os << " SID:" << m_sid << " SPI_SID:" << m_spi_sid;
108    }
109 }
110 
111 
ShaderIO(const char * type,int loc,int name)112 ShaderIO::ShaderIO(const char *type, int loc, int name):
113    m_type(type),
114    m_location(loc),
115    m_name(name)
116 {
117 }
118 
ShaderOutput()119 ShaderOutput::ShaderOutput():
120    ShaderIO("OUTPUT", -1, -1)
121 {
122 }
123 
ShaderOutput(int location,int name,int writemask)124 ShaderOutput::ShaderOutput(int location, int name, int writemask):
125    ShaderIO("OUTPUT", location, name),
126    m_writemask(writemask)
127 {
128 
129 }
130 
do_print(std::ostream & os) const131 void ShaderOutput::do_print(std::ostream& os) const
132 {
133    os << " MASK:" << m_writemask;
134 }
135 
136 
ShaderInput(int location,int name)137 ShaderInput::ShaderInput(int location, int name):
138    ShaderIO("INPUT", location, name)
139 {
140 }
141 
ShaderInput()142 ShaderInput::ShaderInput():
143    ShaderInput(-1, -1)
144 {
145 }
146 
147 
do_print(std::ostream & os) const148 void ShaderInput::do_print(std::ostream& os) const
149 {
150    if (m_interpolator)
151       os << " INTERP:" << m_interpolator;
152    if (m_interpolate_loc)
153       os << " ILOC:" << m_interpolate_loc;
154    if (m_uses_interpolate_at_centroid)
155       os << " USE_CENTROID";
156 }
157 
set_interpolator(int interp,int interp_loc,bool uses_interpolate_at_centroid)158 void ShaderInput::set_interpolator(int interp, int interp_loc, bool uses_interpolate_at_centroid)
159 {
160    m_interpolator = interp;
161    m_interpolate_loc = interp_loc;
162    m_uses_interpolate_at_centroid = uses_interpolate_at_centroid;
163 }
164 
set_uses_interpolate_at_centroid()165 void ShaderInput::set_uses_interpolate_at_centroid()
166 {
167    m_uses_interpolate_at_centroid = true;
168 }
169 
Shader(const char * type_id)170 Shader::Shader(const char *type_id):
171    m_current_block(nullptr),
172    m_type_id(type_id),
173    m_chip_class(ISA_CC_R600),
174    m_next_block(0)
175 {
176    m_instr_factory = new InstrFactory();
177    m_chain_instr.this_shader = this;
178    start_new_block(0);
179 }
180 
set_input_gpr(int driver_lcation,int gpr)181 void Shader::set_input_gpr(int driver_lcation, int gpr)
182 {
183    auto i = m_inputs.find(driver_lcation);
184    assert(i != m_inputs.end());
185    i->second.set_gpr(gpr);
186 }
187 
add_info_from_string(std::istream & is)188 bool Shader::add_info_from_string(std::istream& is)
189 {
190    std::string type;
191    is >> type;
192 
193    if (type == "CHIPCLASS")
194       return read_chipclass(is);
195    if (type == "OUTPUT")
196       return read_output(is);
197    if (type == "INPUT")
198       return read_input(is);
199    if (type == "PROP")
200       return read_prop(is);
201    if (type == "SYSVALUES")
202       return allocate_registers_from_string(is, pin_fully);
203    if (type == "REGISTERS")
204       return allocate_registers_from_string(is, pin_free);
205    if (type == "ARRAYS")
206       return allocate_arrays_from_string(is);
207 
208 
209    return false;
210 }
211 
emit_instruction_from_string(const std::string & s)212 void Shader::emit_instruction_from_string(const std::string& s)
213 {
214 
215    sfn_log << SfnLog::instr << "Create Instr from '" << s << "'\n";
216    if (s == "BLOCK_START") {
217       if (!m_current_block->empty()) {
218          start_new_block(m_current_block->nesting_offset());
219          sfn_log << SfnLog::instr << "   Emit start block\n";
220       }
221       return;
222    }
223 
224    if (s == "BLOCK_END") {
225       return;
226    }
227 
228    auto ir = m_instr_factory->from_string(s, m_current_block->nesting_depth());
229    if (ir) {
230       emit_instruction(ir);
231       if (ir->end_block())
232          start_new_block(ir->nesting_offset());
233       sfn_log << SfnLog::instr << "   " << *ir << "\n";
234    }
235 }
236 
read_output(std::istream & is)237 bool Shader::read_output(std::istream& is)
238 {
239    string value;
240    is >> value;
241    int pos = int_from_string_with_prefix(value, "LOC:");
242    is >> value;
243    int name = int_from_string_with_prefix(value, "NAME:");
244    is >> value;
245    int mask = int_from_string_with_prefix(value, "MASK:");
246    ShaderOutput output(pos, name, mask);
247 
248    value.clear();
249    is >> value;
250    if (!value.empty()) {
251       int sid = int_from_string_with_prefix(value, "SID:");
252       output.set_sid(sid);
253       is >> value;
254       int spi_sid = int_from_string_with_prefix(value, "SPI_SID:");
255       assert(spi_sid == output.spi_sid());
256    }
257 
258    add_output(output);
259    return true;
260 }
261 
262 
read_input(std::istream & is)263 bool Shader::read_input(std::istream& is)
264 {
265    string value;
266    is >> value;
267    int pos = int_from_string_with_prefix(value, "LOC:");
268    is >> value;
269    int name = int_from_string_with_prefix(value, "NAME:");
270 
271    value.clear();
272 
273    ShaderInput input(pos, name);
274 
275    int interp = 0;
276    int interp_loc = 0;
277    bool use_centroid = false;
278 
279    is >> value;
280    while (!value.empty()) {
281       if (value.substr(0, 4) == "SID:") {
282          int sid = int_from_string_with_prefix(value, "SID:");
283          input.set_sid(sid);
284       } else if (value.substr(0, 8) == "SPI_SID:") {
285          int spi_sid = int_from_string_with_prefix(value, "SPI_SID:");
286          assert(spi_sid == input.spi_sid());
287       } else if (value.substr(0, 7) == "INTERP:") {
288          interp = int_from_string_with_prefix(value, "INTERP:");
289       } else if (value.substr(0, 5) == "ILOC:") {
290          interp_loc = int_from_string_with_prefix(value, "ILOC:");
291       } else if (value == "USE_CENTROID") {
292          use_centroid = true;
293       } else {
294          std::cerr << "Unknown parse value '" << value << "'";
295          assert(!value.c_str());
296       }
297       value.clear();
298       is >> value;
299    }
300 
301    input.set_interpolator(interp, interp_loc, use_centroid);
302 
303    add_input(input);
304    return true;
305 }
306 
allocate_registers_from_string(std::istream & is,Pin pin)307 bool Shader::allocate_registers_from_string(std::istream& is, Pin pin)
308 {
309    std::string line;
310    if (!std::getline(is, line))
311       return false;
312 
313    std::istringstream iline(line);
314 
315    while (!iline.eof())  {
316       string reg_str;
317       iline >> reg_str;
318 
319       if (reg_str.empty())
320          break;
321 
322       if (strchr(reg_str.c_str(), '@')) {
323          value_factory().dest_from_string(reg_str);
324       } else {
325          RegisterVec4::Swizzle swz = {0,1,2,3};
326          auto regs = value_factory().dest_vec4_from_string(reg_str, swz, pin);
327          for (int i = 0; i < 4; ++i) {
328             if (swz[i] < 4 && pin == pin_fully) {
329                regs[i]->pin_live_range(true, false);
330             }
331          }
332       }
333    }
334    return true;
335 }
336 
allocate_arrays_from_string(std::istream & is)337 bool Shader::allocate_arrays_from_string(std::istream& is)
338 {
339    std::string line;
340    if (!std::getline(is, line))
341       return false;
342 
343    std::istringstream iline(line);
344 
345    while (!iline.eof())  {
346       string reg_str;
347       iline >> reg_str;
348 
349       if (reg_str.empty())
350          break;
351 
352       value_factory().array_from_string(reg_str);
353    }
354    return true;
355 }
356 
read_chipclass(std::istream & is)357 bool Shader::read_chipclass(std::istream& is)
358 {
359    string name;
360    is >> name;
361    if (name == "R600")
362       m_chip_class = ISA_CC_R600;
363    else if (name == "R700")
364       m_chip_class = ISA_CC_R700;
365    else if (name == "EVERGREEN")
366       m_chip_class = ISA_CC_EVERGREEN;
367    else if (name == "CAYMAN")
368       m_chip_class = ISA_CC_CAYMAN;
369    else
370       return false;
371    return true;
372 }
373 
allocate_reserved_registers()374 void Shader::allocate_reserved_registers()
375 {
376    m_instr_factory->value_factory().set_virtual_register_base(0);
377    auto reserved_registers_end = do_allocate_reserved_registers();
378    m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end);
379    if (!m_atomics.empty()) {
380       m_atomic_update = value_factory().temp_register();
381       auto alu = new AluInstr(op1_mov, m_atomic_update,
382                               value_factory().one_i(),
383                               AluInstr::last_write);
384       alu->set_alu_flag(alu_no_schedule_bias);
385       emit_instruction(alu);
386    }
387 
388    if(m_flags.test(sh_needs_sbo_ret_address)) {
389       m_rat_return_address = value_factory().temp_register(0);
390       auto temp0 = value_factory().temp_register(0);
391       auto temp1 = value_factory().temp_register(1);
392       auto temp2 = value_factory().temp_register(2);
393 
394       auto group = new AluGroup();
395       group->add_instruction(new AluInstr(op1_mbcnt_32lo_accum_prev_int, temp0, value_factory().literal(-1), {alu_write}));
396       group->add_instruction(new AluInstr(op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write}));
397       emit_instruction(group);
398       emit_instruction(new AluInstr(op3_muladd_uint24, temp2, value_factory().inline_const(ALU_SRC_SE_ID, 0),
399                                           value_factory().literal(256), value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0), {alu_write, alu_last_instr}));
400       emit_instruction(new AluInstr(op3_muladd_uint24, m_rat_return_address,
401                                     temp2, value_factory().literal(0x40), temp0,
402                                     {alu_write, alu_last_instr}));
403    }
404 }
405 
translate_from_nir(nir_shader * nir,const pipe_stream_output_info * so_info,struct r600_shader * gs_shader,r600_shader_key & key,r600_chip_class chip_class)406 Shader *Shader::translate_from_nir(nir_shader *nir, const pipe_stream_output_info *so_info,
407                                    struct r600_shader* gs_shader,
408                                    r600_shader_key& key, r600_chip_class chip_class)
409 {
410    Shader *shader = nullptr;
411 
412    switch (nir->info.stage) {
413    case MESA_SHADER_FRAGMENT:
414       if (chip_class >= ISA_CC_EVERGREEN)
415          shader = new FragmentShaderEG(key);
416       else
417          shader = new FragmentShaderR600(key);
418    break;
419    case MESA_SHADER_VERTEX:
420       shader = new VertexShader(so_info, gs_shader, key);
421    break;
422    case MESA_SHADER_GEOMETRY:
423       shader = new GeometryShader(key);
424    break;
425    case MESA_SHADER_TESS_CTRL:
426       shader = new TCSShader(key);
427       break;
428    case MESA_SHADER_TESS_EVAL:
429       shader = new TESShader(so_info, gs_shader, key);
430       break;
431    case MESA_SHADER_COMPUTE:
432       shader = new ComputeShader(key);
433       break;
434    default:
435       return nullptr;
436    }
437 
438    shader->set_info(nir);
439 
440    shader->set_chip_class(chip_class);
441    if (!shader->process(nir))
442       return nullptr;
443 
444    return shader;
445 }
446 
set_info(nir_shader * nir)447 void Shader::set_info(nir_shader *nir)
448 {
449    m_scratch_size = nir->scratch_size;
450 }
451 
value_factory()452 ValueFactory& Shader::value_factory()
453 {
454    return m_instr_factory->value_factory();
455 }
456 
457 
process(nir_shader * nir)458 bool Shader::process(nir_shader *nir)
459 {
460    m_ssbo_image_offset = nir->info.num_images;
461 
462    if (nir->info.use_legacy_math_rules)
463       set_flag(sh_legacy_math_rules);
464 
465    nir_foreach_uniform_variable(var, nir)
466          scan_uniforms(var);
467 
468    // at this point all functions should be inlined
469    const nir_function *func = reinterpret_cast<const nir_function *>(exec_list_get_head_const(&nir->functions));
470 
471    if (!scan_shader(func))
472       return false;
473 
474    allocate_reserved_registers();
475 
476    allocate_local_registers(&func->impl->registers);
477 
478    sfn_log << SfnLog::trans << "Process shader \n";
479    foreach_list_typed(nir_cf_node, node, node, &func->impl->body) {
480       if (!process_cf_node(node))
481          return false;
482    }
483 
484    finalize();
485 
486    return true;
487 }
488 
allocate_local_registers(const exec_list * registers)489 void Shader::allocate_local_registers(const exec_list *registers)
490 {
491    if (value_factory().allocate_registers(registers))
492       m_indirect_files |= 1 << TGSI_FILE_TEMPORARY;
493 }
494 
scan_shader(const nir_function * func)495 bool Shader::scan_shader(const nir_function *func)
496 {
497 
498    nir_foreach_block(block, func->impl) {
499       nir_foreach_instr(instr, block) {
500          if (!scan_instruction(instr)) {
501             fprintf(stderr, "Unhandled sysvalue access ");
502             nir_print_instr(instr, stderr);
503             fprintf(stderr, "\n");
504             return false;
505          }
506       }
507    }
508 
509    int lds_pos = 0;
510    for (auto& [index, input] : m_inputs) {
511       if (input.need_lds_pos()) {
512          if (chip_class() < ISA_CC_EVERGREEN)
513             input.set_gpr(lds_pos);
514          input.set_lds_pos(lds_pos++);
515       }
516    }
517 
518    int param_id = 0;
519    for (auto& [index, out] : m_outputs) {
520       if (out.is_param())
521          out.set_pos(param_id++);
522    }
523 
524    return true;
525 }
526 
scan_uniforms(nir_variable * uniform)527 bool Shader::scan_uniforms(nir_variable *uniform)
528 {
529    if (uniform->type->contains_atomic()) {
530       int natomics = uniform->type->atomic_size() / ATOMIC_COUNTER_SIZE;
531       m_nhwatomic += natomics;
532 
533       if (uniform->type->is_array())
534          m_indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
535 
536       m_flags.set(sh_uses_atomics);
537 
538       r600_shader_atomic atom = {0};
539 
540       atom.buffer_id = uniform->data.binding;
541       atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;
542 
543       atom.start = uniform->data.offset >> 2;
544       atom.end = atom.start + natomics - 1;
545 
546       if (m_atomic_base_map.find(uniform->data.binding) ==
547           m_atomic_base_map.end())
548          m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;
549 
550       m_next_hwatomic_loc += natomics;
551 
552       m_atomic_file_count += atom.end  - atom.start + 1;
553 
554       sfn_log << SfnLog::io << "HW_ATOMIC file count: "
555               << m_atomic_file_count << "\n";
556 
557       m_atomics.push_back(atom);
558    }
559 
560    auto type = uniform->type->is_array() ? uniform->type->without_array(): uniform->type;
561    if (type->is_image() || uniform->data.mode == nir_var_mem_ssbo) {
562       m_flags.set(sh_uses_images);
563       if (uniform->type->is_array() && ! (uniform->data.mode == nir_var_mem_ssbo))
564          m_indirect_files |= 1 << TGSI_FILE_IMAGE;
565    }
566 
567    return true;
568 }
569 
570 
scan_instruction(nir_instr * instr)571 bool Shader::scan_instruction(nir_instr *instr)
572 {
573    if (do_scan_instruction(instr))
574       return true;
575 
576    if (instr->type != nir_instr_type_intrinsic)
577       return true;
578 
579    auto intr = nir_instr_as_intrinsic(instr);
580 
581    // handle unhandled instructions
582    switch (intr->intrinsic) {
583    case nir_intrinsic_ssbo_atomic_add:
584    case nir_intrinsic_ssbo_atomic_comp_swap:
585    case nir_intrinsic_ssbo_atomic_or:
586    case nir_intrinsic_ssbo_atomic_xor:
587    case nir_intrinsic_ssbo_atomic_imax:
588    case nir_intrinsic_ssbo_atomic_imin:
589    case nir_intrinsic_ssbo_atomic_umax:
590    case nir_intrinsic_ssbo_atomic_umin:
591    case nir_intrinsic_ssbo_atomic_and:
592    case nir_intrinsic_ssbo_atomic_exchange:
593    case nir_intrinsic_image_load:
594    case nir_intrinsic_image_atomic_add:
595    case nir_intrinsic_image_atomic_and:
596    case nir_intrinsic_image_atomic_or:
597    case nir_intrinsic_image_atomic_xor:
598    case nir_intrinsic_image_atomic_exchange:
599    case nir_intrinsic_image_atomic_comp_swap:
600    case nir_intrinsic_image_atomic_umin:
601    case nir_intrinsic_image_atomic_umax:
602    case nir_intrinsic_image_atomic_imin:
603    case nir_intrinsic_image_atomic_imax:
604       m_flags.set(sh_needs_sbo_ret_address);
605       FALLTHROUGH;
606    case nir_intrinsic_image_store:
607    case nir_intrinsic_store_ssbo:
608       m_flags.set(sh_writes_memory);
609       m_flags.set(sh_uses_images);
610       break;
611    case nir_intrinsic_memory_barrier_image:
612    case nir_intrinsic_memory_barrier_buffer:
613    case nir_intrinsic_memory_barrier:
614    case nir_intrinsic_group_memory_barrier:
615       m_chain_instr.prepare_mem_barrier = true;
616    default:
617       ;
618    }
619    return true;
620 }
621 
process_cf_node(nir_cf_node * node)622 bool Shader::process_cf_node(nir_cf_node *node)
623 {
624    SFN_TRACE_FUNC(SfnLog::flow, "CF");
625 
626    switch (node->type) {
627    case nir_cf_node_block:
628    return process_block(nir_cf_node_as_block(node));
629    case nir_cf_node_if:
630    return process_if(nir_cf_node_as_if(node));
631    case nir_cf_node_loop:
632    return process_loop(nir_cf_node_as_loop(node));
633    default:
634    return false;
635    }
636 
637 }
638 
639 static bool
child_block_empty(const exec_list & list)640 child_block_empty (const exec_list& list)
641 {
642    if (list.is_empty())
643       return true;
644 
645    bool result = true;
646 
647    foreach_list_typed(nir_cf_node, n, node, &list) {
648 
649       if (n->type == nir_cf_node_block) {
650          if (!nir_cf_node_as_block(n)->instr_list.is_empty())
651             return false;
652       }
653       if (n->type == nir_cf_node_if)
654          return false;
655    }
656    return result;
657 }
658 
process_if(nir_if * if_stmt)659 bool Shader::process_if(nir_if *if_stmt)
660 {
661    SFN_TRACE_FUNC(SfnLog::flow, "IF");
662 
663    if (!emit_if_start(if_stmt))
664       return false;
665 
666    foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list) {
667       SFN_TRACE_FUNC(SfnLog::flow, "IF-then");
668          if (!process_cf_node(n))
669             return false;
670    }
671 
672    if (!child_block_empty(if_stmt->else_list)) {
673       if (!emit_control_flow(ControlFlowInstr::cf_else))
674          return false;
675       foreach_list_typed(nir_cf_node, n, node, &if_stmt->else_list)
676             if (!process_cf_node(n)) return false;
677    }
678 
679    if (!emit_control_flow(ControlFlowInstr::cf_endif))
680       return false;
681 
682    return true;
683 }
684 
emit_if_start(nir_if * if_stmt)685 bool Shader::emit_if_start(nir_if *if_stmt)
686 {
687    auto value = value_factory().src(if_stmt->condition, 0);
688    AluInstr *pred = new AluInstr(op2_pred_setne_int, value_factory().temp_register(),
689                                  value, value_factory().zero(), AluInstr::last);
690    pred->set_alu_flag(alu_update_exec);
691    pred->set_alu_flag(alu_update_pred);
692    pred->set_cf_type(cf_alu_push_before);
693 
694    IfInstr *ir = new IfInstr(pred);
695    emit_instruction(ir);
696    start_new_block(1);
697    return true;
698 }
699 
emit_control_flow(ControlFlowInstr::CFType type)700 bool Shader::emit_control_flow(ControlFlowInstr::CFType type)
701 {
702    auto ir = new ControlFlowInstr(type);
703    emit_instruction(ir);
704    int depth = 0;
705    switch (type) {
706    case ControlFlowInstr::cf_loop_begin:
707       m_loops.push_back(ir);
708       m_nloops++;
709       depth = 1;
710    break;
711    case ControlFlowInstr::cf_loop_end:
712       m_loops.pop_back();
713       FALLTHROUGH;
714    case ControlFlowInstr::cf_endif:
715       depth = -1;
716    break;
717    default:
718       ;
719    }
720 
721    start_new_block(depth);
722    return true;
723 }
724 
process_loop(nir_loop * node)725 bool Shader::process_loop(nir_loop *node)
726 {
727    SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
728    if (!emit_control_flow(ControlFlowInstr::cf_loop_begin))
729       return false;
730 
731    foreach_list_typed(nir_cf_node, n, node, &node->body)
732          if (!process_cf_node(n)) return false;
733 
734    if (!emit_control_flow(ControlFlowInstr::cf_loop_end))
735       return false;
736 
737    return true;
738 }
739 
process_block(nir_block * block)740 bool Shader::process_block(nir_block *block)
741 {
742    SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
743 
744    nir_foreach_instr(instr, block) {
745       sfn_log << SfnLog::instr << "FROM:" << *instr << "\n";
746       bool r = process_instr(instr);
747       if (!r) {
748          sfn_log << SfnLog::err << "R600: Unsupported instruction: "
749                  << *instr << "\n";
750          return false;
751       }
752    }
753    return true;
754 }
755 
process_instr(nir_instr * instr)756 bool Shader::process_instr(nir_instr *instr)
757 {
758    return m_instr_factory->from_nir(instr, *this);
759 }
760 
process_intrinsic(nir_intrinsic_instr * intr)761 bool Shader::process_intrinsic(nir_intrinsic_instr *intr)
762 {
763    if (process_stage_intrinsic(intr))
764       return true;
765 
766    if (GDSInstr::emit_atomic_counter(intr, *this)) {
767       set_flag(sh_writes_memory);
768       return true;
769    }
770 
771    if (RatInstr::emit(intr, *this))
772       return true;
773 
774    switch (intr->intrinsic) {
775    case nir_intrinsic_store_output: return store_output(intr);
776    case nir_intrinsic_load_input: return load_input(intr);
777    case nir_intrinsic_load_uniform: return load_uniform(intr);
778    case nir_intrinsic_load_ubo_vec4: return load_ubo(intr);
779    case nir_intrinsic_store_scratch: return emit_store_scratch(intr);
780    case nir_intrinsic_load_scratch: return emit_load_scratch(intr);
781    case nir_intrinsic_store_local_shared_r600: return emit_local_store(intr);
782    case nir_intrinsic_load_local_shared_r600: return emit_local_load(intr);
783    case nir_intrinsic_load_tcs_in_param_base_r600: return emit_load_tcs_param_base(intr, 0);
784    case nir_intrinsic_load_tcs_out_param_base_r600: return emit_load_tcs_param_base(intr, 16);
785       // We only emit the group barrier, barriers across work groups
786       // are not yet implemented
787    case nir_intrinsic_control_barrier:
788    case nir_intrinsic_memory_barrier_tcs_patch:
789    case nir_intrinsic_memory_barrier_shared:
790       return emit_barrier(intr);
791    case nir_intrinsic_memory_barrier_atomic_counter:
792       return true;
793    case nir_intrinsic_group_memory_barrier:
794    case nir_intrinsic_memory_barrier_image:
795    case nir_intrinsic_memory_barrier_buffer:
796    case nir_intrinsic_memory_barrier:
797       return emit_wait_ack();
798 
799    case nir_intrinsic_shared_atomic_add:
800    case nir_intrinsic_shared_atomic_and:
801    case nir_intrinsic_shared_atomic_or:
802    case nir_intrinsic_shared_atomic_imax:
803    case nir_intrinsic_shared_atomic_umax:
804    case nir_intrinsic_shared_atomic_imin:
805    case nir_intrinsic_shared_atomic_umin:
806    case nir_intrinsic_shared_atomic_xor:
807    case nir_intrinsic_shared_atomic_exchange:
808    case nir_intrinsic_shared_atomic_comp_swap:
809       return emit_atomic_local_shared(intr);
810    case nir_intrinsic_shader_clock:
811       return emit_shader_clock(intr);
812 
813    default:
814    return false;
815    }
816 }
817 
818 static ESDOp
lds_op_from_intrinsic(nir_intrinsic_op op,bool ret)819 lds_op_from_intrinsic(nir_intrinsic_op op, bool ret) {
820    switch (op) {
821    case nir_intrinsic_shared_atomic_add:
822       return ret ? LDS_ADD_RET : LDS_ADD;
823    case nir_intrinsic_shared_atomic_and:
824       return ret ? LDS_AND_RET : LDS_AND;
825    case nir_intrinsic_shared_atomic_or:
826       return ret ? LDS_OR_RET : LDS_OR;
827    case nir_intrinsic_shared_atomic_imax:
828       return ret ? LDS_MAX_INT_RET : LDS_MAX_INT;
829    case nir_intrinsic_shared_atomic_umax:
830       return ret ? LDS_MAX_UINT_RET : LDS_MAX_UINT;
831    case nir_intrinsic_shared_atomic_imin:
832       return ret ? LDS_MIN_INT_RET : LDS_MIN_INT;
833    case nir_intrinsic_shared_atomic_umin:
834       return ret ? LDS_MIN_UINT_RET : LDS_MIN_UINT;
835    case nir_intrinsic_shared_atomic_xor:
836       return ret ? LDS_XOR_RET : LDS_XOR;
837    case nir_intrinsic_shared_atomic_exchange:
838       return LDS_XCHG_RET;
839    case nir_intrinsic_shared_atomic_comp_swap:
840       return LDS_CMP_XCHG_RET;
841    default:
842       unreachable("Unsupported shared atomic opcode");
843    }
844 }
845 
emit_load_to_register(PVirtualValue src)846 PRegister Shader::emit_load_to_register(PVirtualValue src)
847 {
848    assert(src);
849    PRegister dest = src->as_register();
850 
851    if (!dest) {
852       dest = value_factory().temp_register();
853       emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
854    }
855    return dest;
856 }
857 
emit_atomic_local_shared(nir_intrinsic_instr * instr)858 bool Shader::emit_atomic_local_shared(nir_intrinsic_instr* instr)
859 {
860    bool uses_retval = !instr->dest.is_ssa || !list_is_empty(&instr->dest.ssa.uses);
861 
862    auto& vf = value_factory();
863 
864    auto dest_value = uses_retval ? vf.dest(instr->dest, 0, pin_free) : nullptr;
865 
866    auto op = lds_op_from_intrinsic(instr->intrinsic, uses_retval);
867 
868    auto address = vf.src(instr->src[0], 0);
869 
870    AluInstr::SrcValues src;
871    src.push_back(vf.src(instr->src[1], 0));
872 
873    if (unlikely(instr->intrinsic ==nir_intrinsic_shared_atomic_comp_swap))
874       src.push_back(vf.src(instr->src[2], 0));
875    emit_instruction(new LDSAtomicInstr(op, dest_value, address, src));
876    return true;
877 }
878 
evaluate_resource_offset(nir_intrinsic_instr * instr,int src_id)879 auto Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id) -> std::pair<int, PRegister>
880 {
881    auto& vf = value_factory();
882 
883    PRegister uav_id{nullptr};
884    int offset = 0;
885 
886    auto uav_id_const = nir_src_as_const_value(instr->src[src_id]);
887    if (uav_id_const) {
888       offset += uav_id_const->u32;
889    } else {
890       auto uav_id_val = vf.src(instr->src[src_id], 0);
891       if (uav_id_val->as_register()) {
892          uav_id = uav_id_val->as_register();
893       } else {
894          uav_id = vf.temp_register();
895          emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val,
896                                  AluInstr::last_write));
897       }
898    }
899    return std::make_pair(offset, uav_id);
900 }
901 
902 
emit_store_scratch(nir_intrinsic_instr * intr)903 bool Shader::emit_store_scratch(nir_intrinsic_instr *intr)
904 {
905    auto& vf = m_instr_factory->value_factory();
906 
907    int writemask = nir_intrinsic_write_mask(intr);
908 
909 
910    RegisterVec4::Swizzle swz = {7,7,7,7};
911 
912    for (unsigned i = 0; i < intr->num_components; ++i)
913       swz[i] = (1 << i) & writemask ? i : 7;
914 
915    auto value = vf.temp_vec4(pin_group, swz);
916    AluInstr *ir = nullptr;
917    for (unsigned i = 0; i < intr->num_components; ++i) {
918       if (value[i]->chan() < 4) {
919          ir = new AluInstr(op1_mov, value[i], vf.src(intr->src[0], i), AluInstr::write);
920          ir->set_alu_flag(alu_no_schedule_bias);
921          emit_instruction(ir);
922       }
923    }
924    if (!ir)
925       return true;
926 
927    ir->set_alu_flag(alu_last_instr);
928 
929    auto address = vf.src(intr->src[1], 0);
930 
931 
932    int align = nir_intrinsic_align_mul(intr);
933    int align_offset = nir_intrinsic_align_offset(intr);
934 
935    ScratchIOInstr *ws_ir = nullptr;
936 
937    int offset = -1;
938    if (address->as_literal()) {
939       offset = address->as_literal()->value();
940    } else if (address->as_inline_const()) {
941       auto il = address->as_inline_const();
942       if (il->sel() == ALU_SRC_0)
943          offset = 0;
944       else if (il->sel() == ALU_SRC_1_INT)
945          offset = 1;
946    }
947 
948    if (offset >= 0) {
949       ws_ir = new ScratchIOInstr(value, offset, align, align_offset, writemask);
950    } else {
951       auto addr_temp  = vf.temp_register(0);
952       auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write);
953       load_addr->set_alu_flag(alu_no_schedule_bias);
954       emit_instruction(load_addr);
955 
956       ws_ir = new ScratchIOInstr(value, addr_temp, align, align_offset, writemask, m_scratch_size);
957    }
958    emit_instruction(ws_ir);
959 
960    m_flags.set(sh_needs_scratch_space);
961    return true;
962 }
963 
emit_load_scratch(nir_intrinsic_instr * intr)964 bool Shader::emit_load_scratch(nir_intrinsic_instr *intr)
965 {
966    auto addr = value_factory().src(intr->src[0], 0);
967    auto dest = value_factory().dest_vec4(intr->dest, pin_group);
968 
969    if (chip_class() >= ISA_CC_R700) {
970       RegisterVec4::Swizzle dest_swz = {7,7,7,7};
971 
972       for (unsigned i = 0; i < intr->num_components; ++i)
973          dest_swz[i] = i;
974 
975       auto *ir = new LoadFromScratch(dest, dest_swz, addr, m_scratch_size);
976       emit_instruction(ir);
977       chain_scratch_read(ir);
978    } else {
979       int align = nir_intrinsic_align_mul(intr);
980       int align_offset = nir_intrinsic_align_offset(intr);
981 
982 
983       int offset = -1;
984       if (addr->as_literal()) {
985          offset = addr->as_literal()->value();
986       } else if (addr->as_inline_const()) {
987          auto il = addr->as_inline_const();
988          if (il->sel() == ALU_SRC_0)
989             offset = 0;
990          else if (il->sel() == ALU_SRC_1_INT)
991             offset = 1;
992       }
993 
994       ScratchIOInstr *ir = nullptr;
995       if (offset >= 0) {
996          ir = new ScratchIOInstr(dest, offset, align, align_offset, 0xf, true);
997       } else {
998          auto addr_temp  = value_factory().temp_register(0);
999          auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::last_write);
1000          load_addr->set_alu_flag(alu_no_schedule_bias);
1001          emit_instruction(load_addr);
1002 
1003          ir = new ScratchIOInstr(dest, addr_temp, align, align_offset, 0xf,
1004                                  m_scratch_size, true);
1005       }
1006       emit_instruction(ir);
1007    }
1008 
1009 
1010    m_flags.set(sh_needs_scratch_space);
1011 
1012    return true;
1013 
1014 }
1015 
emit_local_store(nir_intrinsic_instr * instr)1016 bool Shader::emit_local_store(nir_intrinsic_instr *instr)
1017 {
1018    unsigned write_mask = nir_intrinsic_write_mask(instr);
1019 
1020    auto address = value_factory().src(instr->src[1], 0);
1021    int swizzle_base = 0;
1022    unsigned w = write_mask;
1023    while (!(w & 1)) {
1024       ++swizzle_base;
1025       w >>= 1;
1026    }
1027    write_mask = write_mask >> swizzle_base;
1028 
1029    if ((write_mask & 3) != 3) {
1030       auto value = value_factory().src(instr->src[0], swizzle_base);
1031       emit_instruction(new LDSAtomicInstr(LDS_WRITE, nullptr, address, {value}));
1032    } else {
1033       auto value = value_factory().src(instr->src[0], swizzle_base);
1034       auto value1 = value_factory().src(instr->src[0], swizzle_base + 1);
1035       emit_instruction(new LDSAtomicInstr(LDS_WRITE_REL, nullptr, address, {value, value1}));
1036    }
1037    return true;
1038 }
1039 
emit_local_load(nir_intrinsic_instr * instr)1040 bool Shader::emit_local_load(nir_intrinsic_instr* instr)
1041 {
1042    auto address = value_factory().src_vec(instr->src[0], instr->num_components);
1043    auto dest_value = value_factory().dest_vec(instr->dest, instr->num_components);
1044    emit_instruction(new LDSReadInstr(dest_value, address));
1045    return true;
1046 }
1047 
chain_scratch_read(Instr * instr)1048 void Shader::chain_scratch_read(Instr *instr)
1049 {
1050    m_chain_instr.apply(instr, &m_chain_instr.last_scratch_instr);
1051 }
1052 
chain_ssbo_read(Instr * instr)1053 void Shader::chain_ssbo_read(Instr *instr)
1054 {
1055    m_chain_instr.apply(instr, &m_chain_instr.last_ssbo_instr);
1056 }
1057 
emit_wait_ack()1058 bool Shader::emit_wait_ack()
1059 {
1060    start_new_block(0);
1061    emit_instruction(new ControlFlowInstr(ControlFlowInstr::cf_wait_ack));
1062    start_new_block(0);
1063    return true;
1064 }
1065 
visit(ScratchIOInstr * instr)1066 void Shader::InstructionChain::visit(ScratchIOInstr *instr)
1067 {
1068    apply(instr, &last_scratch_instr);
1069 }
1070 
visit(GDSInstr * instr)1071 void Shader::InstructionChain::visit(GDSInstr *instr)
1072 {
1073    apply(instr, &last_gds_instr);
1074    for (auto& loop : this_shader->m_loops) {
1075       loop->set_instr_flag(Instr::vpm);
1076    }
1077 }
1078 
visit(RatInstr * instr)1079 void Shader::InstructionChain::visit(RatInstr *instr)
1080 {
1081    apply(instr, &last_ssbo_instr);
1082    for (auto& loop : this_shader->m_loops) {
1083       loop->set_instr_flag(Instr::vpm);
1084    }
1085 
1086    if (prepare_mem_barrier)
1087       instr->set_ack();
1088 
1089    if (this_shader->m_current_block->inc_rat_emitted() > 15)
1090       this_shader->start_new_block(0);
1091 }
1092 
apply(Instr * current,Instr ** last)1093 void Shader::InstructionChain::apply(Instr *current, Instr **last) {
1094    if (*last)
1095       current->add_required_instr(*last);
1096    *last = current;
1097 }
1098 
emit_instruction(PInst instr)1099 void Shader::emit_instruction(PInst instr)
1100 {
1101    sfn_log << SfnLog::instr << "   " << *instr << "\n";
1102    instr->accept(m_chain_instr);
1103    m_current_block->push_back(instr);
1104 }
1105 
load_uniform(nir_intrinsic_instr * intr)1106 bool Shader::load_uniform(nir_intrinsic_instr *intr)
1107 {
1108    auto literal = nir_src_as_const_value(intr->src[0]);
1109 
1110    if (literal) {
1111       AluInstr *ir = nullptr;
1112       auto pin = intr->dest.is_ssa && nir_dest_num_components(intr->dest) == 1 ?
1113                pin_free : pin_none;
1114       for (unsigned i = 0; i < nir_dest_num_components(intr->dest); ++i) {
1115 
1116          sfn_log << SfnLog::io << "uniform "
1117                  << intr->dest.ssa.index << " const["<< i << "]: "<< intr->const_index[i] << "\n";
1118 
1119          auto uniform = value_factory().uniform(intr, i);
1120          ir = new AluInstr(op1_mov, value_factory().dest(intr->dest, i, pin),
1121                            uniform, {alu_write});
1122          emit_instruction(ir);
1123       }
1124       if (ir)
1125          ir->set_alu_flag(alu_last_instr);
1126       return true;
1127    } else {
1128       auto addr = value_factory().src(intr->src[0], 0);
1129       return load_uniform_indirect(intr, addr, 16 * nir_intrinsic_base(intr), 0);
1130    }
1131 }
1132 
load_uniform_indirect(nir_intrinsic_instr * intr,PVirtualValue addr,int offset,int buffer_id)1133 bool Shader::load_uniform_indirect(nir_intrinsic_instr *intr, PVirtualValue addr,
1134                                    int offset , int buffer_id)
1135 {
1136    auto addr_reg = addr->as_register();
1137    if (!addr) {
1138       auto tmp = value_factory().temp_register();
1139       emit_instruction(new AluInstr(op1_mov, tmp, addr, AluInstr::last_write));
1140       addr = tmp;
1141    }
1142 
1143    RegisterVec4 dest = value_factory().dest_vec4(intr->dest, pin_group);
1144 
1145    auto ir = new LoadFromBuffer(dest, {0,1,2,3}, addr_reg, offset, buffer_id,
1146                                 nullptr, fmt_32_32_32_32_float);
1147    emit_instruction(ir);
1148    m_flags.set(sh_indirect_const_file);
1149    return true;
1150 }
1151 
emit_load_tcs_param_base(nir_intrinsic_instr * instr,int offset)1152 bool Shader::emit_load_tcs_param_base(nir_intrinsic_instr* instr, int offset)
1153 {
1154    auto src = value_factory().temp_register();
1155    emit_instruction(new AluInstr(op1_mov, src, value_factory().zero(),
1156                                  AluInstr::last_write));
1157 
1158    auto dest = value_factory().dest_vec4(instr->dest, pin_group);
1159    auto fetch = new LoadFromBuffer(dest, {0,1,2,3}, src, offset,
1160                                    R600_LDS_INFO_CONST_BUFFER, nullptr,
1161                                    fmt_32_32_32_32);
1162 
1163    fetch->set_fetch_flag(LoadFromBuffer::srf_mode);
1164    emit_instruction(fetch);
1165 
1166    return true;
1167 }
1168 
emit_shader_clock(nir_intrinsic_instr * instr)1169 bool Shader::emit_shader_clock(nir_intrinsic_instr* instr)
1170 {
1171    auto& vf = value_factory();
1172    auto group = new AluGroup();
1173    group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 0, pin_chan),
1174                                        vf.inline_const(ALU_SRC_TIME_LO, 0), AluInstr::write));
1175    group->add_instruction(new AluInstr(op1_mov, vf.dest(instr->dest, 1, pin_chan),
1176                                        vf.inline_const(ALU_SRC_TIME_HI, 0), AluInstr::last_write));
1177    emit_instruction(group);
1178    return true;
1179 }
1180 
1181 
emit_barrier(nir_intrinsic_instr * intr)1182 bool Shader::emit_barrier(nir_intrinsic_instr* intr)
1183 {
1184    (void)intr;
1185    /* Put barrier into it's own block, so that optimizers and the
1186     * scheduler don't move code */
1187    start_new_block(0);
1188    auto op = new AluInstr(op0_group_barrier, 0);
1189    op->set_alu_flag(alu_last_instr);
1190    emit_instruction(op);
1191    start_new_block(0);
1192    return true;
1193 }
1194 
load_ubo(nir_intrinsic_instr * instr)1195 bool Shader::load_ubo(nir_intrinsic_instr *instr)
1196 {
1197    auto bufid = nir_src_as_const_value(instr->src[0]);
1198    auto buf_offset = nir_src_as_const_value(instr->src[1]);
1199 
1200    if (!buf_offset) {
1201       /* TODO: if bufid is constant then this can also be solved by using the CF indes
1202        * on the ALU block, and this would probably make sense when there are more then one
1203        * loads with the same buffer ID. */
1204 
1205       auto addr = value_factory().src(instr->src[1], 0)->as_register();
1206       RegisterVec4::Swizzle dest_swz {7,7,7,7};
1207       auto dest = value_factory().dest_vec4(instr->dest, pin_group);
1208 
1209       for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
1210          dest_swz[i] = i + nir_intrinsic_component(instr);
1211       }
1212 
1213       LoadFromBuffer *ir;
1214       if (bufid) {
1215          ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1 + bufid->u32,
1216                                  nullptr, fmt_32_32_32_32_float);
1217       } else {
1218          auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0));
1219          ir = new LoadFromBuffer(dest, dest_swz, addr, 0, 1, buffer_id,
1220                                  fmt_32_32_32_32_float);
1221       }
1222       emit_instruction(ir);
1223       return true;
1224    }
1225 
1226    /* direct load using the constant cache */
1227    if (bufid) {
1228       int buf_cmp = nir_intrinsic_component(instr);
1229 
1230       AluInstr *ir = nullptr;
1231       auto pin = instr->dest.is_ssa && nir_dest_num_components(instr->dest) == 1 ?
1232                     pin_free : pin_none;
1233       for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
1234 
1235          sfn_log << SfnLog::io << "UBO[" << bufid << "] "
1236                  << instr->dest.ssa.index << " const["<< i << "]: "<< instr->const_index[i] << "\n";
1237 
1238          auto uniform = value_factory().uniform(512 +  buf_offset->u32, i + buf_cmp, bufid->u32 + 1);
1239          ir = new AluInstr(op1_mov, value_factory().dest(instr->dest, i, pin),
1240                            uniform, {alu_write});
1241          emit_instruction(ir);
1242       }
1243       if (ir)
1244          ir->set_alu_flag(alu_last_instr);
1245       return true;
1246    } else {
1247       int buf_cmp = nir_intrinsic_component(instr);
1248       AluInstr *ir = nullptr;
1249       auto kc_id = value_factory().src(instr->src[0], 0);
1250 
1251       for (unsigned i = 0; i < nir_dest_num_components(instr->dest); ++i) {
1252          int cmp = buf_cmp + i;
1253          auto u = new UniformValue(512 +  buf_offset->u32, cmp, kc_id);
1254          auto dest = value_factory().dest(instr->dest, i, pin_none);
1255          ir = new AluInstr(op1_mov,  dest, u, AluInstr::write);
1256          emit_instruction(ir);
1257       }
1258       if (ir)
1259          ir->set_alu_flag(alu_last_instr);
1260       m_indirect_files |= 1 << TGSI_FILE_CONSTANT;
1261       return true;
1262    }
1263 }
1264 
start_new_block(int depth)1265 void Shader::start_new_block(int depth)
1266 {
1267    int depth_offset = m_current_block ? m_current_block->nesting_depth() : 0;
1268    m_current_block = new Block(depth + depth_offset, m_next_block++);
1269    m_root.push_back(m_current_block);
1270 }
1271 
emit_simple_mov(nir_dest & dest,int chan,PVirtualValue src,Pin pin)1272 bool Shader::emit_simple_mov(nir_dest& dest, int chan, PVirtualValue src, Pin pin)
1273 {
1274    auto dst = value_factory().dest(dest, chan, pin);
1275    emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write));
1276    return true;
1277 }
1278 
print(std::ostream & os) const1279 void Shader::print(std::ostream& os) const
1280 {
1281    print_header(os);
1282 
1283    for (auto& [dummy, i]: m_inputs) {
1284       i.print(os);
1285       os << "\n";
1286    }
1287 
1288    for (auto& [dummy, o]: m_outputs) {
1289       o.print(os);
1290       os << "\n";
1291    }
1292 
1293    os << "SHADER\n";
1294    for (auto& b : m_root)
1295       b->print(os);
1296 }
1297 
1298 const char *chip_class_names[] = {
1299    "R600",
1300    "R700",
1301    "EVERGREEN",
1302    "CAYMAN"
1303 };
1304 
print_header(std::ostream & os) const1305 void Shader::print_header(std::ostream& os) const
1306 {
1307    assert(m_chip_class <= ISA_CC_CAYMAN);
1308    os << m_type_id << "\n";
1309    os << "CHIPCLASS " << chip_class_names[m_chip_class] << "\n";
1310    print_properties(os);
1311 }
1312 
print_properties(std::ostream & os) const1313 void Shader::print_properties(std::ostream& os) const
1314 {
1315    do_print_properties(os);
1316 }
1317 
equal_to(const Shader & other) const1318 bool Shader::equal_to(const Shader& other) const
1319 {
1320    if (m_root.size() != other.m_root.size())
1321       return false;
1322    return std::inner_product(m_root.begin(), m_root.end(),
1323                              other.m_root.begin(),
1324                              true,
1325                              [](bool lhs, bool rhs){ return lhs & rhs;},
1326                              [](const Block::Pointer lhs, const Block::Pointer rhs) -> bool {
1327                                 return lhs->is_equal_to(*rhs);
1328                              });
1329 }
1330 
get_shader_info(r600_shader * sh_info)1331 void Shader::get_shader_info(r600_shader *sh_info)
1332 {
1333    sh_info->ninput = m_inputs.size();
1334    int lds_pos = 0;
1335    int input_array_array_loc = 0;
1336    for (auto& [index, info] : m_inputs) {
1337       r600_shader_io& io = sh_info->input[input_array_array_loc++];
1338 
1339       io.sid = info.sid();
1340       io.gpr = info.gpr();
1341       io.spi_sid = info.spi_sid();
1342       io.ij_index = info.ij_index();
1343       io.name = info.name();
1344       io.interpolate = info.interpolator();
1345       io.interpolate_location = info.interpolate_loc();
1346       if (info.need_lds_pos())
1347          io.lds_pos = lds_pos++;
1348       else
1349          io.lds_pos = 0;
1350 
1351       io.ring_offset = info.ring_offset();
1352       io.uses_interpolate_at_centroid = info.uses_interpolate_at_centroid();
1353 
1354       sfn_log << SfnLog::io << "Emit Input [" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n";
1355       assert(io.spi_sid >= 0);
1356    }
1357 
1358    sh_info->nlds = lds_pos;
1359    sh_info->noutput = m_outputs.size();
1360    sh_info->num_loops = m_nloops;
1361    int output_array_array_loc = 0;
1362 
1363    for (auto& [index, info] : m_outputs) {
1364       r600_shader_io& io = sh_info->output[output_array_array_loc++];
1365       io.sid = info.sid();
1366       io.gpr = info.gpr();
1367       io.spi_sid = info.spi_sid();
1368       io.name = info.name();
1369       io.write_mask = info.writemask();
1370 
1371       sfn_log << SfnLog::io << "Emit output[" << index << "] sid:" << io.sid << " spi_sid:" << io.spi_sid << "\n";
1372       assert(io.spi_sid >= 0);
1373    }
1374 
1375    sh_info->nhwatomic = m_nhwatomic;
1376    sh_info->atomic_base = m_atomic_base;
1377    sh_info->nhwatomic_ranges = m_atomics.size();
1378    for (unsigned i = 0; i < m_atomics.size(); ++i)
1379       sh_info->atomics[i] = m_atomics[i];
1380 
1381    if (m_flags.test(sh_indirect_const_file))
1382          sh_info->indirect_files |= 1 << TGSI_FILE_CONSTANT;
1383 
1384    if (m_flags.test(sh_indirect_atomic))
1385       sh_info->indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;
1386 
1387    sh_info->uses_tex_buffers = m_flags.test(sh_uses_tex_buffer);
1388 
1389    value_factory().get_shader_info(sh_info);
1390 
1391    sh_info->needs_scratch_space = m_flags.test(sh_needs_scratch_space);
1392    sh_info->uses_images = m_flags.test(sh_uses_images);
1393    sh_info->uses_atomics = m_flags.test(sh_uses_atomics);
1394    sh_info->has_txq_cube_array_z_comp = m_flags.test(sh_txs_cube_array_comp);
1395    sh_info->indirect_files = m_indirect_files;
1396    do_get_shader_info(sh_info);
1397 }
1398 
atomic_update()1399 PRegister Shader::atomic_update()
1400 {
1401    assert(m_atomic_update);
1402    return m_atomic_update;
1403 }
1404 
remap_atomic_base(int base)1405 int Shader::remap_atomic_base(int base)
1406 {
1407    return m_atomic_base_map[base];
1408 }
1409 
do_get_shader_info(r600_shader * sh_info)1410 void Shader::do_get_shader_info(r600_shader *sh_info)
1411 {
1412    sh_info->uses_atomics = m_nhwatomic > 0;
1413 }
1414 
1415 
input(int base) const1416 const ShaderInput& Shader::input(int base) const
1417 {
1418    auto io = m_inputs.find(base);
1419    assert(io != m_inputs.end());
1420    return io->second;
1421 }
1422 
output(int base) const1423 const ShaderOutput& Shader::output(int base) const
1424 {
1425    auto io = m_outputs.find(base);
1426    assert(io != m_outputs.end());
1427    return io->second;
1428 }
1429 
prepare_live_range_map()1430 LiveRangeMap Shader::prepare_live_range_map()
1431 {
1432    return m_instr_factory->value_factory().prepare_live_range_map();
1433 
1434 }
1435 
reset_function(ShaderBlocks & new_root)1436 void Shader::reset_function(ShaderBlocks& new_root)
1437 {
1438    std::swap(m_root, new_root);
1439 }
1440 
finalize()1441 void Shader::finalize()
1442 {
1443    do_finalize();
1444 }
1445 
do_finalize()1446 void Shader::do_finalize()
1447 {
1448 
1449 }
1450 
1451 }
1452