• Home
  • Line#
  • Scopes#
  • Navigate#
  • Raw
  • Download
1 /* -*- mesa-c++  -*-
2  *
3  * Copyright (c) 2022 Collabora LTD
4  *
5  * Author: Gert Wollny <gert.wollny@collabora.com>
6  *
7  * Permission is hereby granted, free of charge, to any person obtaining a
8  * copy of this software and associated documentation files (the "Software"),
9  * to deal in the Software without restriction, including without limitation
10  * on the rights to use, copy, modify, merge, publish, distribute, sub
11  * license, and/or sell copies of the Software, and to permit persons to whom
12  * the Software is furnished to do so, subject to the following conditions:
13  *
14  * The above copyright notice and this permission notice (including the next
15  * paragraph) shall be included in all copies or substantial portions of the
16  * Software.
17  *
18  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
21  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
22  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
23  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
24  * USE OR OTHER DEALINGS IN THE SOFTWARE.
25  */
26 
27 #include "sfn_shader.h"
28 
29 #include "gallium/drivers/r600/r600_shader.h"
30 #include "nir.h"
31 #include "nir_intrinsics.h"
32 #include "nir_intrinsics_indices.h"
33 #include "sfn_debug.h"
34 #include "sfn_instr.h"
35 #include "sfn_instr_alu.h"
36 #include "sfn_instr_alugroup.h"
37 #include "sfn_instr_controlflow.h"
38 #include "sfn_instr_export.h"
39 #include "sfn_instr_fetch.h"
40 #include "sfn_instr_lds.h"
41 #include "sfn_instr_mem.h"
42 #include "sfn_liverangeevaluator.h"
43 #include "sfn_shader_cs.h"
44 #include "sfn_shader_fs.h"
45 #include "sfn_shader_gs.h"
46 #include "sfn_shader_tess.h"
47 #include "sfn_shader_vs.h"
48 #include "util/u_math.h"
49 
50 #include <numeric>
51 #include <sstream>
52 
53 namespace r600 {
54 
55 using std::string;
56 
57 void
print(std::ostream & os) const58 ShaderIO::print(std::ostream& os) const
59 {
60    os << m_type << " LOC:" << m_location;
61    if (m_varying_slot != NUM_TOTAL_VARYING_SLOTS)
62       os << " VARYING_SLOT:" << static_cast<int>(m_varying_slot);
63    if (m_no_varying)
64       os << " NO_VARYING";
65    do_print(os);
66 }
67 
68 int
spi_sid() const69 ShaderIO::spi_sid() const
70 {
71    if (no_varying())
72       return 0;
73 
74    switch (varying_slot()) {
75    case NUM_TOTAL_VARYING_SLOTS:
76    case VARYING_SLOT_POS:
77    case VARYING_SLOT_PSIZ:
78    case VARYING_SLOT_EDGE:
79    case VARYING_SLOT_FACE:
80    case VARYING_SLOT_CLIP_VERTEX:
81       return 0;
82    default:
83       static_assert(static_cast<int>(NUM_TOTAL_VARYING_SLOTS) <= 0x100 - 1,
84                     "All varying slots plus 1 must be usable as 8-bit SPI semantic IDs");
85       return static_cast<int>(varying_slot()) + 1;
86    }
87 }
88 
/* Base constructor for shader inputs and outputs.
 * @param type printable tag used by print() ("INPUT" or "OUTPUT")
 * @param loc  driver location of the IO slot
 * @param varying_slot gl varying slot, or NUM_TOTAL_VARYING_SLOTS if unset */
ShaderIO::ShaderIO(const char *type, int loc, gl_varying_slot varying_slot):
    m_type(type),
    m_location(loc),
    m_varying_slot(varying_slot)
{
}
95 
/* Construct a shader output.
 * @param location driver location
 * @param writemask channel mask of the components actually written
 * @param varying_slot gl varying slot this output feeds */
ShaderOutput::ShaderOutput(int location, int writemask, gl_varying_slot varying_slot):
    ShaderIO("OUTPUT", location, varying_slot),
    m_writemask(writemask)
{
}
101 
/* Default-construct an output with invalid location (-1) and an empty
 * writemask; the varying slot falls back to the delegated constructor's
 * default argument (declared in the header, not visible here). */
ShaderOutput::ShaderOutput():
    ShaderOutput(-1, 0)
{
}
106 
107 void
do_print(std::ostream & os) const108 ShaderOutput::do_print(std::ostream& os) const
109 {
110    if (m_frag_result != static_cast<gl_frag_result>(FRAG_RESULT_MAX))
111       os << " FRAG_RESULT:" << static_cast<int>(m_frag_result);
112    os << " MASK:" << m_writemask;
113 }
114 
/* Construct a shader input.
 * @param location driver location
 * @param varying_slot gl varying slot this input reads */
ShaderInput::ShaderInput(int location, gl_varying_slot varying_slot):
    ShaderIO("INPUT", location, varying_slot)
{
}
119 
/* Default-construct an input with invalid location (-1); the varying
 * slot falls back to the delegated constructor's default argument. */
ShaderInput::ShaderInput():
    ShaderInput(-1)
{
}
124 
125 void
do_print(std::ostream & os) const126 ShaderInput::do_print(std::ostream& os) const
127 {
128    if (m_system_value != SYSTEM_VALUE_MAX)
129       os << " SYSVALUE: " << static_cast<int>(m_system_value);
130    if (m_interpolator)
131       os << " INTERP:" << m_interpolator;
132    if (m_interpolate_loc)
133       os << " ILOC:" << m_interpolate_loc;
134    if (m_uses_interpolate_at_centroid)
135       os << " USE_CENTROID";
136 }
137 
138 void
set_interpolator(int interp,int interp_loc,bool uses_interpolate_at_centroid)139 ShaderInput::set_interpolator(int interp,
140                               int interp_loc,
141                               bool uses_interpolate_at_centroid)
142 {
143    m_interpolator = interp;
144    m_interpolate_loc = interp_loc;
145    m_uses_interpolate_at_centroid = uses_interpolate_at_centroid;
146 }
147 
/* Mark that at least one instruction interpolates this input at the
 * pixel centroid. */
void
ShaderInput::set_uses_interpolate_at_centroid()
{
   m_uses_interpolate_at_centroid = true;
}
153 
/* Monotonically increasing id handed out to each new Shader instance. */
int64_t Shader::s_next_shader_id = 1;
155 
/* Base constructor for all shader stages.
 * @param type_id printable stage tag (used by the subclasses)
 * @param atomic_base first hw atomic counter slot available to this stage */
Shader::Shader(const char *type_id, unsigned atomic_base):
    m_current_block(nullptr),
    m_type_id(type_id),
    m_chip_class(ISA_CC_R600),
    m_next_block(0),
    m_atomic_base(atomic_base),
    m_shader_id(s_next_shader_id++)
{
   m_instr_factory = new InstrFactory();
   m_chain_instr.this_shader = this;
   /* Every shader starts with one open block at nesting depth 0. */
   start_new_block(0);
}
168 
169 void
set_input_gpr(int driver_lcation,int gpr)170 Shader::set_input_gpr(int driver_lcation, int gpr)
171 {
172    auto i = m_inputs.find(driver_lcation);
173    assert(i != m_inputs.end());
174    i->second.set_gpr(gpr);
175 }
176 
177 bool
add_info_from_string(std::istream & is)178 Shader::add_info_from_string(std::istream& is)
179 {
180    std::string type;
181    is >> type;
182 
183    if (type == "CHIPCLASS")
184       return read_chipclass(is);
185    if (type == "FAMILY")
186       return read_family(is);
187    if (type == "OUTPUT")
188       return read_output(is);
189    if (type == "INPUT")
190       return read_input(is);
191    if (type == "PROP")
192       return read_prop(is);
193    if (type == "SYSVALUES")
194       return allocate_registers_from_string(is, pin_fully);
195    if (type == "REGISTERS")
196       return allocate_registers_from_string(is, pin_free);
197    if (type == "ARRAYS")
198       return allocate_arrays_from_string(is);
199 
200    return false;
201 }
202 
/* Re-create one instruction from its serialized string form and emit it
 * into the current block. "BLOCK_START"/"BLOCK_END" are block markers,
 * not instructions. */
void
Shader::emit_instruction_from_string(const std::string& s)
{

   sfn_log << SfnLog::instr << "Create Instr from '" << s << "'\n";
   if (s == "BLOCK_START") {
      /* Only open a fresh block if the current one already holds
       * instructions; an empty block can simply be reused. */
      if (!m_current_block->empty()) {
         start_new_block(m_current_block->nesting_offset());
         sfn_log << SfnLog::instr << "   Emit start block\n";
      }
      return;
   }

   /* Block ends are implied by the emitted instructions below. */
   if (s == "BLOCK_END") {
      return;
   }

   auto ir = m_instr_factory->from_string(s, m_current_block->nesting_depth(),
                                          m_chip_class == ISA_CC_CAYMAN);
   if (ir) {
      emit_instruction(ir);
      /* Instructions that close a block (e.g. control flow) also force
       * a new block at the adjusted nesting level. */
      if (ir->end_block())
         start_new_block(ir->nesting_offset());
      sfn_log << SfnLog::instr << "   " << *ir << "\n";
   }
}
229 
230 bool
read_output(std::istream & is)231 Shader::read_output(std::istream& is)
232 {
233    ShaderOutput output;
234 
235    std::string token;
236    for (is >> token; !token.empty(); token.clear(), is >> token) {
237       int value;
238       if (int_from_string_with_prefix_optional(token, "LOC:", value))
239          output.set_location(value);
240       else if (int_from_string_with_prefix_optional(token, "VARYING_SLOT:", value))
241          output.set_varying_slot(static_cast<gl_varying_slot>(value));
242       else if (token == "NO_VARYING")
243          output.set_no_varying(true);
244       else if (int_from_string_with_prefix_optional(token, "FRAG_RESULT:", value))
245          output.set_frag_result(static_cast<gl_frag_result>(value));
246       else if (int_from_string_with_prefix_optional(token, "MASK:", value))
247          output.set_writemask(value);
248       else {
249          std::cerr << "Unknown parse value '" << token << "'";
250          assert(!"Unknown parse value in read_output");
251       }
252    }
253 
254    add_output(output);
255    return true;
256 }
257 
/* Parse one INPUT line of the serialized shader representation and
 * register the resulting input. Unknown tokens trigger an assert. */
bool
Shader::read_input(std::istream& is)
{
   ShaderInput input;

   int interp = 0;
   int interp_loc = 0;
   bool use_centroid = false;

   /* Token loop: a failed extraction at EOF leaves the token empty. */
   std::string token;
   for (is >> token; !token.empty(); token.clear(), is >> token) {
      int value;
      if (int_from_string_with_prefix_optional(token, "LOC:", value))
         input.set_location(value);
      else if (int_from_string_with_prefix_optional(token, "VARYING_SLOT:", value))
         input.set_varying_slot(static_cast<gl_varying_slot>(value));
      else if (token == "NO_VARYING")
         input.set_no_varying(true);
      else if (int_from_string_with_prefix_optional(token, "SYSVALUE:", value))
         input.set_system_value(static_cast<gl_system_value>(value));
      else if (int_from_string_with_prefix_optional(token, "INTERP:", interp))
         ; /* value already captured into interp */
      else if (int_from_string_with_prefix_optional(token, "ILOC:", interp_loc))
         ; /* value already captured into interp_loc */
      else if (token == "USE_CENTROID")
         use_centroid = true;
      else {
         std::cerr << "Unknown parse value '" << token << "'";
         assert(!"Unknown parse value in read_input");
      }
   }

   input.set_interpolator(interp, interp_loc, use_centroid);

   add_input(input);
   return true;
}
295 
/* Parse one line of register declarations and pre-allocate the named
 * registers in the value factory.
 * @param pin pin_fully for system values, pin_free for plain registers
 * @return false if no line could be read */
bool
Shader::allocate_registers_from_string(std::istream& is, Pin pin)
{
   std::string line;
   if (!std::getline(is, line))
      return false;

   std::istringstream iline(line);

   while (!iline.eof()) {
      string reg_str;
      iline >> reg_str;

      if (reg_str.empty())
         break;

      /* Names containing '@', the address register "AR", and "IDX*"
       * registers are scalar values; everything else is a vec4 spec. */
      if (strchr(reg_str.c_str(), '@') ||
          reg_str == "AR" ||
          reg_str.substr(0,3) == "IDX") {
         value_factory().dest_from_string(reg_str);
      } else {
         RegisterVec4::Swizzle swz = {0, 1, 2, 3};
         auto regs = value_factory().dest_vec4_from_string(reg_str, swz, pin);
         for (int i = 0; i < 4; ++i) {
            /* Fully pinned components (swizzle < 4 means the channel is
             * used) must stay fixed from the start of the live range. */
            if (swz[i] < 4 && pin == pin_fully) {
               regs[i]->set_flag(Register::pin_start);
            }
         }
      }
   }
   return true;
}
328 
329 bool
allocate_arrays_from_string(std::istream & is)330 Shader::allocate_arrays_from_string(std::istream& is)
331 {
332    std::string line;
333    if (!std::getline(is, line))
334       return false;
335 
336    std::istringstream iline(line);
337 
338    while (!iline.eof()) {
339       string reg_str;
340       iline >> reg_str;
341 
342       if (reg_str.empty())
343          break;
344 
345       value_factory().array_from_string(reg_str);
346    }
347    return true;
348 }
349 
350 bool
read_chipclass(std::istream & is)351 Shader::read_chipclass(std::istream& is)
352 {
353    string name;
354    is >> name;
355    if (name == "R600")
356       m_chip_class = ISA_CC_R600;
357    else if (name == "R700")
358       m_chip_class = ISA_CC_R700;
359    else if (name == "EVERGREEN")
360       m_chip_class = ISA_CC_EVERGREEN;
361    else if (name == "CAYMAN")
362       m_chip_class = ISA_CC_CAYMAN;
363    else
364       return false;
365    return true;
366 }
367 
368 bool
read_family(std::istream & is)369 Shader::read_family(std::istream& is)
370 {
371    string name;
372    is >> name;
373 #define CHECK_FAMILY(F) if (name == #F) m_chip_family = CHIP_ ## F
374 
375    CHECK_FAMILY(R600);
376    else CHECK_FAMILY(R600);
377    else CHECK_FAMILY(RV610);
378    else CHECK_FAMILY(RV630);
379    else CHECK_FAMILY(RV670);
380    else CHECK_FAMILY(RV620);
381    else CHECK_FAMILY(RV635);
382    else CHECK_FAMILY(RS780);
383    else CHECK_FAMILY(RS880);
384    /* GFX3 (R7xx) */
385    else CHECK_FAMILY(RV770);
386    else CHECK_FAMILY(RV730);
387    else CHECK_FAMILY(RV710);
388    else CHECK_FAMILY(RV740);
389    /* GFX4 (Evergreen) */
390    else CHECK_FAMILY(CEDAR);
391    else CHECK_FAMILY(REDWOOD);
392    else CHECK_FAMILY(JUNIPER);
393    else CHECK_FAMILY(CYPRESS);
394    else CHECK_FAMILY(HEMLOCK);
395    else CHECK_FAMILY(PALM);
396    else CHECK_FAMILY(SUMO);
397    else CHECK_FAMILY(SUMO2);
398    else CHECK_FAMILY(BARTS);
399    else CHECK_FAMILY(TURKS);
400    else CHECK_FAMILY(CAICOS);
401    /* GFX5 (Northern Islands) */
402    else CHECK_FAMILY(CAYMAN);
403    else CHECK_FAMILY(ARUBA);
404    else
405       return false;
406    return true;
407 }
408 
/* Allocate the registers that must live at fixed positions before
 * general register allocation runs: stage-specific reserved registers,
 * the atomic-update helper value, and the RAT return address. */
void
Shader::allocate_reserved_registers()
{
   m_instr_factory->value_factory().set_virtual_register_base(0);
   /* The stage subclass reserves its registers starting at 0; all
    * later virtual registers are allocated above that range. */
   auto reserved_registers_end = do_allocate_reserved_registers();
   m_instr_factory->value_factory().set_virtual_register_base(reserved_registers_end);
   if (!m_atomics.empty()) {
      /* Constant 1 used when emitting atomic counter updates. */
      m_atomic_update = value_factory().temp_register();
      auto alu = new AluInstr(op1_mov,
                              m_atomic_update,
                              value_factory().one_i(),
                              AluInstr::last_write);
      alu->set_alu_flag(alu_no_schedule_bias);
      emit_instruction(alu);
   }

   if (m_flags.test(sh_needs_sbo_ret_address)) {
      /* NOTE(review): temp_register(0) is called twice here — presumably
       * the argument selects the channel, giving two distinct registers
       * in channel 0; confirm against ValueFactory::temp_register. */
      m_rat_return_address = value_factory().temp_register(0);
      auto temp0 = value_factory().temp_register(0);
      auto temp1 = value_factory().temp_register(1);
      auto temp2 = value_factory().temp_register(2);

      /* temp0 = number of active lanes before this one (mbcnt lo+hi). */
      auto group = new AluGroup();
      group->add_instruction(new AluInstr(
         op1_mbcnt_32lo_accum_prev_int, temp0, value_factory().literal(-1), {alu_write}));
      group->add_instruction(new AluInstr(
         op1_mbcnt_32hi_int, temp1, value_factory().literal(-1), {alu_write}));
      emit_instruction(group);
      /* temp2 = SE id * 256 + hw wave id: a unique per-wave index. */
      emit_instruction(new AluInstr(op3_muladd_uint24,
                                    temp2,
                                    value_factory().inline_const(ALU_SRC_SE_ID, 0),
                                    value_factory().literal(256),
                                    value_factory().inline_const(ALU_SRC_HW_WAVE_ID, 0),
                                    {alu_write, alu_last_instr}));
      /* rat_return_address = temp2 * 0x40 + lane index: a unique
       * per-lane slot for RAT return data. */
      emit_instruction(new AluInstr(op3_muladd_uint24,
                                    m_rat_return_address,
                                    temp2,
                                    value_factory().literal(0x40),
                                    temp0,
                                    {alu_write, alu_last_instr}));
   }
}
451 
452 Shader *
translate_from_nir(nir_shader * nir,const pipe_stream_output_info * so_info,struct r600_shader * gs_shader,const r600_shader_key & key,r600_chip_class chip_class,radeon_family family)453 Shader::translate_from_nir(nir_shader *nir,
454                            const pipe_stream_output_info *so_info,
455                            struct r600_shader *gs_shader,
456                            const r600_shader_key& key,
457                            r600_chip_class chip_class,
458                            radeon_family family)
459 {
460    Shader *shader = nullptr;
461 
462    switch (nir->info.stage) {
463    case MESA_SHADER_FRAGMENT:
464       if (chip_class >= ISA_CC_EVERGREEN)
465          shader = new FragmentShaderEG(key);
466       else
467          shader = new FragmentShaderR600(key);
468       break;
469    case MESA_SHADER_VERTEX:
470       shader = new VertexShader(so_info, gs_shader, key);
471       break;
472    case MESA_SHADER_GEOMETRY:
473       shader = new GeometryShader(key);
474       break;
475    case MESA_SHADER_TESS_CTRL:
476       shader = new TCSShader(key);
477       break;
478    case MESA_SHADER_TESS_EVAL:
479       shader = new TESShader(so_info, gs_shader, key);
480       break;
481    case MESA_SHADER_KERNEL:
482    case MESA_SHADER_COMPUTE:
483       shader = new ComputeShader(key, BITSET_COUNT(nir->info.samplers_used));
484       break;
485    default:
486       return nullptr;
487    }
488 
489    shader->set_info(nir);
490 
491    shader->set_chip_class(chip_class);
492    shader->set_chip_family(family);
493 
494    if (!shader->process(nir))
495       return nullptr;
496 
497    return shader;
498 }
499 
/* Cache the information from the NIR shader that is needed after
 * translation (currently only the scratch size). */
void
Shader::set_info(nir_shader *nir)
{
   m_scratch_size = nir->scratch_size;
}
505 
/* Accessor for the value factory owned by the instruction factory. */
ValueFactory&
Shader::value_factory()
{
   return m_instr_factory->value_factory();
}
511 
/* Run the full translation of a NIR shader: scan pass, register
 * pre-allocation, instruction emission, and finalization.
 * @return false if any instruction could not be handled */
bool
Shader::process(nir_shader *nir)
{
   /* SSBO bindings are located after the image bindings. */
   m_ssbo_image_offset = nir->info.num_images;

   if (nir->info.use_legacy_math_rules)
      set_flag(sh_legacy_math_rules);

   nir_foreach_uniform_variable(var, nir) scan_uniforms(var);

   // at this point all functions should be inlined
   const nir_function *func =
      reinterpret_cast<const nir_function *>(exec_list_get_head_const(&nir->functions));

   /* First pass: collect IO/sysvalue/atomic info before any code is
    * emitted. */
   if (!scan_shader(func))
      return false;

   allocate_reserved_registers();

   value_factory().allocate_registers(m_register_allocations);
   m_required_registers = value_factory().array_registers();

   /* Second pass: translate the control flow tree into instructions. */
   sfn_log << SfnLog::trans << "Process shader \n";
   foreach_list_typed(nir_cf_node, node, node, &func->impl->body)
   {
      if (!process_cf_node(node))
         return false;
   }

   finalize();

   return true;
}
545 
/* Pre-translation scan pass: visit every instruction to collect
 * side-band info, then assign LDS positions to inputs and export
 * parameter slots to outputs. */
bool
Shader::scan_shader(const nir_function *func)
{

   nir_foreach_block(block, func->impl)
   {
      nir_foreach_instr(instr, block)
      {
         if (!scan_instruction(instr)) {
            fprintf(stderr, "Unhandled sysvalue access ");
            nir_print_instr(instr, stderr);
            fprintf(stderr, "\n");
            return false;
         }
      }
   }

   /* Assign consecutive LDS slots to all inputs that need one; before
    * Evergreen the GPR index mirrors the LDS position. */
   int lds_pos = 0;
   for (auto& [index, input] : m_inputs) {
      if (input.need_lds_pos()) {
         if (chip_class() < ISA_CC_EVERGREEN)
            input.set_gpr(lds_pos);
         input.set_lds_pos(lds_pos++);
      }
   }

   /* Only outputs with an SPI semantic ID occupy an export parameter. */
   int export_param = 0;
   for (auto& [index, out] : m_outputs) {
      if (out.spi_sid())
         out.set_export_param(export_param++);
   }

   return true;
}
580 
/* Collect per-uniform information: hardware atomic counters and
 * image/SSBO usage flags. Always succeeds. */
bool
Shader::scan_uniforms(nir_variable *uniform)
{
   if (glsl_contains_atomic(uniform->type)) {
      int natomics = glsl_atomic_size(uniform->type) / 4; /* ATOMIC_COUNTER_SIZE */
      m_nhwatomic += natomics;

      /* Arrays of counters may be indexed indirectly. */
      if (glsl_type_is_array(uniform->type))
         m_indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

      m_flags.set(sh_uses_atomics);

      r600_shader_atomic atom = {0};

      atom.buffer_id = uniform->data.binding;
      atom.hw_idx = m_atomic_base + m_next_hwatomic_loc;

      /* Offsets are in bytes, counters are 4 bytes each. */
      atom.start = uniform->data.offset >> 2;
      atom.end = atom.start + natomics - 1;

      /* First counter seen for this binding: remember where its hw
       * slot range starts. */
      if (m_atomic_base_map.find(uniform->data.binding) == m_atomic_base_map.end())
         m_atomic_base_map[uniform->data.binding] = m_next_hwatomic_loc;

      m_next_hwatomic_loc += natomics;

      m_atomic_file_count += atom.end - atom.start + 1;

      sfn_log << SfnLog::io << "HW_ATOMIC file count: " << m_atomic_file_count << "\n";

      m_atomics.push_back(atom);
   }

   auto type = glsl_without_array(uniform->type);
   if (glsl_type_is_image(type) || uniform->data.mode == nir_var_mem_ssbo) {
      m_flags.set(sh_uses_images);
      /* Image arrays (but not SSBOs) may be indexed indirectly. */
      if (glsl_type_is_array(uniform->type) && !(uniform->data.mode == nir_var_mem_ssbo))
         m_indirect_files |= 1 << TGSI_FILE_IMAGE;
   }

   return true;
}
622 
/* Scan a single instruction for side-band state: memory writes, the
 * need for a RAT return address, barriers, and register declarations.
 * Stage subclasses get the first look via do_scan_instruction(). */
bool
Shader::scan_instruction(nir_instr *instr)
{
   if (do_scan_instruction(instr))
      return true;

   if (instr->type != nir_instr_type_intrinsic)
      return true;

   auto intr = nir_instr_as_intrinsic(instr);

   // handle unhandled instructions
   switch (intr->intrinsic) {
   case nir_intrinsic_ssbo_atomic:
   case nir_intrinsic_ssbo_atomic_swap:
   case nir_intrinsic_image_load:
   case nir_intrinsic_image_atomic:
   case nir_intrinsic_image_atomic_swap:
      /* These return data through the RAT, so a return address is
       * required in addition to the plain write flags below. */
      m_flags.set(sh_needs_sbo_ret_address);
      FALLTHROUGH;
   case nir_intrinsic_image_store:
   case nir_intrinsic_store_ssbo:
      m_flags.set(sh_writes_memory);
      m_flags.set(sh_uses_images);
      break;
   case nir_intrinsic_barrier:
      /* A memory barrier is needed only for ssbo/global/image modes
       * with an actual scope. */
      m_chain_instr.prepare_mem_barrier |=
            (nir_intrinsic_memory_modes(intr) &
             (nir_var_mem_ssbo | nir_var_mem_global | nir_var_image) &&
             nir_intrinsic_memory_scope(intr) != SCOPE_NONE);
      break;
   case nir_intrinsic_decl_reg:
      /* Collected here, allocated later in process(). */
      m_register_allocations.push_back(intr);
      break;
   default:;
   }
   return true;
}
661 
662 bool
process_cf_node(nir_cf_node * node)663 Shader::process_cf_node(nir_cf_node *node)
664 {
665    SFN_TRACE_FUNC(SfnLog::flow, "CF");
666 
667    switch (node->type) {
668    case nir_cf_node_block:
669       return process_block(nir_cf_node_as_block(node));
670    case nir_cf_node_if:
671       return process_if(nir_cf_node_as_if(node));
672    case nir_cf_node_loop:
673       return process_loop(nir_cf_node_as_loop(node));
674    default:
675       return false;
676    }
677 }
678 
679 static bool
child_block_empty(const exec_list & list)680 child_block_empty(const exec_list& list)
681 {
682    if (list.is_empty())
683       return true;
684 
685    bool result = true;
686 
687    foreach_list_typed(nir_cf_node, n, node, &list)
688    {
689 
690       if (n->type == nir_cf_node_block) {
691          if (!nir_cf_node_as_block(n)->instr_list.is_empty())
692             return false;
693       }
694       if (n->type == nir_cf_node_if)
695          return false;
696    }
697    return result;
698 }
699 
/* Heuristic: does this value (transitively) depend on a non-constant,
 * possibly divergent source? Used to decide whether an if condition
 * contributes to the control flow depth. */
static bool value_has_non_const_source(VirtualValue *value)
{
   auto reg = value->as_register();
   if (reg) {
      // Non-ssa registers are probably the result of some control flow
      // that makes the values non-uniform across the work group
      if (!reg->has_flag(Register::ssa))
         return true;

      for (const auto& p : reg->parents()) {
         auto alu = p->as_alu();
         if (alu) {
            /* NOTE(review): this unconditionally returns on the FIRST
             * source, so only one source of the first ALU parent is
             * examined — looks like it should test each source and
             * return true only when one is non-const; confirm intent
             * before changing, as it alters the heuristic. */
            for (auto& s : p->as_alu()->sources()) {
               return value_has_non_const_source(s);
            }
         } else {
            /* Non-ALU producers (fetch etc.) are treated as non-const. */
            return true;
         }
      }
   }
   /* Literals, inline constants, uniforms: constant per invocation. */
   return false;
}
722 
/* Translate a NIR if statement into a predicated push/else/endif
 * sequence. If the then branch is empty only the else branch is
 * emitted, guarded by an inverted (equal-to-zero) predicate. */
bool
Shader::process_if(nir_if *if_stmt)
{
   SFN_TRACE_FUNC(SfnLog::flow, "IF");

   auto value = value_factory().src(if_stmt->condition, 0);

   /* Conditions derived from non-constant sources may diverge, so they
    * contribute to the tracked control flow depth. */
   bool non_const_cond = value_has_non_const_source(value);

   /* Empty then branch: use the "equal" predicate so that the code
    * emitted below (the else branch) runs when the condition is false. */
   EAluOp op = child_block_empty(if_stmt->then_list) ? op2_prede_int :
                                                       op2_pred_setne_int;

   AluInstr *pred = new AluInstr(op,
                                 value_factory().temp_register(),
                                 value,
                                 value_factory().zero(),
                                 AluInstr::last);
   pred->set_alu_flag(alu_update_exec);
   pred->set_alu_flag(alu_update_pred);
   pred->set_cf_type(cf_alu_push_before);

   IfInstr *ir = new IfInstr(pred);
   emit_instruction(ir);
   if (non_const_cond)
      ++m_control_flow_depth;
   start_new_block(1);

   if (!child_block_empty(if_stmt->then_list)) {
      foreach_list_typed(nir_cf_node, n, node, &if_stmt->then_list)
      {
         SFN_TRACE_FUNC(SfnLog::flow, "IF-then");
         if (!process_cf_node(n))
            return false;
      }
      /* Only emit the else marker if there is an else branch. */
      if (!child_block_empty(if_stmt->else_list)) {
         if (!emit_control_flow(ControlFlowInstr::cf_else))
            return false;
         foreach_list_typed(nir_cf_node,
                            n,
                            node,
                            &if_stmt->else_list)
               if (!process_cf_node(n)) return false;
      }
   } else {
      /* Then branch empty: the predicate above was inverted, so the
       * else branch is emitted directly without a cf_else. */
      assert(!child_block_empty(if_stmt->else_list));
      foreach_list_typed(nir_cf_node,
                         n,
                         node,
                         &if_stmt->else_list)
            if (!process_cf_node(n)) return false;
   }

   if (!emit_control_flow(ControlFlowInstr::cf_endif))
      return false;

   if (non_const_cond)
      --m_control_flow_depth;

   return true;
}
783 
/* Emit a control flow instruction and open a new block with the
 * matching nesting depth change (+1 for loop begin, -1 for loop end
 * and endif, 0 otherwise). Loop begin/end also maintain the loop
 * stack. */
bool
Shader::emit_control_flow(ControlFlowInstr::CFType type)
{
   auto ir = new ControlFlowInstr(type);
   emit_instruction(ir);
   int depth = 0;
   switch (type) {
   case ControlFlowInstr::cf_loop_begin:
      m_loops.push_back(ir);
      m_nloops++;
      depth = 1;
      break;
   case ControlFlowInstr::cf_loop_end:
      m_loops.pop_back();
      FALLTHROUGH; /* loop end also reduces the nesting depth */
   case ControlFlowInstr::cf_endif:
      depth = -1;
      break;
   default:;
   }

   start_new_block(depth);
   return true;
}
808 
809 bool
process_loop(nir_loop * node)810 Shader::process_loop(nir_loop *node)
811 {
812    assert(!nir_loop_has_continue_construct(node));
813    SFN_TRACE_FUNC(SfnLog::flow, "LOOP");
814    if (!emit_control_flow(ControlFlowInstr::cf_loop_begin))
815       return false;
816 
817    foreach_list_typed(nir_cf_node,
818                       n,
819                       node,
820                       &node->body) if (!process_cf_node(n)) return false;
821 
822    if (!emit_control_flow(ControlFlowInstr::cf_loop_end))
823       return false;
824 
825    return true;
826 }
827 
828 bool
process_block(nir_block * block)829 Shader::process_block(nir_block *block)
830 {
831    SFN_TRACE_FUNC(SfnLog::flow, "BLOCK");
832 
833    nir_foreach_instr(instr, block)
834    {
835       sfn_log << SfnLog::instr << "FROM:" << *instr << "\n";
836       bool r = process_instr(instr);
837       if (!r) {
838          sfn_log << SfnLog::err << "R600: Unsupported instruction: " << *instr << "\n";
839          return false;
840       }
841    }
842    return true;
843 }
844 
/* Translate a single NIR instruction by delegating to the instruction
 * factory. */
bool
Shader::process_instr(nir_instr *instr)
{
   return m_instr_factory->from_nir(instr, *this);
}
850 
/* Translate an intrinsic instruction. Stage-specific intrinsics,
 * atomic counters, and RAT accesses get the first chance; everything
 * else is dispatched here. Returns false for unhandled intrinsics. */
bool
Shader::process_intrinsic(nir_intrinsic_instr *intr)
{
   if (process_stage_intrinsic(intr))
      return true;

   if (GDSInstr::emit_atomic_counter(intr, *this)) {
      set_flag(sh_writes_memory);
      return true;
   }

   if (RatInstr::emit(intr, *this))
      return true;

   switch (intr->intrinsic) {
   case nir_intrinsic_store_output:
      return store_output(intr);
   case nir_intrinsic_load_input:
      return load_input(intr);
   case nir_intrinsic_load_ubo_vec4:
      return load_ubo(intr);
   case nir_intrinsic_store_scratch:
      return emit_store_scratch(intr);
   case nir_intrinsic_load_scratch:
      return emit_load_scratch(intr);
   case nir_intrinsic_store_local_shared_r600:
      return emit_local_store(intr);
   case nir_intrinsic_load_global:
   case nir_intrinsic_load_global_constant:
      return emit_load_global(intr);
   case nir_intrinsic_load_local_shared_r600:
      return emit_local_load(intr);
   /* TCS parameter bases: inputs start at offset 0, outputs at 16. */
   case nir_intrinsic_load_tcs_in_param_base_r600:
      return emit_load_tcs_param_base(intr, 0);
   case nir_intrinsic_load_tcs_out_param_base_r600:
      return emit_load_tcs_param_base(intr, 16);
   case nir_intrinsic_barrier:
      return emit_barrier(intr);
   case nir_intrinsic_shared_atomic:
   case nir_intrinsic_shared_atomic_swap:
      return emit_atomic_local_shared(intr);
   case nir_intrinsic_shader_clock:
      return emit_shader_clock(intr);
   case nir_intrinsic_load_reg:
      return emit_load_reg(intr);
   case nir_intrinsic_load_reg_indirect:
      return emit_load_reg_indirect(intr);
   case nir_intrinsic_store_reg:
      return emit_store_reg(intr);
   case nir_intrinsic_store_reg_indirect:
      return emit_store_reg_indirect(intr);
   case nir_intrinsic_decl_reg:
      // Registers and arrays are allocated at
      // conversion startup time
      return true;
   default:
      return false;
   }
}
910 
/* Map a NIR atomic op to the corresponding LDS opcode.
 * @param ret select the *_RET opcode variant (result is used)
 * Exchange and compare-exchange only exist in the RET form. */
static ESDOp
lds_op_from_intrinsic(nir_atomic_op op, bool ret)
{
   switch (op) {
   case nir_atomic_op_iadd:
      return ret ? LDS_ADD_RET : LDS_ADD;
   case nir_atomic_op_iand:
      return ret ? LDS_AND_RET : LDS_AND;
   case nir_atomic_op_ior:
      return ret ? LDS_OR_RET : LDS_OR;
   case nir_atomic_op_imax:
      return ret ? LDS_MAX_INT_RET : LDS_MAX_INT;
   case nir_atomic_op_umax:
      return ret ? LDS_MAX_UINT_RET : LDS_MAX_UINT;
   case nir_atomic_op_imin:
      return ret ? LDS_MIN_INT_RET : LDS_MIN_INT;
   case nir_atomic_op_umin:
      return ret ? LDS_MIN_UINT_RET : LDS_MIN_UINT;
   case nir_atomic_op_ixor:
      return ret ? LDS_XOR_RET : LDS_XOR;
   case nir_atomic_op_xchg:
      return LDS_XCHG_RET;
   case nir_atomic_op_cmpxchg:
      return LDS_CMP_XCHG_RET;
   default:
      unreachable("Unsupported shared atomic_op opcode");
   }
}
939 
940 PRegister
emit_load_to_register(PVirtualValue src)941 Shader::emit_load_to_register(PVirtualValue src)
942 {
943    assert(src);
944    PRegister dest = src->as_register();
945 
946    if (!dest) {
947       dest = value_factory().temp_register();
948       emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::last_write));
949    }
950    return dest;
951 }
952 
// add visitor to resolve array and register
/* Base visitor used by load_reg/store_reg handling: dispatches on
 * whether the target of the access is a plain register or a local
 * array. All other value kinds are invalid targets and assert. */
class RegisterAccessHandler : public RegisterVisitor {

public:
   RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr);

   void visit(LocalArrayValue& value) override {(void)value; assert(0);}
   void visit(UniformValue& value) override {(void)value; assert(0);}
   void visit(LiteralConstant& value) override {(void)value; assert(0);}
   void visit(InlineConstant& value) override {(void)value; assert(0);}

   Shader& sh;
   nir_intrinsic_instr *ir;
   /* Indirect addressing value, set only for *_reg_indirect. */
   PVirtualValue addr{nullptr};
   bool success{true};
};
969 
// Resolves nir_intrinsic_load_reg(_indirect): moves data out of a plain
// register or a local array element into the intrinsic's destination.
class RegisterReadHandler : public RegisterAccessHandler {

public:
   using RegisterAccessHandler::RegisterAccessHandler;
   using RegisterAccessHandler::visit;

   void visit(LocalArray& value) override;
   void visit(Register& value) override;
};
979 
emit_load_reg(nir_intrinsic_instr * intr)980 bool Shader::emit_load_reg(nir_intrinsic_instr *intr)
981 {
982    RegisterReadHandler visitor(*this, intr);
983    auto handle = value_factory().src(intr->src[0], 0);
984    handle->accept(visitor);
985    return visitor.success;
986 }
987 
emit_load_reg_indirect(nir_intrinsic_instr * intr)988 bool Shader::emit_load_reg_indirect(nir_intrinsic_instr *intr)
989 {
990    RegisterReadHandler visitor(*this, intr);
991    visitor.addr =  value_factory().src(intr->src[1], 0);
992    auto handle = value_factory().src(intr->src[0], 0);
993    handle->accept(visitor);
994    return visitor.success;
995 }
996 
// Resolves nir_intrinsic_store_reg(_indirect): moves the intrinsic's data
// operand (src[0]) into a plain register or a local array element.
class RegisterWriteHandler : public RegisterAccessHandler {

public:
   using RegisterAccessHandler::RegisterAccessHandler;
   using RegisterAccessHandler::visit;

   void visit(LocalArray& value) override;
   void visit(Register& value) override;
};
1006 
1007 
emit_store_reg(nir_intrinsic_instr * intr)1008 bool Shader::emit_store_reg(nir_intrinsic_instr *intr)
1009 {
1010    RegisterWriteHandler visitor(*this, intr);
1011    auto handle = value_factory().src(intr->src[1], 0);
1012    handle->accept(visitor);
1013    return visitor.success;
1014 }
1015 
emit_store_reg_indirect(nir_intrinsic_instr * intr)1016 bool Shader::emit_store_reg_indirect(nir_intrinsic_instr *intr)
1017 {
1018    RegisterWriteHandler visitor(*this, intr);
1019    visitor.addr =  value_factory().src(intr->src[2], 0);
1020 
1021    auto handle = value_factory().src(intr->src[1], 0);
1022    handle->accept(visitor);
1023    return visitor.success;
1024 }
1025 
// Stash the shader (instruction sink) and the intrinsic being resolved.
RegisterAccessHandler::RegisterAccessHandler(Shader& shader, nir_intrinsic_instr *intr):
   sh(shader),
   ir(intr)
{}
1030 
visit(LocalArray & array)1031 void RegisterReadHandler::visit(LocalArray& array)
1032 {
1033    int slots =  ir->def.bit_size / 32;
1034    auto pin = ir->def.num_components > 1 ? pin_none : pin_free;
1035    for (int i = 0; i < ir->def.num_components; ++i) {
1036       for (int s = 0; s < slots; ++s) {
1037          int chan = i * slots + s;
1038          auto dest = sh.value_factory().dest(ir->def, chan, pin);
1039          auto src = array.element(nir_intrinsic_base(ir), addr, chan);
1040          sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
1041       }
1042    }
1043 }
1044 
visit(Register & reg)1045 void RegisterReadHandler::visit(Register& reg)
1046 {
1047    auto dest = sh.value_factory().dest(ir->def, 0, pin_free);
1048    sh.emit_instruction(new AluInstr(op1_mov, dest, &reg, AluInstr::write));
1049 }
1050 
visit(LocalArray & array)1051 void RegisterWriteHandler::visit(LocalArray& array)
1052 {
1053    int writemask = nir_intrinsic_write_mask(ir);
1054    int slots =  ir->src->ssa->bit_size / 32;
1055 
1056    for (int i = 0; i < ir->num_components; ++i) {
1057       if (!(writemask & (1 << i)))
1058          continue;
1059       for (int s = 0; s < slots; ++s) {
1060          int chan = i * slots + s;
1061 
1062          auto dest = array.element(nir_intrinsic_base(ir), addr, chan);
1063          auto src = sh.value_factory().src(ir->src[0], chan);
1064          sh.emit_instruction(new AluInstr(op1_mov, dest, src, AluInstr::write));
1065       }
1066    }
1067 }
1068 
visit(Register & dest)1069 void RegisterWriteHandler::visit(Register& dest)
1070 {
1071    int writemask = nir_intrinsic_write_mask(ir);
1072    assert(writemask == 1);
1073    auto src = sh.value_factory().src(ir->src[0], 0);
1074    sh.emit_instruction(new AluInstr(op1_mov, &dest, src, AluInstr::write));
1075 }
1076 
/* Translate a NIR shared-memory atomic (nir_intrinsic_shared_atomic /
 * nir_intrinsic_shared_atomic_swap) into an LDS atomic instruction. */
bool
Shader::emit_atomic_local_shared(nir_intrinsic_instr *instr)
{
   /* Only request the value-returning LDS opcode when the result is
    * actually consumed. */
   bool uses_retval = !list_is_empty(&instr->def.uses);

   auto& vf = value_factory();

   auto dest_value = uses_retval ? vf.dest(instr->def, 0, pin_free) : nullptr;

   auto op = lds_op_from_intrinsic(nir_intrinsic_atomic_op(instr), uses_retval);

   /* For these two instructions we don't have opcodes that don't read back
    * the result, so we have to add a dummy-readback to remove the return
    * value from the read queue. */
   if (!uses_retval &&
       (op == LDS_XCHG_RET || op == LDS_CMP_XCHG_RET)) {
      dest_value = vf.dest(instr->def, 0, pin_free);
   }

   auto address = vf.src(instr->src[0], 0);

   AluInstr::SrcValues src;
   src.push_back(vf.src(instr->src[1], 0));

   /* compare-and-swap carries the comparison value as a second operand */
   if (unlikely(instr->intrinsic == nir_intrinsic_shared_atomic_swap))
      src.push_back(vf.src(instr->src[2], 0));
   emit_instruction(new LDSAtomicInstr(op, dest_value, address, src));
   return true;
}
1106 
1107 auto
evaluate_resource_offset(nir_intrinsic_instr * instr,int src_id)1108 Shader::evaluate_resource_offset(nir_intrinsic_instr *instr, int src_id)
1109    -> std::pair<int, PRegister>
1110 {
1111    auto& vf = value_factory();
1112 
1113    PRegister uav_id{nullptr};
1114    int offset = nir_intrinsic_has_range_base(instr) ?
1115                    nir_intrinsic_range_base(instr) : 0;
1116 
1117    auto uav_id_const = nir_src_as_const_value(instr->src[src_id]);
1118    if (uav_id_const) {
1119       offset += uav_id_const->u32;
1120    } else {
1121       auto uav_id_val = vf.src(instr->src[src_id], 0);
1122       if (uav_id_val->as_register()) {
1123          uav_id = uav_id_val->as_register();
1124       } else {
1125          uav_id = vf.temp_register();
1126          emit_instruction(new AluInstr(op1_mov, uav_id, uav_id_val, AluInstr::last_write));
1127       }
1128    }
1129    return std::make_pair(offset, uav_id);
1130 }
1131 
/* Write a (masked) vec4 to private scratch memory.
 * The value is first gathered into a channel-pinned register group; the
 * scratch address is either folded to a compile-time offset (direct
 * write) or copied into a register for an indexed write. */
bool
Shader::emit_store_scratch(nir_intrinsic_instr *intr)
{
   auto& vf = m_instr_factory->value_factory();

   int writemask = nir_intrinsic_write_mask(intr);

   /* Channels not covered by the write mask stay unused (swizzle 7). */
   RegisterVec4::Swizzle swz = {7, 7, 7, 7};

   for (unsigned i = 0; i < intr->num_components; ++i)
      swz[i] = (1 << i) & writemask ? i : 7;

   auto value = vf.temp_vec4(pin_group, swz);
   AluInstr *ir = nullptr;
   /* Gather the enabled components into the group. */
   for (unsigned i = 0; i < intr->num_components; ++i) {
      if (value[i]->chan() < 4) {
         ir = new AluInstr(op1_mov, value[i], vf.src(intr->src[0], i), AluInstr::write);
         ir->set_alu_flag(alu_no_schedule_bias);
         emit_instruction(ir);
      }
   }
   /* Nothing to store if the mask enabled no live channel. */
   if (!ir)
      return true;

   ir->set_alu_flag(alu_last_instr);

   auto address = vf.src(intr->src[1], 0);

   int align = nir_intrinsic_align_mul(intr);
   int align_offset = nir_intrinsic_align_offset(intr);

   ScratchIOInstr *ws_ir = nullptr;

   /* Try to resolve the address to a compile-time offset. */
   int offset = -1;
   if (address->as_literal()) {
      offset = address->as_literal()->value();
   } else if (address->as_inline_const()) {
      auto il = address->as_inline_const();
      if (il->sel() == ALU_SRC_0)
         offset = 0;
      else if (il->sel() == ALU_SRC_1_INT)
         offset = 1;
   }

   if (offset >= 0) {
      ws_ir = new ScratchIOInstr(value, offset, align, align_offset, writemask);
   } else {
      /* Indexed write: the address has to live in a register. */
      auto addr_temp = vf.temp_register(0);
      auto load_addr = new AluInstr(op1_mov, addr_temp, address, AluInstr::last_write);
      load_addr->set_alu_flag(alu_no_schedule_bias);
      emit_instruction(load_addr);

      ws_ir = new ScratchIOInstr(
         value, addr_temp, align, align_offset, writemask, m_scratch_size);
   }
   emit_instruction(ws_ir);

   m_flags.set(sh_needs_scratch_space);
   return true;
}
1192 
/* Read a vec4 back from private scratch memory.
 * R700 and newer have a dedicated scratch-read fetch instruction; older
 * chips reuse the scratch IO path in read mode, with a direct offset
 * when the address folds to a constant and an indexed access otherwise. */
bool
Shader::emit_load_scratch(nir_intrinsic_instr *intr)
{
   auto addr = value_factory().src(intr->src[0], 0);
   auto dest = value_factory().dest_vec4(intr->def, pin_group);

   if (chip_class() >= ISA_CC_R700) {
      RegisterVec4::Swizzle dest_swz = {7, 7, 7, 7};

      for (unsigned i = 0; i < intr->num_components; ++i)
         dest_swz[i] = i;

      auto *ir = new LoadFromScratch(dest, dest_swz, addr, m_scratch_size);
      emit_instruction(ir);
      /* Scratch reads are kept ordered among themselves. */
      chain_scratch_read(ir);
   } else {
      int align = nir_intrinsic_align_mul(intr);
      int align_offset = nir_intrinsic_align_offset(intr);

      /* Try to fold the address into a compile-time offset. */
      int offset = -1;
      if (addr->as_literal()) {
         offset = addr->as_literal()->value();
      } else if (addr->as_inline_const()) {
         auto il = addr->as_inline_const();
         if (il->sel() == ALU_SRC_0)
            offset = 0;
         else if (il->sel() == ALU_SRC_1_INT)
            offset = 1;
      }

      ScratchIOInstr *ir = nullptr;
      if (offset >= 0) {
         ir = new ScratchIOInstr(dest, offset, align, align_offset, 0xf, true);
      } else {
         /* Indexed read: the address has to live in a register. */
         auto addr_temp = value_factory().temp_register(0);
         auto load_addr = new AluInstr(op1_mov, addr_temp, addr, AluInstr::last_write);
         load_addr->set_alu_flag(alu_no_schedule_bias);
         emit_instruction(load_addr);

         ir = new ScratchIOInstr(
            dest, addr_temp, align, align_offset, 0xf, m_scratch_size, true);
      }
      emit_instruction(ir);
   }

   m_flags.set(sh_needs_scratch_space);

   return true;
}
1242 
emit_load_global(nir_intrinsic_instr * intr)1243 bool Shader::emit_load_global(nir_intrinsic_instr *intr)
1244 {
1245    auto dest = value_factory().dest_vec4(intr->def, pin_group);
1246 
1247    auto src_value = value_factory().src(intr->src[0], 0);
1248    auto src = src_value->as_register();
1249    if (!src) {
1250       src = value_factory().temp_register();
1251       emit_instruction(new AluInstr(op1_mov, src, src_value, AluInstr::last_write));
1252    }
1253    auto load = new LoadFromBuffer(dest, {0,7,7,7}, src, 0, 1, NULL, fmt_32);
1254    load->set_mfc(4);
1255    load->set_num_format(vtx_nf_int);
1256    load->reset_fetch_flag(FetchInstr::format_comp_signed);
1257 
1258    emit_instruction(load);
1259    return true;
1260 }
1261 
1262 bool
emit_local_store(nir_intrinsic_instr * instr)1263 Shader::emit_local_store(nir_intrinsic_instr *instr)
1264 {
1265    unsigned write_mask = nir_intrinsic_write_mask(instr);
1266 
1267    auto address = value_factory().src(instr->src[1], 0);
1268    int swizzle_base = 0;
1269    unsigned w = write_mask;
1270    while (!(w & 1)) {
1271       ++swizzle_base;
1272       w >>= 1;
1273    }
1274    write_mask = write_mask >> swizzle_base;
1275 
1276    if ((write_mask & 3) != 3) {
1277       auto value = value_factory().src(instr->src[0], swizzle_base);
1278       emit_instruction(new LDSAtomicInstr(LDS_WRITE, nullptr, address, {value}));
1279    } else {
1280       auto value = value_factory().src(instr->src[0], swizzle_base);
1281       auto value1 = value_factory().src(instr->src[0], swizzle_base + 1);
1282       emit_instruction(
1283          new LDSAtomicInstr(LDS_WRITE_REL, nullptr, address, {value, value1}));
1284    }
1285    return true;
1286 }
1287 
1288 bool
emit_local_load(nir_intrinsic_instr * instr)1289 Shader::emit_local_load(nir_intrinsic_instr *instr)
1290 {
1291    auto address = value_factory().src_vec(instr->src[0], instr->num_components);
1292    auto dest_value = value_factory().dest_vec(instr->def, instr->num_components);
1293    emit_instruction(new LDSReadInstr(dest_value, address));
1294    return true;
1295 }
1296 
/* Order this scratch access after the previously chained one. */
void
Shader::chain_scratch_read(Instr *instr)
{
   m_chain_instr.apply(instr, &m_chain_instr.last_scratch_instr);
}
1302 
/* Order this SSBO access after the previously chained one. */
void
Shader::chain_ssbo_read(Instr *instr)
{
   m_chain_instr.apply(instr, &m_chain_instr.last_ssbo_instr);
}
1308 
/* Emit a CF wait_ack in its own block so that neither the optimizer nor
 * the scheduler can move instructions across it. */
bool
Shader::emit_wait_ack()
{
   start_new_block(0);
   emit_instruction(new ControlFlowInstr(ControlFlowInstr::cf_wait_ack));
   start_new_block(0);
   return true;
}
1317 
get_array_hash(const VirtualValue & value)1318 static uint32_t get_array_hash(const VirtualValue& value)
1319 {
1320    assert (value.pin() == pin_array);
1321    const LocalArrayValue& av = static_cast<const LocalArrayValue&>(value);
1322    return av.chan() | (av.array().base_sel() << 2);
1323 }
1324 
/* Record ordering dependencies for an ALU instruction:
 * - kill must not be reordered against side-effecting GDS/SSBO accesses
 * - direct local-array accesses must stay ordered relative to the last
 *   indirect access of the same array channel. */
void Shader::InstructionChain::visit(AluInstr *instr)
{
   if (instr->is_kill()) {
      last_kill_instr = instr;

      // these instructions have side effects, they should
      // not be re-ordered with kill
      if (last_gds_instr)
         instr->add_required_instr(last_gds_instr);

      if (last_ssbo_instr)
         instr->add_required_instr(last_ssbo_instr);
   }

   /* Make sure array reads and writes depends on the last indirect access
    * so that we don't overwrite array elements too early */

   if (auto d = instr->dest()) {
      if (d->pin() == pin_array) {
         if (d->addr()) {
            /* Indirect write: becomes the new ordering barrier for this
             * array channel.  NOTE(review): the early return also skips
             * the source scan below — presumably intentional since the
             * instruction itself is now the chain head; confirm. */
            last_alu_with_indirect_reg[get_array_hash(*d)] = instr;
            return;
         }
         auto pos = last_alu_with_indirect_reg.find(get_array_hash(*d));
         if (pos != last_alu_with_indirect_reg.end()) {
            instr->add_required_instr(pos->second);
         }
      }
   }

   for (auto& s : instr->sources()) {
      if (s->pin() == pin_array) {
         if (s->get_addr()) {
            /* Indirect read: also becomes the chain head for the channel. */
            last_alu_with_indirect_reg[get_array_hash(*s)] = instr;
            return;
         }
         auto pos = last_alu_with_indirect_reg.find(get_array_hash(*s));
         if (pos != last_alu_with_indirect_reg.end()) {
            instr->add_required_instr(pos->second);
         }
      }
   }
}
1368 
/* Keep scratch accesses in emission order. */
void
Shader::InstructionChain::visit(ScratchIOInstr *instr)
{
   apply(instr, &last_scratch_instr);
}
1374 
/* Chain GDS instructions in emission order, propagate the helper/vpm
 * flag to all enclosing loops, and keep GDS ordered after a pending
 * kill (GDS has side effects). */
void
Shader::InstructionChain::visit(GDSInstr *instr)
{
   apply(instr, &last_gds_instr);
   Instr::Flags flag = instr->has_instr_flag(Instr::helper) ? Instr::helper : Instr::vpm;
   for (auto& loop : this_shader->m_loops) {
      loop->set_instr_flag(flag);
   }
   if (last_kill_instr)
      instr->add_required_instr(last_kill_instr);

}
1387 
/* Chain RAT (SSBO/image) instructions in emission order, propagate the
 * helper/vpm flag to enclosing loops, request an ack when a memory
 * barrier is pending, and keep RAT ops ordered after a pending kill. */
void
Shader::InstructionChain::visit(RatInstr *instr)
{
   apply(instr, &last_ssbo_instr);
   Instr::Flags flag = instr->has_instr_flag(Instr::helper) ? Instr::helper : Instr::vpm;
   for (auto& loop : this_shader->m_loops) {
      loop->set_instr_flag(flag);
   }

   if (prepare_mem_barrier)
      instr->set_ack();

   /* Start a new block after 16 RAT ops — presumably a per-CF-block
    * hardware limit; confirm against the ISA docs. */
   if (this_shader->m_current_block->inc_rat_emitted() > 15)
      this_shader->start_new_block(0);

   if (last_kill_instr)
      instr->add_required_instr(last_kill_instr);
}
1406 
1407 void
apply(Instr * current,Instr ** last)1408 Shader::InstructionChain::apply(Instr *current, Instr **last)
1409 {
1410    if (*last)
1411       current->add_required_instr(*last);
1412    *last = current;
1413 }
1414 
/* Append an instruction to the current block, after letting the
 * instruction-chain visitor record any ordering dependencies. */
void
Shader::emit_instruction(PInst instr)
{
   sfn_log << SfnLog::instr << "   " << *instr << "\n";
   instr->accept(m_chain_instr);
   m_current_block->push_back(instr);
}
1422 
1423 bool
emit_load_tcs_param_base(nir_intrinsic_instr * instr,int offset)1424 Shader::emit_load_tcs_param_base(nir_intrinsic_instr *instr, int offset)
1425 {
1426    auto src = value_factory().temp_register();
1427    emit_instruction(
1428       new AluInstr(op1_mov, src, value_factory().zero(), AluInstr::last_write));
1429 
1430    auto dest = value_factory().dest_vec4(instr->def, pin_group);
1431    auto fetch = new LoadFromBuffer(dest,
1432                                    {0, 1, 2, 3},
1433                                    src,
1434                                    offset,
1435                                    R600_LDS_INFO_CONST_BUFFER,
1436                                    nullptr,
1437                                    fmt_32_32_32_32);
1438 
1439    fetch->set_fetch_flag(LoadFromBuffer::srf_mode);
1440    emit_instruction(fetch);
1441 
1442    return true;
1443 }
1444 
/* Lower nir shader_clock by reading the TIME_LO/TIME_HI inline
 * constants into consecutive channels; both reads are placed in one
 * ALU group. */
bool
Shader::emit_shader_clock(nir_intrinsic_instr *instr)
{
   auto& vf = value_factory();
   auto group = new AluGroup();
   group->add_instruction(new AluInstr(op1_mov,
                                       vf.dest(instr->def, 0, pin_chan),
                                       vf.inline_const(ALU_SRC_TIME_LO, 0),
                                       AluInstr::write));
   group->add_instruction(new AluInstr(op1_mov,
                                       vf.dest(instr->def, 1, pin_chan),
                                       vf.inline_const(ALU_SRC_TIME_HI, 0),
                                       AluInstr::last_write));
   emit_instruction(group);
   return true;
}
1461 
/* Emit a workgroup execution barrier; asserts that we are not inside
 * any control flow. */
bool
Shader::emit_group_barrier(nir_intrinsic_instr *intr)
{
   assert(m_control_flow_depth == 0);
   (void)intr;
   /* Put barrier into its own block, so that optimizers and the
    * scheduler don't move code */
   start_new_block(0);
   auto op = new AluInstr(op0_group_barrier, 0);
   op->set_alu_flag(alu_last_instr);
   emit_instruction(op);
   start_new_block(0);
   return true;
}
1476 
/* Lower a nir barrier intrinsic: a workgroup execution scope emits a
 * hardware group barrier; for the listed memory modes only a wait for
 * outstanding RAT acks is emitted (no GWS support). */
bool Shader::emit_barrier(nir_intrinsic_instr *intr)
{

   if ((nir_intrinsic_execution_scope(intr) == SCOPE_WORKGROUP)) {
      if (!emit_group_barrier(intr))
         return false;
   }

   /* We don't check nir_var_mem_shared because we don't emit a real barrier -
    * for this we need to implement GWS (Global Wave Sync).
    * Here we just emit a wait_ack - this is no real barrier,
    * it's just a wait for RAT writes to be finished (if they
    * are emitted with the _ACK opcode and the `mark` flag set - it
    * is very likely that WAIT_ACK is also only relevant for this
    * shader instance). */
   auto full_barrier_mem_modes = nir_var_mem_ssbo |  nir_var_image | nir_var_mem_global;

   if ((nir_intrinsic_memory_scope(intr) != SCOPE_NONE) &&
       (nir_intrinsic_memory_modes(intr) & full_barrier_mem_modes)) {
      return emit_wait_ack();
   }

   return true;
}
1501 
1502 bool
load_ubo(nir_intrinsic_instr * instr)1503 Shader::load_ubo(nir_intrinsic_instr *instr)
1504 {
1505    auto bufid = nir_src_as_const_value(instr->src[0]);
1506    auto buf_offset = nir_src_as_const_value(instr->src[1]);
1507    auto base_id = nir_intrinsic_base(instr);
1508 
1509    if (!buf_offset) {
1510       /* TODO: if bufid is constant then this can also be solved by using the
1511        * CF index on the ALU block, and this would probably make sense when
1512        * there are more then one loads with the same buffer ID. */
1513 
1514       auto addr = value_factory().src(instr->src[1], 0)->as_register();
1515       RegisterVec4::Swizzle dest_swz{7, 7, 7, 7};
1516       auto dest = value_factory().dest_vec4(instr->def, pin_group);
1517 
1518       for (unsigned i = 0; i < instr->def.num_components; ++i) {
1519          dest_swz[i] = i + nir_intrinsic_component(instr);
1520       }
1521 
1522       LoadFromBuffer *ir;
1523       if (bufid) {
1524          ir = new LoadFromBuffer(
1525             dest, dest_swz, addr, 0, bufid->u32, nullptr, fmt_32_32_32_32_float);
1526       } else {
1527          auto buffer_id = emit_load_to_register(value_factory().src(instr->src[0], 0));
1528          ir = new LoadFromBuffer(
1529             dest, dest_swz, addr, 0, base_id, buffer_id, fmt_32_32_32_32_float);
1530       }
1531       emit_instruction(ir);
1532       return true;
1533    }
1534 
1535    /* direct load using the constant cache */
1536    if (bufid) {
1537       int buf_cmp = nir_intrinsic_component(instr);
1538 
1539       AluInstr *ir = nullptr;
1540       auto pin = instr->def.num_components == 1
1541                     ? pin_free
1542                     : pin_none;
1543       for (unsigned i = 0; i < instr->def.num_components; ++i) {
1544 
1545          sfn_log << SfnLog::io << "UBO[" << bufid << "] " << instr->def.index
1546                  << " const[" << i << "]: " << instr->const_index[i] << "\n";
1547 
1548          auto uniform =
1549             value_factory().uniform(512 + buf_offset->u32, i + buf_cmp, bufid->u32);
1550          ir = new AluInstr(op1_mov,
1551                            value_factory().dest(instr->def, i, pin),
1552                            uniform,
1553                            {alu_write});
1554          emit_instruction(ir);
1555       }
1556       if (ir)
1557          ir->set_alu_flag(alu_last_instr);
1558       return true;
1559    } else {
1560       int buf_cmp = nir_intrinsic_component(instr);
1561       AluInstr *ir = nullptr;
1562       auto kc_id = value_factory().src(instr->src[0], 0);
1563 
1564       for (unsigned i = 0; i < instr->def.num_components; ++i) {
1565          int cmp = buf_cmp + i;
1566          auto u =
1567             new UniformValue(512 + buf_offset->u32, cmp, kc_id, nir_intrinsic_base(instr));
1568          auto dest = value_factory().dest(instr->def, i, pin_none);
1569          ir = new AluInstr(op1_mov, dest, u, AluInstr::write);
1570          emit_instruction(ir);
1571       }
1572       if (ir)
1573          ir->set_alu_flag(alu_last_instr);
1574       m_indirect_files |= 1 << TGSI_FILE_CONSTANT;
1575       return true;
1576    }
1577 }
1578 
1579 void
start_new_block(int depth)1580 Shader::start_new_block(int depth)
1581 {
1582    int depth_offset = m_current_block ? m_current_block->nesting_depth() : 0;
1583    m_current_block = new Block(depth + depth_offset, m_next_block++);
1584    m_root.push_back(m_current_block);
1585 }
1586 
1587 bool
emit_simple_mov(nir_def & def,int chan,PVirtualValue src,Pin pin)1588 Shader::emit_simple_mov(nir_def& def, int chan, PVirtualValue src, Pin pin)
1589 {
1590    auto dst = value_factory().dest(def, chan, pin);
1591    emit_instruction(new AluInstr(op1_mov, dst, src, AluInstr::last_write));
1592    return true;
1593 }
1594 
1595 void
print(std::ostream & os) const1596 Shader::print(std::ostream& os) const
1597 {
1598    print_header(os);
1599 
1600    for (auto& [dummy, i] : m_inputs) {
1601       i.print(os);
1602       os << "\n";
1603    }
1604 
1605    for (auto& [dummy, o] : m_outputs) {
1606       o.print(os);
1607       os << "\n";
1608    }
1609 
1610    os << "SHADER\n";
1611    for (auto& b : m_root)
1612       b->print(os);
1613 }
1614 
/* Printable names indexed by the r600 ISA chip class (ISA_CC_*). */
const char *chip_class_names[] = {"R600", "R700", "EVERGREEN", "CAYMAN"};
1616 
1617 void
print_header(std::ostream & os) const1618 Shader::print_header(std::ostream& os) const
1619 {
1620    assert(m_chip_class <= ISA_CC_CAYMAN);
1621    os << "Shader: " << m_shader_id << "\n";
1622    os << m_type_id << "\n";
1623    os << "CHIPCLASS " << chip_class_names[m_chip_class] << "\n";
1624    print_properties(os);
1625 }
1626 
/* Delegate stage-specific property printing to the subclass. */
void
Shader::print_properties(std::ostream& os) const
{
   do_print_properties(os);
}
1632 
1633 bool
equal_to(const Shader & other) const1634 Shader::equal_to(const Shader& other) const
1635 {
1636    if (m_root.size() != other.m_root.size())
1637       return false;
1638    return std::inner_product(
1639       m_root.begin(),
1640       m_root.end(),
1641       other.m_root.begin(),
1642       true,
1643       [](bool lhs, bool rhs) { return lhs & rhs; },
1644       [](const Block::Pointer lhs, const Block::Pointer rhs) -> bool {
1645          return lhs->is_equal_to(*rhs);
1646       });
1647 }
1648 
/* Fill the r600_shader structure consumed by the driver state code:
 * inputs, outputs, atomics, and assorted feature flags. */
void
Shader::get_shader_info(r600_shader *sh_info)
{
   sh_info->ninput = m_inputs.size();
   sh_info->nlds = 0;
   int input_array_array_loc = 0;
   for (auto& [index, info] : m_inputs) {
      r600_shader_io& io = sh_info->input[input_array_array_loc++];

      io.varying_slot = info.varying_slot();
      io.system_value = info.system_value();
      io.gpr = info.gpr();
      io.spi_sid = info.spi_sid();
      io.ij_index = info.ij_index();
      io.interpolate = info.interpolator();
      io.interpolate_location = info.interpolate_loc();
      if (info.need_lds_pos()) {
         io.lds_pos = info.lds_pos();
         /* track the highest LDS slot in use */
         sh_info->nlds = MAX2(unsigned(info.lds_pos() + 1), sh_info->nlds);
      } else {
         io.lds_pos = 0;
      }

      io.ring_offset = info.ring_offset();
      io.uses_interpolate_at_centroid = info.uses_interpolate_at_centroid();

      sfn_log << SfnLog::io << "Emit input [" << index << "]";
      if (io.varying_slot != NUM_TOTAL_VARYING_SLOTS)
         sfn_log << " varying_slot:" << static_cast<int>(io.varying_slot);
      if (io.system_value != SYSTEM_VALUE_MAX)
         sfn_log << " system_value:" << static_cast<int>(io.system_value);
      sfn_log << " spi_sid:" << io.spi_sid << "\n";
      assert(io.spi_sid >= 0);
   }

   sh_info->noutput = m_outputs.size();
   /* VS is required to export at least one parameter. */
   sh_info->highest_export_param = 0;
   sh_info->num_loops = m_nloops;
   int output_array_array_loc = 0;

   for (auto& [index, info] : m_outputs) {
      r600_shader_io& io = sh_info->output[output_array_array_loc++];
      io.varying_slot = info.varying_slot();
      io.frag_result = info.frag_result();
      io.gpr = info.gpr();
      io.spi_sid = info.spi_sid();
      io.write_mask = info.writemask();
      io.export_param = info.export_param();
      if (info.export_param() >= 0)
         sh_info->highest_export_param = MAX2(unsigned(info.export_param()),
                                              sh_info->highest_export_param);

      sfn_log << SfnLog::io << "Emit output[" << index << "]";
      if (io.varying_slot != NUM_TOTAL_VARYING_SLOTS)
         sfn_log << " varying_slot:" << static_cast<int>(io.varying_slot);
      if (io.frag_result != static_cast<gl_frag_result>(FRAG_RESULT_MAX))
         sfn_log << " frag_result:" << static_cast<int>(io.frag_result);
      sfn_log << " spi_sid:" << io.spi_sid << " write_mask:" << io.write_mask << "\n";
      assert(io.spi_sid >= 0);
   }

   sh_info->nhwatomic = m_nhwatomic;
   sh_info->atomic_base = m_atomic_base;
   sh_info->nhwatomic_ranges = m_atomics.size();
   for (unsigned i = 0; i < m_atomics.size(); ++i)
      sh_info->atomics[i] = m_atomics[i];

   if (m_flags.test(sh_indirect_const_file))
      sh_info->indirect_files |= 1 << TGSI_FILE_CONSTANT;

   if (m_flags.test(sh_indirect_atomic))
      sh_info->indirect_files |= 1 << TGSI_FILE_HW_ATOMIC;

   sh_info->uses_tex_buffers = m_flags.test(sh_uses_tex_buffer);

   value_factory().get_shader_info(sh_info);

   sh_info->needs_scratch_space = m_flags.test(sh_needs_scratch_space);
   sh_info->uses_images = m_flags.test(sh_uses_images);
   sh_info->uses_atomics = m_flags.test(sh_uses_atomics);
   sh_info->disable_sb = m_flags.test(sh_disble_sb);
   sh_info->has_txq_cube_array_z_comp = m_flags.test(sh_txs_cube_array_comp);
   /* NOTE(review): this assignment overwrites the indirect_files bits
    * OR-ed in from m_flags above unless m_indirect_files already
    * contains them — confirm that this is intended. */
   sh_info->indirect_files = m_indirect_files;
   do_get_shader_info(sh_info);
}
1735 
/* Return the register reserved for atomic-counter updates; it must have
 * been set up elsewhere before this is called. */
PRegister
Shader::atomic_update()
{
   assert(m_atomic_update);
   return m_atomic_update;
}
1742 
/* Map a NIR atomic-counter base to the remapped hardware base.
 * NOTE(review): if m_atomic_base_map is a std::map/unordered_map, an
 * unknown base default-inserts 0 here — presumably every base was
 * registered beforehand; confirm. */
int
Shader::remap_atomic_base(int base)
{
   return m_atomic_base_map[base];
}
1748 
/* Base-class part of the shader-info fill; stage subclasses extend it. */
void
Shader::do_get_shader_info(r600_shader *sh_info)
{
   sh_info->uses_atomics = m_nhwatomic > 0;
}
1754 
/* Look up the shader input registered at @base; the entry must exist. */
const ShaderInput&
Shader::input(int base) const
{
   auto io = m_inputs.find(base);
   assert(io != m_inputs.end());
   return io->second;
}
1762 
/* Look up the shader output registered at @base; the entry must exist. */
const ShaderOutput&
Shader::output(int base) const
{
   auto io = m_outputs.find(base);
   assert(io != m_outputs.end());
   return io->second;
}
1770 
/* Build the live-range map for all values; delegated to the value
 * factory which owns them. */
LiveRangeMap
Shader::prepare_live_range_map()
{
   return m_instr_factory->value_factory().prepare_live_range_map();
}
1776 
/* Replace the shader's block list with @new_root; the old blocks are
 * handed back to the caller via the swap. */
void
Shader::reset_function(ShaderBlocks& new_root)
{
   std::swap(m_root, new_root);
}
1782 
/* Run the stage-specific finalization hook. */
void
Shader::finalize()
{
   do_finalize();
}
1788 
/* Default finalization hook: nothing to do; stages may override. */
void
Shader::do_finalize()
{
}
1793 
1794 } // namespace r600
1795